Stripped down version of SoundTouch 2.0.0

This commit is contained in:
MerryMage 2017-08-17 11:09:20 +01:00 committed by Yuri Kunde Schlesner
parent a8d656485d
commit 019d2089bb
31 changed files with 2456 additions and 119 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.DS_Store

View File

@ -1,14 +1,18 @@
set(SRCS
src/AAFilter.cpp
src/cpu_detect_x86.cpp
src/FIFOSampleBuffer.cpp
src/FIRFilter.cpp
src/InterpolateLinear.cpp
src/RateTransposer.cpp
src/SoundTouch.cpp
src/TDStretch.cpp
)
include_directories(src include)
src/AAFilter.cpp
src/BPMDetect.cpp
src/cpu_detect_x86.cpp
src/FIFOSampleBuffer.cpp
src/FIRFilter.cpp
src/InterpolateCubic.cpp
src/InterpolateLinear.cpp
src/InterpolateShannon.cpp
src/mmx_optimized.cpp
src/PeakFinder.cpp
src/RateTransposer.cpp
src/SoundTouch.cpp
src/sse_optimized.cpp
src/TDStretch.cpp)
add_library(SoundTouch STATIC ${SRCS})
target_include_directories(SoundTouch PUBLIC include PRIVATE src)

158
include/BPMDetect.h Executable file
View File

@ -0,0 +1,158 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Beats-per-minute (BPM) detection routine.
///
/// The beat detection algorithm works as follows:
/// - Use function 'inputSamples' to input a chunks of samples to the class for
/// analysis. It's a good idea to enter a large sound file or stream in smallish
/// chunks of around few kilosamples in order not to extinguish too much RAM memory.
/// - Input sound data is decimated to approx 500 Hz to reduce calculation burden,
/// which is basically ok as low (bass) frequencies mostly determine the beat rate.
/// Simple averaging is used for anti-alias filtering because the resulting signal
/// quality isn't of that high importance.
/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
/// taking absolute value that's smoothed by sliding average. Signal levels that
/// are below a couple of times the general RMS amplitude level are cut away to
/// leave only notable peaks there.
/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
/// autocorrelation function of the enveloped signal.
/// - After whole sound data file has been analyzed as above, the bpm level is
/// detected by function 'getBpm' that finds the highest peak of the autocorrelation
/// function, calculates it's precise location and converts this reading to bpm's.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2016-01-12 19:24:46 +0200 (ti, 12 tammi 2016) $
// File revision : $Revision: 4 $
//
// $Id: BPMDetect.h 239 2016-01-12 17:24:46Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _BPMDetect_H_
#define _BPMDetect_H_
#include "STTypes.h"
#include "FIFOSampleBuffer.h"
namespace soundtouch
{
/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
#define MIN_BPM 29
/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit.
#define MAX_BPM 200
/// Class for calculating BPM rate for audio data.
class BPMDetect
{
protected:
/// Auto-correlation accumulator bins.
float *xcorr;
/// Sample average counter.
int decimateCount;
/// Sample average accumulator for FIFO-like decimation.
soundtouch::LONG_SAMPLETYPE decimateSum;
/// Decimate sound by this coefficient to reach approx. 500 Hz.
int decimateBy;
/// Auto-correlation window length
int windowLen;
/// Number of channels (1 = mono, 2 = stereo)
int channels;
/// sample rate
int sampleRate;
/// Beginning of auto-correlation window: Autocorrelation isn't being updated for
/// the first these many correlation bins.
int windowStart;
/// FIFO-buffer for decimated processing samples.
soundtouch::FIFOSampleBuffer *buffer;
/// Updates auto-correlation function for given number of decimated samples that
/// are read from the internal 'buffer' pipe (samples aren't removed from the pipe
/// though).
void updateXCorr(int process_samples /// How many samples are processed.
);
/// Decimates samples to approx. 500 Hz.
///
/// \return Number of output samples.
int decimate(soundtouch::SAMPLETYPE *dest, ///< Destination buffer
const soundtouch::SAMPLETYPE *src, ///< Source sample buffer
int numsamples ///< Number of source samples.
);
/// Calculates amplitude envelope for the buffer of samples.
/// Result is output to 'samples'.
void calcEnvelope(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/output data buffer
int numsamples ///< Number of samples in buffer
);
/// remove constant bias from xcorr data
void removeBias();
public:
/// Constructor.
BPMDetect(int numChannels, ///< Number of channels in sample data.
int sampleRate ///< Sample rate in Hz.
);
/// Destructor.
virtual ~BPMDetect();
/// Inputs a block of samples for analyzing: Envelopes the samples and then
/// updates the autocorrelation estimation. When whole song data has been input
/// in smaller blocks using this function, read the resulting bpm with 'getBpm'
/// function.
///
/// Notice that data in 'samples' array can be disrupted in processing.
void inputSamples(const soundtouch::SAMPLETYPE *samples, ///< Pointer to input/working data buffer
int numSamples ///< Number of samples in buffer
);
/// Analyzes the results and returns the BPM rate. Use this function to read result
/// after whole song data has been input to the class by consecutive calls of
/// 'inputSamples' function.
///
/// \return Beats-per-minute rate, or zero if detection failed.
float getBpm();
};
}
#endif // _BPMDetect_H_

2
include/FIFOSampleBuffer.h Normal file → Executable file
View File

@ -15,7 +15,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2014-01-05 23:40:22 +0200 (Sun, 05 Jan 2014) $
// Last changed : $Date: 2014-01-05 23:40:22 +0200 (su, 05 tammi 2014) $
// File revision : $Revision: 4 $
//
// $Id: FIFOSampleBuffer.h 177 2014-01-05 21:40:22Z oparviai $

2
include/FIFOSamplePipe.h Normal file → Executable file
View File

@ -17,7 +17,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2012-06-13 22:29:53 +0300 (Wed, 13 Jun 2012) $
// Last changed : $Date: 2012-06-13 22:29:53 +0300 (ke, 13 kesä 2012) $
// File revision : $Revision: 4 $
//
// $Id: FIFOSamplePipe.h 143 2012-06-13 19:29:53Z oparviai $

18
include/STTypes.h Normal file → Executable file
View File

@ -8,10 +8,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-05-18 18:25:07 +0300 (Mon, 18 May 2015) $
// Last changed : $Date: 2017-07-30 12:28:06 +0300 (su, 30 heinä 2017) $
// File revision : $Revision: 3 $
//
// $Id: STTypes.h 215 2015-05-18 15:25:07Z oparviai $
// $Id: STTypes.h 252 2017-07-30 09:28:06Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -53,6 +53,8 @@ typedef unsigned long ulong;
// Helper macro for aligning pointer up to next 16-byte boundary
#define SOUNDTOUCH_ALIGN_POINTER_16(x) ( ( (ulongptr)(x) + 15 ) & ~(ulongptr)15 )
#include "soundtouch_config.h"
namespace soundtouch
{
/// Activate these undef's to overrule the possible sampletype
@ -90,8 +92,8 @@ namespace soundtouch
/// However, if you still prefer to select the sample format here
/// also in GNU environment, then please #undef the INTEGER_SAMPLE
/// and FLOAT_SAMPLE defines first as in comments above.
#define SOUNDTOUCH_INTEGER_SAMPLES 1 //< 16bit integer samples
//#define SOUNDTOUCH_FLOAT_SAMPLES 1 //< 32bit float samples
//#define SOUNDTOUCH_INTEGER_SAMPLES 1 //< 16bit integer samples
#define SOUNDTOUCH_FLOAT_SAMPLES 1 //< 32bit float samples
#endif
@ -102,7 +104,7 @@ namespace soundtouch
/// routines compiled for whatever reason, you may disable these optimizations
/// to make the library compile.
//#define SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS 1
#define SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS 1
/// In GNU environment, allow the user to override this setting by
/// giving the following switch to the configure script:
@ -135,8 +137,10 @@ namespace soundtouch
#endif // SOUNDTOUCH_FLOAT_SAMPLES
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
// Allow MMX optimizations
#define SOUNDTOUCH_ALLOW_MMX 1
// Allow MMX optimizations (not available in X64 mode)
#if (!_M_X64)
#define SOUNDTOUCH_ALLOW_MMX 1
#endif
#endif
#else

86
include/SoundTouch.h Normal file → Executable file
View File

@ -41,10 +41,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-09-20 10:38:32 +0300 (Sun, 20 Sep 2015) $
// Last changed : $Date: 2017-07-30 12:35:00 +0300 (su, 30 heinä 2017) $
// File revision : $Revision: 4 $
//
// $Id: SoundTouch.h 230 2015-09-20 07:38:32Z oparviai $
// $Id: SoundTouch.h 253 2017-07-30 09:35:00Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -79,10 +79,10 @@ namespace soundtouch
{
/// Soundtouch library version string
#define SOUNDTOUCH_VERSION "1.9.2"
#define SOUNDTOUCH_VERSION "2.0.0"
/// SoundTouch library version id
#define SOUNDTOUCH_VERSION_ID (10902)
#define SOUNDTOUCH_VERSION_ID (20000)
//
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
@ -116,30 +116,61 @@ namespace soundtouch
#define SETTING_OVERLAP_MS 5
/// Call "getSetting" with this ID to query nominal average processing sequence
/// size in samples. This value tells approcimate value how many input samples
/// SoundTouch needs to gather before it does DSP processing run for the sample batch.
/// Call "getSetting" with this ID to query processing sequence size in samples.
/// This value gives approximate value of how many input samples you'll need to
/// feed into SoundTouch after initial buffering to get out a new batch of
/// output samples.
///
/// This value does not include initial buffering at beginning of a new processing
/// stream, use SETTING_INITIAL_LATENCY to get the initial buffering size.
///
/// Notices:
/// - This is read-only parameter, i.e. setSetting ignores this parameter
/// - Returned value is approximate average value, exact processing batch
/// size may wary from time to time
/// - This parameter value is not constant but may change depending on
/// - This parameter value is not constant but change depending on
/// tempo/pitch/rate/samplerate settings.
#define SETTING_NOMINAL_INPUT_SEQUENCE 6
#define SETTING_NOMINAL_INPUT_SEQUENCE 6
/// Call "getSetting" with this ID to query nominal average processing output
/// size in samples. This value tells approcimate value how many output samples
/// SoundTouch outputs once it does DSP processing run for a batch of input samples.
///
///
/// Notices:
/// - This is read-only parameter, i.e. setSetting ignores this parameter
/// - Returned value is approximate average value, exact processing batch
/// size may wary from time to time
/// - This parameter value is not constant but may change depending on
/// - This parameter value is not constant but change depending on
/// tempo/pitch/rate/samplerate settings.
#define SETTING_NOMINAL_OUTPUT_SEQUENCE 7
#define SETTING_NOMINAL_OUTPUT_SEQUENCE 7
/// Call "getSetting" with this ID to query initial processing latency, i.e.
/// approx. how many samples you'll need to enter to SoundTouch pipeline before
/// you can expect to get first batch of ready output samples out.
///
/// After the first output batch, you can then expect to get approx.
/// SETTING_NOMINAL_OUTPUT_SEQUENCE ready samples out for every
/// SETTING_NOMINAL_INPUT_SEQUENCE samples that you enter into SoundTouch.
///
/// Example:
/// processing with parameter -tempo=5
/// => initial latency = 5509 samples
/// input sequence = 4167 samples
/// output sequence = 3969 samples
///
/// Accordingly, you can expect to feed in approx. 5509 samples at beginning of
/// the stream, and then you'll get out the first 3969 samples. After that, for
/// every approx. 4167 samples that you'll put in, you'll receive again approx.
/// 3969 samples out.
///
/// This also means that average latency during stream processing is
/// INITIAL_LATENCY-OUTPUT_SEQUENCE/2, in the above example case 5509-3969/2
/// = 3524 samples
///
/// Notices:
/// - This is read-only parameter, i.e. setSetting ignores this parameter
/// - This parameter value is not constant but change depending on
/// tempo/pitch/rate/samplerate settings.
#define SETTING_INITIAL_LATENCY 8
class SoundTouch : public FIFOProcessor
{
@ -228,6 +259,24 @@ public:
/// Sets sample rate.
void setSampleRate(uint srate);
/// Get ratio between input and output audio durations, useful for calculating
/// processed output duration: if you'll process a stream of N samples, then
/// you can expect to get out N * getInputOutputSampleRatio() samples.
///
/// This ratio will give accurate target duration ratio for a full audio track,
/// given that the the whole track is processed with same processing parameters.
///
/// If this ratio is applied to calculate intermediate offsets inside a processing
/// stream, then this ratio is approximate and can deviate +- some tens of milliseconds
/// from ideal offset, yet by end of the audio stream the duration ratio will become
/// exact.
///
/// Example: if processing with parameters "-tempo=15 -pitch=-3", the function
/// will return value 0.8695652... Now, if processing an audio stream whose duration
/// is exactly one million audio samples, then you can expect the processed
/// output duration be 0.869565 * 1000000 = 869565 samples.
double getInputOutputSampleRatio();
/// Flushes the last samples from the processing pipeline to the output.
/// Clears also the internal processing buffers.
//
@ -286,6 +335,11 @@ public:
/// Returns number of samples currently unprocessed.
virtual uint numUnprocessedSamples() const;
/// Return number of channels
uint numChannels() const
{
return channels;
}
/// Other handy functions that are implemented in the ancestor classes (see
/// classes 'FIFOProcessor' and 'FIFOSamplePipe')

View File

@ -0,0 +1,5 @@
/* Use Float as Sample type */
#undef SOUNDTOUCH_FLOAT_SAMPLES
/* Use Integer as Sample type */
#define SOUNDTOUCH_INTEGER_SAMPLES 1

6
src/AAFilter.cpp Normal file → Executable file
View File

@ -12,10 +12,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2014-01-05 23:40:22 +0200 (Sun, 05 Jan 2014) $
// Last changed : $Date: 2016-01-12 19:26:21 +0200 (ti, 12 tammi 2016) $
// File revision : $Revision: 4 $
//
// $Id: AAFilter.cpp 177 2014-01-05 21:40:22Z oparviai $
// $Id: AAFilter.cpp 240 2016-01-12 17:26:21Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -49,7 +49,7 @@
using namespace soundtouch;
#define PI 3.141592655357989
#define PI 3.14159265358979323846
#define TWOPI (2 * PI)
// define this to save AA filter coefficients to a file

2
src/AAFilter.h Normal file → Executable file
View File

@ -13,7 +13,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2014-01-07 21:41:23 +0200 (Tue, 07 Jan 2014) $
// Last changed : $Date: 2014-01-07 21:41:23 +0200 (ti, 07 tammi 2014) $
// File revision : $Revision: 4 $
//
// $Id: AAFilter.h 187 2014-01-07 19:41:23Z oparviai $

318
src/BPMDetect.cpp Executable file
View File

@ -0,0 +1,318 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Beats-per-minute (BPM) detection routine.
///
/// The beat detection algorithm works as follows:
/// - Use function 'inputSamples' to input a chunks of samples to the class for
/// analysis. It's a good idea to enter a large sound file or stream in smallish
/// chunks of around few kilosamples in order not to extinguish too much RAM memory.
/// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
/// which is basically ok as low (bass) frequencies mostly determine the beat rate.
/// Simple averaging is used for anti-alias filtering because the resulting signal
/// quality isn't of that high importance.
/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
/// taking absolute value that's smoothed by sliding average. Signal levels that
/// are below a couple of times the general RMS amplitude level are cut away to
/// leave only notable peaks there.
/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
/// autocorrelation function of the enveloped signal.
/// - After whole sound data file has been analyzed as above, the bpm level is
/// detected by function 'getBpm' that finds the highest peak of the autocorrelation
/// function, calculates it's precise location and converts this reading to bpm's.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2016-01-05 22:59:57 +0200 (ti, 05 tammi 2016) $
// File revision : $Revision: 4 $
//
// $Id: BPMDetect.cpp 237 2016-01-05 20:59:57Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "FIFOSampleBuffer.h"
#include "PeakFinder.h"
#include "BPMDetect.h"
using namespace soundtouch;
#define INPUT_BLOCK_SAMPLES 2048
#define DECIMATED_BLOCK_SAMPLES 256
/// Target sample rate after decimation
const int target_srate = 1000;
/// XCorr update sequence size, update in about 200msec chunks
const int xcorr_update_sequence = 200;
/// XCorr decay time constant, decay to half in 30 seconds
/// If it's desired to have the system adapt quicker to beat rate
/// changes within a continuing music stream, then the
/// 'xcorr_decay_time_constant' value can be reduced, yet that
/// can increase possibility of glitches in bpm detection.
const double xcorr_decay_time_constant = 30.0;
////////////////////////////////////////////////////////////////////////////////
// Enable following define to create bpm analysis file:
// #define _CREATE_BPM_DEBUG_FILE
#ifdef _CREATE_BPM_DEBUG_FILE
#define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
{
FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
int i;
if (fptr)
{
printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
for (i = minpos; i < maxpos; i ++)
{
fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
}
fclose(fptr);
}
}
#else
#define _SaveDebugData(a,b,c,d)
#endif
////////////////////////////////////////////////////////////////////////////////
BPMDetect::BPMDetect(int numChannels, int aSampleRate)
{
this->sampleRate = aSampleRate;
this->channels = numChannels;
decimateSum = 0;
decimateCount = 0;
// choose decimation factor so that result is approx. 1000 Hz
decimateBy = sampleRate / target_srate;
assert(decimateBy > 0);
assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
// Calculate window length & starting item according to desired min & max bpms
windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
assert(windowLen > windowStart);
// allocate new working objects
xcorr = new float[windowLen];
memset(xcorr, 0, windowLen * sizeof(float));
// allocate processing buffer
buffer = new FIFOSampleBuffer();
// we do processing in mono mode
buffer->setChannels(1);
buffer->clear();
}
BPMDetect::~BPMDetect()
{
delete[] xcorr;
delete buffer;
}
/// convert to mono, low-pass filter & decimate to about 500 Hz.
/// return number of outputted samples.
///
/// Decimation is used to remove the unnecessary frequencies and thus to reduce
/// the amount of data needed to be processed as calculating autocorrelation
/// function is a very-very heavy operation.
///
/// Anti-alias filtering is done simply by averaging the samples. This is really a
/// poor-man's anti-alias filtering, but it's not so critical in this kind of application
/// (it'd also be difficult to design a high-quality filter with steep cut-off at very
/// narrow band)
int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
{
int count, outcount;
LONG_SAMPLETYPE out;
assert(channels > 0);
assert(decimateBy > 0);
outcount = 0;
for (count = 0; count < numsamples; count ++)
{
int j;
// convert to mono and accumulate
for (j = 0; j < channels; j ++)
{
decimateSum += src[j];
}
src += j;
decimateCount ++;
if (decimateCount >= decimateBy)
{
// Store every Nth sample only
out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
decimateSum = 0;
decimateCount = 0;
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
// check ranges for sure (shouldn't actually be necessary)
if (out > 32767)
{
out = 32767;
}
else if (out < -32768)
{
out = -32768;
}
#endif // SOUNDTOUCH_INTEGER_SAMPLES
dest[outcount] = (SAMPLETYPE)out;
outcount ++;
}
}
return outcount;
}
// Calculates autocorrelation function of the sample history buffer
void BPMDetect::updateXCorr(int process_samples)
{
int offs;
SAMPLETYPE *pBuffer;
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
pBuffer = buffer->ptrBegin();
// calculate decay factor for xcorr filtering
float xcorr_decay = (float)pow(0.5, 1.0 / (xcorr_decay_time_constant * target_srate / process_samples));
#pragma omp parallel for
for (offs = windowStart; offs < windowLen; offs ++)
{
LONG_SAMPLETYPE sum;
int i;
sum = 0;
for (i = 0; i < process_samples; i ++)
{
sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
}
xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable time constant.
xcorr[offs] += (float)fabs(sum);
}
}
void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
{
SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
// iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
while (numSamples > 0)
{
int block;
int decSamples;
block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
// decimate. note that converts to mono at the same time
decSamples = decimate(decimated, samples, block);
samples += block * channels;
numSamples -= block;
buffer->putSamples(decimated, decSamples);
}
// when the buffer has enought samples for processing...
while ((int)buffer->numSamples() >= windowLen + xcorr_update_sequence)
{
// ... calculate autocorrelations for oldest samples...
updateXCorr(xcorr_update_sequence);
// ... and remove these from the buffer
buffer->receiveSamples(xcorr_update_sequence);
}
}
void BPMDetect::removeBias()
{
int i;
float minval = 1e12f; // arbitrary large number
for (i = windowStart; i < windowLen; i ++)
{
if (xcorr[i] < minval)
{
minval = xcorr[i];
}
}
for (i = windowStart; i < windowLen; i ++)
{
xcorr[i] -= minval;
}
}
float BPMDetect::getBpm()
{
double peakPos;
double coeff;
PeakFinder peakFinder;
coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
// save bpm debug analysis data if debug data enabled
_SaveDebugData(xcorr, windowStart, windowLen, coeff);
// remove bias from xcorr data
removeBias();
// find peak position
peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
assert(decimateBy != 0);
if (peakPos < 1e-9) return 0.0; // detection failed.
// calculate BPM
return (float) (coeff / peakPos);
}

2
src/FIFOSampleBuffer.cpp Normal file → Executable file
View File

@ -15,7 +15,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
// Last changed : $Date: 2012-11-08 20:53:01 +0200 (to, 08 marras 2012) $
// File revision : $Revision: 4 $
//
// $Id: FIFOSampleBuffer.cpp 160 2012-11-08 18:53:01Z oparviai $

12
src/FIRFilter.cpp Normal file → Executable file
View File

@ -2,19 +2,25 @@
///
/// General FIR digital filter routines with MMX optimization.
///
/// Note : MMX optimized functions reside in a separate, platform-specific file,
/// Notes : MMX optimized functions reside in a separate, platform-specific file,
/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
///
/// This source file contains OpenMP optimizations that allow speeding up the
/// corss-correlation algorithm by executing it in several threads / CPU cores
/// in parallel. See the following article link for more detailed discussion
/// about SoundTouch OpenMP optimizations:
/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-02-21 23:24:29 +0200 (Sat, 21 Feb 2015) $
// Last changed : $Date: 2015-11-05 19:46:08 +0200 (to, 05 marras 2015) $
// File revision : $Revision: 4 $
//
// $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $
// $Id: FIRFilter.cpp 234 2015-11-05 17:46:08Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//

2
src/FIRFilter.h Normal file → Executable file
View File

@ -11,7 +11,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-02-21 23:24:29 +0200 (Sat, 21 Feb 2015) $
// Last changed : $Date: 2015-02-21 23:24:29 +0200 (la, 21 helmi 2015) $
// File revision : $Revision: 4 $
//
// $Id: FIRFilter.h 202 2015-02-21 21:24:29Z oparviai $

200
src/InterpolateCubic.cpp Executable file
View File

@ -0,0 +1,200 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Cubic interpolation routine.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// $Id: InterpolateCubic.cpp 179 2014-01-06 18:41:42Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <stddef.h>
#include <math.h>
#include "InterpolateCubic.h"
#include "STTypes.h"
using namespace soundtouch;
// cubic interpolation coefficients
static const float _coeffs[]=
{ -0.5f, 1.0f, -0.5f, 0.0f,
1.5f, -2.5f, 0.0f, 1.0f,
-1.5f, 2.0f, 0.5f, 0.0f,
0.5f, -0.5f, 0.0f, 0.0f};
InterpolateCubic::InterpolateCubic()
{
fract = 0;
}
void InterpolateCubic::resetRegisters()
{
fract = 0;
}
/// Transpose mono audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateCubic::transposeMono(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
int i;
int srcSampleEnd = srcSamples - 4;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
{
float out;
const float x3 = 1.0f;
const float x2 = (float)fract; // x
const float x1 = x2*x2; // x^2
const float x0 = x1*x2; // x^3
float y0, y1, y2, y3;
assert(fract < 1.0);
y0 = _coeffs[0] * x0 + _coeffs[1] * x1 + _coeffs[2] * x2 + _coeffs[3] * x3;
y1 = _coeffs[4] * x0 + _coeffs[5] * x1 + _coeffs[6] * x2 + _coeffs[7] * x3;
y2 = _coeffs[8] * x0 + _coeffs[9] * x1 + _coeffs[10] * x2 + _coeffs[11] * x3;
y3 = _coeffs[12] * x0 + _coeffs[13] * x1 + _coeffs[14] * x2 + _coeffs[15] * x3;
out = y0 * psrc[0] + y1 * psrc[1] + y2 * psrc[2] + y3 * psrc[3];
pdest[i] = (SAMPLETYPE)out;
i ++;
// update position fraction
fract += rate;
// update whole positions
int whole = (int)fract;
fract -= whole;
psrc += whole;
srcCount += whole;
}
srcSamples = srcCount;
return i;
}
/// Transpose stereo audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateCubic::transposeStereo(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
int i;
int srcSampleEnd = srcSamples - 4;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
{
const float x3 = 1.0f;
const float x2 = (float)fract; // x
const float x1 = x2*x2; // x^2
const float x0 = x1*x2; // x^3
float y0, y1, y2, y3;
float out0, out1;
assert(fract < 1.0);
y0 = _coeffs[0] * x0 + _coeffs[1] * x1 + _coeffs[2] * x2 + _coeffs[3] * x3;
y1 = _coeffs[4] * x0 + _coeffs[5] * x1 + _coeffs[6] * x2 + _coeffs[7] * x3;
y2 = _coeffs[8] * x0 + _coeffs[9] * x1 + _coeffs[10] * x2 + _coeffs[11] * x3;
y3 = _coeffs[12] * x0 + _coeffs[13] * x1 + _coeffs[14] * x2 + _coeffs[15] * x3;
out0 = y0 * psrc[0] + y1 * psrc[2] + y2 * psrc[4] + y3 * psrc[6];
out1 = y0 * psrc[1] + y1 * psrc[3] + y2 * psrc[5] + y3 * psrc[7];
pdest[2*i] = (SAMPLETYPE)out0;
pdest[2*i+1] = (SAMPLETYPE)out1;
i ++;
// update position fraction
fract += rate;
// update whole positions
int whole = (int)fract;
fract -= whole;
psrc += 2*whole;
srcCount += whole;
}
srcSamples = srcCount;
return i;
}
/// Transpose multi-channel audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateCubic::transposeMulti(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
int i;
int srcSampleEnd = srcSamples - 4;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
{
const float x3 = 1.0f;
const float x2 = (float)fract; // x
const float x1 = x2*x2; // x^2
const float x0 = x1*x2; // x^3
float y0, y1, y2, y3;
assert(fract < 1.0);
y0 = _coeffs[0] * x0 + _coeffs[1] * x1 + _coeffs[2] * x2 + _coeffs[3] * x3;
y1 = _coeffs[4] * x0 + _coeffs[5] * x1 + _coeffs[6] * x2 + _coeffs[7] * x3;
y2 = _coeffs[8] * x0 + _coeffs[9] * x1 + _coeffs[10] * x2 + _coeffs[11] * x3;
y3 = _coeffs[12] * x0 + _coeffs[13] * x1 + _coeffs[14] * x2 + _coeffs[15] * x3;
for (int c = 0; c < numChannels; c ++)
{
float out;
out = y0 * psrc[c] + y1 * psrc[c + numChannels] + y2 * psrc[c + 2 * numChannels] + y3 * psrc[c + 3 * numChannels];
pdest[0] = (SAMPLETYPE)out;
pdest ++;
}
i ++;
// update position fraction
fract += rate;
// update whole positions
int whole = (int)fract;
fract -= whole;
psrc += numChannels*whole;
srcCount += whole;
}
srcSamples = srcCount;
return i;
}

67
src/InterpolateCubic.h Executable file
View File

@ -0,0 +1,67 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Cubic interpolation routine.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// $Id: InterpolateCubic.h 225 2015-07-26 14:45:48Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _InterpolateCubic_H_
#define _InterpolateCubic_H_
#include "RateTransposer.h"
#include "STTypes.h"
namespace soundtouch
{
class InterpolateCubic : public TransposerBase
{
protected:
virtual void resetRegisters();
virtual int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
virtual int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
virtual int transposeMulti(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
double fract;
public:
InterpolateCubic();
};
}
#endif

0
src/InterpolateLinear.cpp Normal file → Executable file
View File

0
src/InterpolateLinear.h Normal file → Executable file
View File

185
src/InterpolateShannon.cpp Executable file
View File

@ -0,0 +1,185 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Sample interpolation routine using 8-tap band-limited Shannon interpolation
/// with kaiser window.
///
/// Notice. This algorithm is remarkably much heavier than linear or cubic
/// interpolation, and not remarkably better than cubic algorithm. Thus mostly
/// for experimental purposes
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// $Id: InterpolateShannon.cpp 195 2014-04-06 15:57:21Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <math.h>
#include "InterpolateShannon.h"
#include "STTypes.h"
using namespace soundtouch;
/// Kaiser window with beta = 2.0
/// Values scaled down by 5% to avoid overflows
static const double _kaiser8[8] =
{
0.41778693317814,
0.64888025049173,
0.83508562409944,
0.93887857733412,
0.93887857733412,
0.83508562409944,
0.64888025049173,
0.41778693317814
};
InterpolateShannon::InterpolateShannon()
{
fract = 0;
}
void InterpolateShannon::resetRegisters()
{
fract = 0;
}
#define PI 3.1415926536
#define sinc(x) (sin(PI * (x)) / (PI * (x)))
/// Transpose mono audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateShannon::transposeMono(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
int i;
int srcSampleEnd = srcSamples - 8;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
{
double out;
assert(fract < 1.0);
out = psrc[0] * sinc(-3.0 - fract) * _kaiser8[0];
out += psrc[1] * sinc(-2.0 - fract) * _kaiser8[1];
out += psrc[2] * sinc(-1.0 - fract) * _kaiser8[2];
if (fract < 1e-6)
{
out += psrc[3] * _kaiser8[3]; // sinc(0) = 1
}
else
{
out += psrc[3] * sinc(- fract) * _kaiser8[3];
}
out += psrc[4] * sinc( 1.0 - fract) * _kaiser8[4];
out += psrc[5] * sinc( 2.0 - fract) * _kaiser8[5];
out += psrc[6] * sinc( 3.0 - fract) * _kaiser8[6];
out += psrc[7] * sinc( 4.0 - fract) * _kaiser8[7];
pdest[i] = (SAMPLETYPE)out;
i ++;
// update position fraction
fract += rate;
// update whole positions
int whole = (int)fract;
fract -= whole;
psrc += whole;
srcCount += whole;
}
srcSamples = srcCount;
return i;
}
/// Transpose stereo audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateShannon::transposeStereo(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
int i;
int srcSampleEnd = srcSamples - 8;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
{
double out0, out1, w;
assert(fract < 1.0);
w = sinc(-3.0 - fract) * _kaiser8[0];
out0 = psrc[0] * w; out1 = psrc[1] * w;
w = sinc(-2.0 - fract) * _kaiser8[1];
out0 += psrc[2] * w; out1 += psrc[3] * w;
w = sinc(-1.0 - fract) * _kaiser8[2];
out0 += psrc[4] * w; out1 += psrc[5] * w;
w = _kaiser8[3] * ((fract < 1e-5) ? 1.0 : sinc(- fract)); // sinc(0) = 1
out0 += psrc[6] * w; out1 += psrc[7] * w;
w = sinc( 1.0 - fract) * _kaiser8[4];
out0 += psrc[8] * w; out1 += psrc[9] * w;
w = sinc( 2.0 - fract) * _kaiser8[5];
out0 += psrc[10] * w; out1 += psrc[11] * w;
w = sinc( 3.0 - fract) * _kaiser8[6];
out0 += psrc[12] * w; out1 += psrc[13] * w;
w = sinc( 4.0 - fract) * _kaiser8[7];
out0 += psrc[14] * w; out1 += psrc[15] * w;
pdest[2*i] = (SAMPLETYPE)out0;
pdest[2*i+1] = (SAMPLETYPE)out1;
i ++;
// update position fraction
fract += rate;
// update whole positions
int whole = (int)fract;
fract -= whole;
psrc += 2*whole;
srcCount += whole;
}
srcSamples = srcCount;
return i;
}
/// Transpose stereo audio. Returns number of produced output samples, and
/// updates "srcSamples" to amount of consumed source samples
int InterpolateShannon::transposeMulti(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
{
// not implemented
assert(false);
return 0;
}

72
src/InterpolateShannon.h Executable file
View File

@ -0,0 +1,72 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Sample interpolation routine using 8-tap band-limited Shannon interpolation
/// with kaiser window.
///
/// Notice. This algorithm is remarkably much heavier than linear or cubic
/// interpolation, and not remarkably better than cubic algorithm. Thus mostly
/// for experimental purposes
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// $Id: InterpolateShannon.h 225 2015-07-26 14:45:48Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _InterpolateShannon_H_
#define _InterpolateShannon_H_
#include "RateTransposer.h"
#include "STTypes.h"
namespace soundtouch
{
class InterpolateShannon : public TransposerBase
{
protected:
void resetRegisters();
int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
int transposeMulti(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
double fract;
public:
InterpolateShannon();
};
}
#endif

286
src/PeakFinder.cpp Executable file
View File

@ -0,0 +1,286 @@
////////////////////////////////////////////////////////////////////////////////
///
/// Peak detection routine.
///
/// The routine detects highest value on an array of values and calculates the
/// precise peak location as a mass-center of the 'hump' around the peak value.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-05-18 18:22:02 +0300 (ma, 18 touko 2015) $
// File revision : $Revision: 4 $
//
// $Id: PeakFinder.cpp 213 2015-05-18 15:22:02Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <math.h>
#include <assert.h>
#include "PeakFinder.h"
using namespace soundtouch;
#define max(x, y) (((x) > (y)) ? (x) : (y))
PeakFinder::PeakFinder()
{
minPos = maxPos = 0;
}
// Finds real 'top' of a peak hump from neighnourhood of the given 'peakpos'.
int PeakFinder::findTop(const float *data, int peakpos) const
{
int i;
int start, end;
float refvalue;
refvalue = data[peakpos];
// seek within ±10 points
start = peakpos - 10;
if (start < minPos) start = minPos;
end = peakpos + 10;
if (end > maxPos) end = maxPos;
for (i = start; i <= end; i ++)
{
if (data[i] > refvalue)
{
peakpos = i;
refvalue = data[i];
}
}
// failure if max value is at edges of seek range => it's not peak, it's at slope.
if ((peakpos == start) || (peakpos == end)) return 0;
return peakpos;
}
// Finds 'ground level' of a peak hump by starting from 'peakpos' and proceeding
// to direction defined by 'direction' until next 'hump' after minimum value will
// begin
int PeakFinder::findGround(const float *data, int peakpos, int direction) const
{
int lowpos;
int pos;
int climb_count;
float refvalue;
float delta;
climb_count = 0;
refvalue = data[peakpos];
lowpos = peakpos;
pos = peakpos;
while ((pos > minPos+1) && (pos < maxPos-1))
{
int prevpos;
prevpos = pos;
pos += direction;
// calculate derivate
delta = data[pos] - data[prevpos];
if (delta <= 0)
{
// going downhill, ok
if (climb_count)
{
climb_count --; // decrease climb count
}
// check if new minimum found
if (data[pos] < refvalue)
{
// new minimum found
lowpos = pos;
refvalue = data[pos];
}
}
else
{
// going uphill, increase climbing counter
climb_count ++;
if (climb_count > 5) break; // we've been climbing too long => it's next uphill => quit
}
}
return lowpos;
}
// Find offset where the value crosses the given level, when starting from 'peakpos' and
// proceeds to direction defined in 'direction'
int PeakFinder::findCrossingLevel(const float *data, float level, int peakpos, int direction) const
{
float peaklevel;
int pos;
peaklevel = data[peakpos];
assert(peaklevel >= level);
pos = peakpos;
while ((pos >= minPos) && (pos < maxPos))
{
if (data[pos + direction] < level) return pos; // crossing found
pos += direction;
}
return -1; // not found
}
// Calculates the center of mass location of 'data' array items between 'firstPos' and 'lastPos'
double PeakFinder::calcMassCenter(const float *data, int firstPos, int lastPos) const
{
int i;
float sum;
float wsum;
sum = 0;
wsum = 0;
for (i = firstPos; i <= lastPos; i ++)
{
sum += (float)i * data[i];
wsum += data[i];
}
if (wsum < 1e-6) return 0;
return sum / wsum;
}
/// get exact center of peak near given position by calculating local mass of center
double PeakFinder::getPeakCenter(const float *data, int peakpos) const
{
float peakLevel; // peak level
int crosspos1, crosspos2; // position where the peak 'hump' crosses cutting level
float cutLevel; // cutting value
float groundLevel; // ground level of the peak
int gp1, gp2; // bottom positions of the peak 'hump'
// find ground positions.
gp1 = findGround(data, peakpos, -1);
gp2 = findGround(data, peakpos, 1);
peakLevel = data[peakpos];
if (gp1 == gp2)
{
// avoid rounding errors when all are equal
assert(gp1 == peakpos);
cutLevel = groundLevel = peakLevel;
} else {
// get average of the ground levels
groundLevel = 0.5f * (data[gp1] + data[gp2]);
// calculate 70%-level of the peak
cutLevel = 0.70f * peakLevel + 0.30f * groundLevel;
}
// find mid-level crossings
crosspos1 = findCrossingLevel(data, cutLevel, peakpos, -1);
crosspos2 = findCrossingLevel(data, cutLevel, peakpos, 1);
if ((crosspos1 < 0) || (crosspos2 < 0)) return 0; // no crossing, no peak..
// calculate mass center of the peak surroundings
return calcMassCenter(data, crosspos1, crosspos2);
}
double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
{
int i;
int peakpos; // position of peak level
double highPeak, peak;
this->minPos = aminPos;
this->maxPos = amaxPos;
// find absolute peak
peakpos = minPos;
peak = data[minPos];
for (i = minPos + 1; i < maxPos; i ++)
{
if (data[i] > peak)
{
peak = data[i];
peakpos = i;
}
}
// Calculate exact location of the highest peak mass center
highPeak = getPeakCenter(data, peakpos);
peak = highPeak;
// Now check if the highest peak were in fact harmonic of the true base beat peak
// - sometimes the highest peak can be Nth harmonic of the true base peak yet
// just a slightly higher than the true base
for (i = 3; i < 10; i ++)
{
double peaktmp, harmonic;
int i1,i2;
harmonic = (double)i * 0.5;
peakpos = (int)(highPeak / harmonic + 0.5f);
if (peakpos < minPos) break;
peakpos = findTop(data, peakpos); // seek true local maximum index
if (peakpos == 0) continue; // no local max here
// calculate mass-center of possible harmonic peak
peaktmp = getPeakCenter(data, peakpos);
// accept harmonic peak if
// (a) it is found
// (b) is within ±4% of the expected harmonic interval
// (c) has at least half x-corr value of the max. peak
double diff = harmonic * peaktmp / highPeak;
if ((diff < 0.96) || (diff > 1.04)) continue; // peak too afar from expected
// now compare to highest detected peak
i1 = (int)(highPeak + 0.5);
i2 = (int)(peaktmp + 0.5);
if (data[i2] >= 0.4*data[i1])
{
// The harmonic is at least half as high primary peak,
// thus use the harmonic peak instead
peak = peaktmp;
}
}
return peak;
}

97
src/PeakFinder.h Executable file
View File

@ -0,0 +1,97 @@
////////////////////////////////////////////////////////////////////////////////
///
/// The routine detects highest value on an array of values and calculates the
/// precise peak location as a mass-center of the 'hump' around the peak value.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2011-12-30 22:33:46 +0200 (pe, 30 joulu 2011) $
// File revision : $Revision: 4 $
//
// $Id: PeakFinder.h 132 2011-12-30 20:33:46Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _PeakFinder_H_
#define _PeakFinder_H_
namespace soundtouch
{
class PeakFinder
{
protected:
/// Min, max allowed peak positions within the data vector
int minPos, maxPos;
/// Calculates the mass center between given vector items.
double calcMassCenter(const float *data, ///< Data vector.
int firstPos, ///< Index of first vector item beloging to the peak.
int lastPos ///< Index of last vector item beloging to the peak.
) const;
/// Finds the data vector index where the monotoniously decreasing signal crosses the
/// given level.
int findCrossingLevel(const float *data, ///< Data vector.
float level, ///< Goal crossing level.
int peakpos, ///< Peak position index within the data vector.
int direction /// Direction where to proceed from the peak: 1 = right, -1 = left.
) const;
// Finds real 'top' of a peak hump from neighnourhood of the given 'peakpos'.
int findTop(const float *data, int peakpos) const;
/// Finds the 'ground' level, i.e. smallest level between two neighbouring peaks, to right-
/// or left-hand side of the given peak position.
int findGround(const float *data, /// Data vector.
int peakpos, /// Peak position index within the data vector.
int direction /// Direction where to proceed from the peak: 1 = right, -1 = left.
) const;
/// get exact center of peak near given position by calculating local mass of center
double getPeakCenter(const float *data, int peakpos) const;
public:
/// Constructor.
PeakFinder();
/// Detect exact peak position of the data vector by finding the largest peak 'hump'
/// and calculating the mass-center location of the peak hump.
///
/// \return The location of the largest base harmonic peak hump.
double detectPeak(const float *data, /// Data vector to be analyzed. The data vector has
/// to be at least 'maxPos' items long.
int minPos, ///< Min allowed peak location within the vector data.
int maxPos ///< Max allowed peak location within the vector data.
);
};
}
#endif // _PeakFinder_H_

13
src/RateTransposer.cpp Normal file → Executable file
View File

@ -10,10 +10,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-07-26 17:45:48 +0300 (Sun, 26 Jul 2015) $
// Last changed : $Date: 2016-10-15 22:34:59 +0300 (la, 15 loka 2016) $
// File revision : $Revision: 4 $
//
// $Id: RateTransposer.cpp 225 2015-07-26 14:45:48Z oparviai $
// $Id: RateTransposer.cpp 243 2016-10-15 19:34:59Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -44,6 +44,8 @@
#include <stdio.h>
#include "RateTransposer.h"
#include "InterpolateLinear.h"
#include "InterpolateCubic.h"
#include "InterpolateShannon.h"
#include "AAFilter.h"
using namespace soundtouch;
@ -206,6 +208,13 @@ int RateTransposer::isEmpty() const
}
/// Return approximate initial input-output latency
int RateTransposer::getLatency() const
{
return (bUseAAFilter) ? pAAFilter->getLength() : 0;
}
//////////////////////////////////////////////////////////////////////////////
//
// TransposerBase - Base class for interpolation

7
src/RateTransposer.h Normal file → Executable file
View File

@ -14,10 +14,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-07-26 17:45:48 +0300 (Sun, 26 Jul 2015) $
// Last changed : $Date: 2016-10-15 22:34:59 +0300 (la, 15 loka 2016) $
// File revision : $Revision: 4 $
//
// $Id: RateTransposer.h 225 2015-07-26 14:45:48Z oparviai $
// $Id: RateTransposer.h 243 2016-10-15 19:34:59Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -172,6 +172,9 @@ public:
/// Returns nonzero if there aren't any samples available for outputting.
int isEmpty() const;
/// Return approximate initial input-output latency
int getLatency() const;
};
}

116
src/SoundTouch.cpp Normal file → Executable file
View File

@ -41,10 +41,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-07-26 17:45:48 +0300 (Sun, 26 Jul 2015) $
// Last changed : $Date: 2016-10-15 22:34:59 +0300 (la, 15 loka 2016) $
// File revision : $Revision: 4 $
//
// $Id: SoundTouch.cpp 225 2015-07-26 14:45:48Z oparviai $
// $Id: SoundTouch.cpp 243 2016-10-15 19:34:59Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -110,8 +110,8 @@ SoundTouch::SoundTouch()
calcEffectiveRateAndTempo();
samplesExpectedOut = 0;
samplesOutput = 0;
samplesExpectedOut = 0;
samplesOutput = 0;
channels = 0;
bSrateSet = false;
@ -149,7 +149,7 @@ void SoundTouch::setChannels(uint numChannels)
/*if (numChannels != 1 && numChannels != 2)
{
//ST_THROW_RT_ERROR("Illegal number of channels");
return;
return;
}*/
channels = numChannels;
pRateTransposer->setChannels((int)numChannels);
@ -240,11 +240,11 @@ void SoundTouch::calcEffectiveRateAndTempo()
double oldTempo = tempo;
double oldRate = rate;
tempo = virtualTempo / virtualPitch;
rate = virtualPitch * virtualRate;
tempo = virtualTempo / virtualPitch;
rate = virtualPitch * virtualRate;
if (!TEST_FLOAT_EQUAL(rate,oldRate)) pRateTransposer->setRate(rate);
if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);
if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
if (rate <= 1.0f)
@ -321,9 +321,9 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
}
*/
// accumulate how many samples are expected out from processing, given the current
// processing setting
samplesExpectedOut += (double)nSamples / ((double)rate * (double)tempo);
// accumulate how many samples are expected out from processing, given the current
// processing setting
samplesExpectedOut += (double)nSamples / ((double)rate * (double)tempo);
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
if (rate <= 1.0f)
@ -354,23 +354,24 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
void SoundTouch::flush()
{
int i;
int numStillExpected;
int numStillExpected;
SAMPLETYPE *buff = new SAMPLETYPE[128 * channels];
// how many samples are still expected to output
numStillExpected = (int)((long)(samplesExpectedOut + 0.5) - samplesOutput);
// how many samples are still expected to output
numStillExpected = (int)((long)(samplesExpectedOut + 0.5) - samplesOutput);
if (numStillExpected < 0) numStillExpected = 0;
memset(buff, 0, 128 * channels * sizeof(SAMPLETYPE));
// "Push" the last active samples out from the processing pipeline by
// feeding blank samples into the processing pipeline until new,
// processed samples appear in the output (not however, more than
// 24ksamples in any case)
for (i = 0; (numStillExpected > (int)numSamples()) && (i < 200); i ++)
{
putSamples(buff, 128);
}
for (i = 0; (numStillExpected > (int)numSamples()) && (i < 200); i ++)
{
putSamples(buff, 128);
}
adjustAmountOfSamples(numStillExpected);
adjustAmountOfSamples(numStillExpected);
delete[] buff;
@ -446,7 +447,7 @@ int SoundTouch::getSetting(int settingId) const
return pRateTransposer->getAAFilter()->getLength();
case SETTING_USE_QUICKSEEK :
return (uint) pTDStretch->isQuickSeekEnabled();
return (uint)pTDStretch->isQuickSeekEnabled();
case SETTING_SEQUENCE_MS:
pTDStretch->getParameters(NULL, &temp, NULL, NULL);
@ -460,23 +461,65 @@ int SoundTouch::getSetting(int settingId) const
pTDStretch->getParameters(NULL, NULL, NULL, &temp);
return temp;
case SETTING_NOMINAL_INPUT_SEQUENCE :
return pTDStretch->getInputSampleReq();
case SETTING_NOMINAL_INPUT_SEQUENCE :
{
int size = pTDStretch->getInputSampleReq();
case SETTING_NOMINAL_OUTPUT_SEQUENCE :
return pTDStretch->getOutputBatchSize();
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
if (rate <= 1.0)
{
// transposing done before timestretch, which impacts latency
return (int)(size * rate + 0.5);
}
#endif
return size;
}
default :
case SETTING_NOMINAL_OUTPUT_SEQUENCE :
{
int size = pTDStretch->getOutputBatchSize();
if (rate > 1.0)
{
// transposing done after timestretch, which impacts latency
return (int)(size / rate + 0.5);
}
return size;
}
case SETTING_INITIAL_LATENCY:
{
double latency = pTDStretch->getLatency();
int latency_tr = pRateTransposer->getLatency();
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
if (rate <= 1.0)
{
// transposing done before timestretch, which impacts latency
latency = (latency + latency_tr) * rate;
}
else
#endif
{
latency += (double)latency_tr / rate;
}
return (int)(latency + 0.5);
}
default :
return 0;
}
}
// Clears all the samples in the object's output and internal processing
// buffers.
void SoundTouch::clear()
{
samplesExpectedOut = 0;
samplesExpectedOut = 0;
samplesOutput = 0;
pRateTransposer->clear();
pTDStretch->clear();
}
@ -507,9 +550,9 @@ uint SoundTouch::numUnprocessedSamples() const
/// \return Number of samples returned.
uint SoundTouch::receiveSamples(SAMPLETYPE *output, uint maxSamples)
{
uint ret = FIFOProcessor::receiveSamples(output, maxSamples);
samplesOutput += (long)ret;
return ret;
uint ret = FIFOProcessor::receiveSamples(output, maxSamples);
samplesOutput += (long)ret;
return ret;
}
@ -520,7 +563,16 @@ uint SoundTouch::receiveSamples(SAMPLETYPE *output, uint maxSamples)
/// with 'ptrBegin' function.
uint SoundTouch::receiveSamples(uint maxSamples)
{
uint ret = FIFOProcessor::receiveSamples(maxSamples);
samplesOutput += (long)ret;
return ret;
uint ret = FIFOProcessor::receiveSamples(maxSamples);
samplesOutput += (long)ret;
return ret;
}
/// Get ratio between input and output audio durations, useful for calculating
/// processed output duration: if you'll process a stream of N samples, then
/// you can expect to get out N * getInputOutputSampleRatio() samples.
double SoundTouch::getInputOutputSampleRatio()
{
return 1.0 / (tempo * rate);
}

104
src/TDStretch.cpp Normal file → Executable file
View File

@ -4,8 +4,14 @@
/// while maintaining the original pitch by using a time domain WSOLA-like
/// method with several performance-increasing tweaks.
///
/// Note : MMX optimized functions reside in a separate, platform-specific
/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
/// Notes : MMX optimized functions reside in a separate, platform-specific
/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
///
/// This source file contains OpenMP optimizations that allow speeding up the
/// corss-correlation algorithm by executing it in several threads / CPU cores
/// in parallel. See the following article link for more detailed discussion
/// about SoundTouch OpenMP optimizations:
/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
@ -13,10 +19,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-08-09 00:00:15 +0300 (Sun, 09 Aug 2015) $
// Last changed : $Date: 2017-04-07 22:01:22 +0300 (pe, 07 huhti 2017) $
// File revision : $Revision: 1.12 $
//
// $Id: TDStretch.cpp 226 2015-08-08 21:00:15Z oparviai $
// $Id: TDStretch.cpp 249 2017-04-07 19:01:22Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -219,6 +225,7 @@ void TDStretch::clearInput()
{
inputBuffer.clear();
clearMidBuffer();
isBeginning = true;
}
@ -297,12 +304,13 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
int i;
double norm;
bestCorr = FLT_MIN;
bestCorr = -FLT_MAX;
bestOffs = 0;
// Scans for the best correlation value by testing each possible position
// over the permitted range.
bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
bestCorr = (bestCorr + 0.1) * 0.75;
#pragma omp parallel for
for (i = 1; i < seekLength; i ++)
@ -373,12 +381,10 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
// note: 'float' types used in this function in case that the platform would need to use software-fp
bestCorr = FLT_MIN;
bestOffs = SCANWIND;
bestCorr2 = FLT_MIN;
bestOffs2 = 0;
int best = 0;
bestCorr =
bestCorr2 = -FLT_MAX;
bestOffs =
bestOffs2 = SCANWIND;
// Scans for the best correlation value by testing each possible position
// over the permitted range. Look for two best matches on the first pass to
@ -436,7 +442,6 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
{
bestCorr = corr;
bestOffs = i;
best = 1;
}
}
@ -458,7 +463,6 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
{
bestCorr = corr;
bestOffs = i;
best = 2;
}
}
@ -520,13 +524,13 @@ void TDStretch::calcSeqParameters()
#define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
// sequence-ms setting values at above low & top tempo
#define AUTOSEQ_AT_MIN 125.0
#define AUTOSEQ_AT_MAX 50.0
#define AUTOSEQ_AT_MIN 90.0
#define AUTOSEQ_AT_MAX 40.0
#define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
#define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
// seek-window-ms setting values at above low & top tempoq
#define AUTOSEEK_AT_MIN 25.0
#define AUTOSEEK_AT_MIN 20.0
#define AUTOSEEK_AT_MAX 15.0
#define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
#define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
@ -637,7 +641,8 @@ void TDStretch::processNominalTempo()
// the result into 'outputBuffer'
void TDStretch::processSamples()
{
int ovlSkip, offset;
int ovlSkip;
int offset = 0;
int temp;
/* Removed this small optimization - can introduce a click to sound when tempo setting
@ -654,35 +659,61 @@ void TDStretch::processSamples()
// to form a processing frame.
while ((int)inputBuffer.numSamples() >= sampleReq)
{
// If tempo differs from the normal ('SCALE'), scan for the best overlapping
// position
offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
if (isBeginning == false)
{
// apart from the very beginning of the track,
// scan for the best overlapping position & do overlap-add
offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
// Mix the samples in the 'inputBuffer' at position of 'offset' with the
// samples in 'midBuffer' using sliding overlapping
// ... first partially overlap with the end of the previous sequence
// (that's in 'midBuffer')
overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
outputBuffer.putSamples((uint)overlapLength);
// Mix the samples in the 'inputBuffer' at position of 'offset' with the
// samples in 'midBuffer' using sliding overlapping
// ... first partially overlap with the end of the previous sequence
// (that's in 'midBuffer')
overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
outputBuffer.putSamples((uint)overlapLength);
offset += overlapLength;
}
else
{
// Adjust processing offset at beginning of track by not perform initial overlapping
// and compensating that in the 'input buffer skip' calculation
isBeginning = false;
int skip = (int)(tempo * overlapLength + 0.5);
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
#ifdef SOUNDTOUCH_ALLOW_SSE
// if SSE mode, round the skip amount to value corresponding to aligned memory address
if (channels == 1)
{
skip &= -4;
}
else if (channels == 2)
{
skip &= -2;
}
#endif
#endif
skipFract -= skip;
assert(nominalSkip >= -skipFract);
}
// ... then copy sequence samples from 'inputBuffer' to output:
// length of sequence
temp = (seekWindowLength - 2 * overlapLength);
// crosscheck that we don't have buffer overflow...
if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
{
continue; // just in case, shouldn't really happen
}
outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
// length of sequence
temp = (seekWindowLength - 2 * overlapLength);
outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
// Copies the end of the current sequence from 'inputBuffer' to
// 'midBuffer' for being mixed with the beginning of the next
// processing sequence and so on
assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength),
assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
channels * sizeof(SAMPLETYPE) * overlapLength);
// Remove the processed samples from the input buffer. Update
@ -879,7 +910,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
if (lnorm > maxnorm)
{
maxnorm = lnorm;
// modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
#pragma omp critical
if (lnorm > maxnorm)
{
maxnorm = lnorm;
}
}
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation

12
src/TDStretch.h Normal file → Executable file
View File

@ -13,10 +13,10 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-08-09 00:00:15 +0300 (Sun, 09 Aug 2015) $
// Last changed : $Date: 2016-10-20 19:30:11 +0300 (to, 20 loka 2016) $
// File revision : $Revision: 4 $
//
// $Id: TDStretch.h 226 2015-08-08 21:00:15Z oparviai $
// $Id: TDStretch.h 244 2016-10-20 16:30:11Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
@ -134,6 +134,7 @@ protected:
bool bQuickSeek;
bool bAutoSeqSetting;
bool bAutoSeekSetting;
bool isBeginning;
SAMPLETYPE *pMidBuffer;
SAMPLETYPE *pMidBufferUnaligned;
@ -247,6 +248,13 @@ public:
{
return seekWindowLength - overlapLength;
}
/// return approximate initial input-output latency
int getLatency() const
{
return sampleReq;
}
};

2
src/cpu_detect.h Normal file → Executable file
View File

@ -12,7 +12,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
// Last changed : $Date: 2008-02-10 18:26:55 +0200 (su, 10 helmi 2008) $
// File revision : $Revision: 4 $
//
// $Id: cpu_detect.h 11 2008-02-10 16:26:55Z oparviai $

2
src/cpu_detect_x86.cpp Normal file → Executable file
View File

@ -11,7 +11,7 @@
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2014-01-07 20:24:28 +0200 (Tue, 07 Jan 2014) $
// Last changed : $Date: 2014-01-07 20:24:28 +0200 (ti, 07 tammi 2014) $
// File revision : $Revision: 4 $
//
// $Id: cpu_detect_x86.cpp 183 2014-01-07 18:24:28Z oparviai $

400
src/mmx_optimized.cpp Executable file
View File

@ -0,0 +1,400 @@
////////////////////////////////////////////////////////////////////////////////
///
/// MMX optimized routines. All MMX optimized functions have been gathered into
/// this single source code file, regardless to their class or original source
/// code file, in order to ease porting the library to other compiler and
/// processor platforms.
///
/// The MMX-optimizations are programmed using MMX compiler intrinsics that
/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
/// should compile with both toolsets.
///
/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++
/// 6.0 processor pack" update to support compiler intrinsic syntax. The update
/// is available for download at Microsoft Developers Network, see here:
/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2017-03-05 15:56:03 +0200 (su, 05 maalis 2017) $
// File revision : $Revision: 4 $
//
// $Id: mmx_optimized.cpp 247 2017-03-05 13:56:03Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include "STTypes.h"
#ifdef SOUNDTOUCH_ALLOW_MMX
// MMX routines available only with integer sample type
using namespace soundtouch;
//////////////////////////////////////////////////////////////////////////////
//
// implementation of MMX optimized functions of class 'TDStretchMMX'
//
//////////////////////////////////////////////////////////////////////////////
#include "TDStretch.h"
#include <mmintrin.h>
#include <limits.h>
#include <math.h>
// Calculates cross correlation of two buffers
double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm)
{
const __m64 *pVec1, *pVec2;
__m64 shifter;
__m64 accu, normaccu;
long corr, norm;
int i;
pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2;
shifter = _m_from_int(overlapDividerBitsNorm);
normaccu = accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
// during each round for improved CPU-level parallellization.
for (i = 0; i < channels * overlapLength / 16; i ++)
{
__m64 temp, temp2;
// dictionary of instructions:
// _m_pmaddwd : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
// _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec1[1]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec1[3]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
pVec1 += 4;
pVec2 += 4;
}
// copy hi-dword of mm0 to lo-dword of mm1, then sum mmo+mm1
// and finally store the result into the variable "corr"
accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
corr = _m_to_int(accu);
normaccu = _mm_add_pi32(normaccu, _mm_srli_si64(normaccu, 32));
norm = _m_to_int(normaccu);
// Clear MMS state
_m_empty();
if (norm > (long)maxnorm)
{
// modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
#pragma omp critical
if (norm > (long)maxnorm)
{
maxnorm = norm;
}
}
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
dnorm = (double)norm;
return (double)corr / sqrt(dnorm < 1e-9 ? 1.0 : dnorm);
// Note: Warning about the missing EMMS instruction is harmless
// as it'll be called elsewhere.
}
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm)
{
const __m64 *pVec1, *pVec2;
__m64 shifter;
__m64 accu;
long corr, lnorm;
int i;
// cancel first normalizer tap from previous round
lnorm = 0;
for (i = 1; i <= channels; i ++)
{
lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm;
}
pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2;
shifter = _m_from_int(overlapDividerBitsNorm);
accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
// during each round for improved CPU-level parallellization.
for (i = 0; i < channels * overlapLength / 16; i ++)
{
__m64 temp;
// dictionary of instructions:
// _m_pmaddwd : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
// _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
accu = _mm_add_pi32(accu, temp);
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
accu = _mm_add_pi32(accu, temp);
pVec1 += 4;
pVec2 += 4;
}
// copy hi-dword of mm0 to lo-dword of mm1, then sum mmo+mm1
// and finally store the result into the variable "corr"
accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
corr = _m_to_int(accu);
// Clear MMS state
_m_empty();
// update normalizer with last samples of this round
pV1 = (short *)pVec1;
for (int j = 1; j <= channels; j ++)
{
lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm;
}
dnorm += (double)lnorm;
if (lnorm > (long)maxnorm)
{
maxnorm = lnorm;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm);
}
void TDStretchMMX::clearCrossCorrState()
{
// Clear MMS state
_m_empty();
//_asm EMMS;
}
// MMX-optimized version of the function overlapStereo
void TDStretchMMX::overlapStereo(short *output, const short *input) const
{
const __m64 *pVinput, *pVMidBuf;
__m64 *pVdest;
__m64 mix1, mix2, adder, shifter;
int i;
pVinput = (const __m64*)input;
pVMidBuf = (const __m64*)pMidBuffer;
pVdest = (__m64*)output;
// mix1 = mixer values for 1st stereo sample
// mix1 = mixer values for 2nd stereo sample
// adder = adder for updating mixer values after each round
mix1 = _mm_set_pi16(0, overlapLength, 0, overlapLength);
adder = _mm_set_pi16(1, -1, 1, -1);
mix2 = _mm_add_pi16(mix1, adder);
adder = _mm_add_pi16(adder, adder);
// Overlaplength-division by shifter. "+1" is to account for "-1" deduced in
// overlapDividerBits calculation earlier.
shifter = _m_from_int(overlapDividerBitsPure + 1);
for (i = 0; i < overlapLength / 4; i ++)
{
__m64 temp1, temp2;
// load & shuffle data so that input & mixbuffer data samples are paired
temp1 = _mm_unpacklo_pi16(pVMidBuf[0], pVinput[0]); // = i0l m0l i0r m0r
temp2 = _mm_unpackhi_pi16(pVMidBuf[0], pVinput[0]); // = i1l m1l i1r m1r
// temp = (temp .* mix) >> shifter
temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
pVdest[0] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
// update mix += adder
mix1 = _mm_add_pi16(mix1, adder);
mix2 = _mm_add_pi16(mix2, adder);
// --- second round begins here ---
// load & shuffle data so that input & mixbuffer data samples are paired
temp1 = _mm_unpacklo_pi16(pVMidBuf[1], pVinput[1]); // = i2l m2l i2r m2r
temp2 = _mm_unpackhi_pi16(pVMidBuf[1], pVinput[1]); // = i3l m3l i3r m3r
// temp = (temp .* mix) >> shifter
temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
pVdest[1] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
// update mix += adder
mix1 = _mm_add_pi16(mix1, adder);
mix2 = _mm_add_pi16(mix2, adder);
pVinput += 2;
pVMidBuf += 2;
pVdest += 2;
}
_m_empty(); // clear MMS state
}
//////////////////////////////////////////////////////////////////////////////
//
// implementation of MMX optimized functions of class 'FIRFilter'
//
//////////////////////////////////////////////////////////////////////////////
#include "FIRFilter.h"
FIRFilterMMX::FIRFilterMMX() : FIRFilter()
{
filterCoeffsAlign = NULL;
filterCoeffsUnalign = NULL;
}
FIRFilterMMX::~FIRFilterMMX()
{
delete[] filterCoeffsUnalign;
}
// (overloaded) Calculates filter coefficients for MMX routine
void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor)
{
uint i;
FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
// Ensure that filter coeffs array is aligned to 16-byte boundary
delete[] filterCoeffsUnalign;
filterCoeffsUnalign = new short[2 * newLength + 8];
filterCoeffsAlign = (short *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);
// rearrange the filter coefficients for mmx routines
for (i = 0;i < length; i += 4)
{
filterCoeffsAlign[2 * i + 0] = coeffs[i + 0];
filterCoeffsAlign[2 * i + 1] = coeffs[i + 2];
filterCoeffsAlign[2 * i + 2] = coeffs[i + 0];
filterCoeffsAlign[2 * i + 3] = coeffs[i + 2];
filterCoeffsAlign[2 * i + 4] = coeffs[i + 1];
filterCoeffsAlign[2 * i + 5] = coeffs[i + 3];
filterCoeffsAlign[2 * i + 6] = coeffs[i + 1];
filterCoeffsAlign[2 * i + 7] = coeffs[i + 3];
}
}
// mmx-optimized version of the filter routine for stereo sound
uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
{
// Create stack copies of the needed member variables for asm routines :
uint i, j;
__m64 *pVdest = (__m64*)dest;
if (length < 2) return 0;
for (i = 0; i < (numSamples - length) / 2; i ++)
{
__m64 accu1;
__m64 accu2;
const __m64 *pVsrc = (const __m64*)src;
const __m64 *pVfilter = (const __m64*)filterCoeffsAlign;
accu1 = accu2 = _mm_setzero_si64();
for (j = 0; j < lengthDiv8 * 2; j ++)
{
__m64 temp1, temp2;
temp1 = _mm_unpacklo_pi16(pVsrc[0], pVsrc[1]); // = l2 l0 r2 r0
temp2 = _mm_unpackhi_pi16(pVsrc[0], pVsrc[1]); // = l3 l1 r3 r1
accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp1, pVfilter[0])); // += l2*f2+l0*f0 r2*f2+r0*f0
accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp2, pVfilter[1])); // += l3*f3+l1*f1 r3*f3+r1*f1
temp1 = _mm_unpacklo_pi16(pVsrc[1], pVsrc[2]); // = l4 l2 r4 r2
accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp2, pVfilter[0])); // += l3*f2+l1*f0 r3*f2+r1*f0
accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp1, pVfilter[1])); // += l4*f3+l2*f1 r4*f3+r2*f1
// accu1 += l2*f2+l0*f0 r2*f2+r0*f0
// += l3*f3+l1*f1 r3*f3+r1*f1
// accu2 += l3*f2+l1*f0 r3*f2+r1*f0
// l4*f3+l2*f1 r4*f3+r2*f1
pVfilter += 2;
pVsrc += 2;
}
// accu >>= resultDivFactor
accu1 = _mm_srai_pi32(accu1, resultDivFactor);
accu2 = _mm_srai_pi32(accu2, resultDivFactor);
// pack 2*2*32bits => 4*16 bits
pVdest[0] = _mm_packs_pi32(accu1, accu2);
src += 4;
pVdest ++;
}
_m_empty(); // clear emms state
return (numSamples & 0xfffffffe) - length;
}
#endif // SOUNDTOUCH_ALLOW_MMX

372
src/sse_optimized.cpp Executable file
View File

@ -0,0 +1,372 @@
////////////////////////////////////////////////////////////////////////////////
///
/// SSE optimized routines for Pentium-III, Athlon-XP and later CPUs. All SSE
/// optimized functions have been gathered into this single source
/// code file, regardless to their class or original source code file, in order
/// to ease porting the library to other compiler and processor platforms.
///
/// The SSE-optimizations are programmed using SSE compiler intrinsics that
/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
/// should compile with both toolsets.
///
/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++
/// 6.0 processor pack" update to support SSE instruction set. The update is
/// available for download at Microsoft Developers Network, see here:
/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
///
/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and
/// perform a search with keywords "processor pack".
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2015-08-09 00:00:15 +0300 (su, 09 elo 2015) $
// File revision : $Revision: 4 $
//
// $Id: sse_optimized.cpp 226 2015-08-08 21:00:15Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include "cpu_detect.h"
#include "STTypes.h"
using namespace soundtouch;
#ifdef SOUNDTOUCH_ALLOW_SSE
// SSE routines available only with float sample type
//////////////////////////////////////////////////////////////////////////////
//
// implementation of SSE optimized functions of class 'TDStretchSSE'
//
//////////////////////////////////////////////////////////////////////////////
#include "TDStretch.h"
#include <xmmintrin.h>
#include <math.h>
// Calculates cross correlation of two buffers
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm)
{
int i;
const float *pVec1;
const __m128 *pVec2;
__m128 vSum, vNorm;
// Note. It means a major slow-down if the routine needs to tolerate
// unaligned __m128 memory accesses. It's way faster if we can skip
// unaligned slots and use _mm_load_ps instruction instead of _mm_loadu_ps.
// This can mean up to ~ 10-fold difference (incl. part of which is
// due to skipping every second round for stereo sound though).
//
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
// for choosing if this little cheating is allowed.
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
// Little cheating allowed, return valid correlation only for
// aligned locations, meaning every second round for stereo sound.
#define _MM_LOAD _mm_load_ps
if (((ulongptr)pV1) & 15) return -1e50; // skip unaligned locations
#else
// No cheating allowed, use unaligned load & take the resulting
// performance hit.
#define _MM_LOAD _mm_loadu_ps
#endif
// ensure overlapLength is divisible by 8
assert((overlapLength % 8) == 0);
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
// Note: pV2 _must_ be aligned to 16-bit boundary, pV1 need not.
pVec1 = (const float*)pV1;
pVec2 = (const __m128*)pV2;
vSum = vNorm = _mm_setzero_ps();
// Unroll the loop by factor of 4 * 4 operations. Use same routine for
// stereo & mono, for mono it just means twice the amount of unrolling.
for (i = 0; i < channels * overlapLength / 16; i ++)
{
__m128 vTemp;
// vSum += pV1[0..3] * pV2[0..3]
vTemp = _MM_LOAD(pVec1);
vSum = _mm_add_ps(vSum, _mm_mul_ps(vTemp ,pVec2[0]));
vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
// vSum += pV1[4..7] * pV2[4..7]
vTemp = _MM_LOAD(pVec1 + 4);
vSum = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[1]));
vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
// vSum += pV1[8..11] * pV2[8..11]
vTemp = _MM_LOAD(pVec1 + 8);
vSum = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[2]));
vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
// vSum += pV1[12..15] * pV2[12..15]
vTemp = _MM_LOAD(pVec1 + 12);
vSum = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[3]));
vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
pVec1 += 16;
pVec2 += 4;
}
// return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
float *pvNorm = (float*)&vNorm;
float norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
anorm = norm;
float *pvSum = (float*)&vSum;
return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / sqrt(norm < 1e-9 ? 1.0 : norm);
/* This is approximately corresponding routine in C-language yet without normalization:
double corr, norm;
uint i;
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
corr = norm = 0.0;
for (i = 0; i < channels * overlapLength / 16; i ++)
{
corr += pV1[0] * pV2[0] +
pV1[1] * pV2[1] +
pV1[2] * pV2[2] +
pV1[3] * pV2[3] +
pV1[4] * pV2[4] +
pV1[5] * pV2[5] +
pV1[6] * pV2[6] +
pV1[7] * pV2[7] +
pV1[8] * pV2[8] +
pV1[9] * pV2[9] +
pV1[10] * pV2[10] +
pV1[11] * pV2[11] +
pV1[12] * pV2[12] +
pV1[13] * pV2[13] +
pV1[14] * pV2[14] +
pV1[15] * pV2[15];
for (j = 0; j < 15; j ++) norm += pV1[j] * pV1[j];
pV1 += 16;
pV2 += 16;
}
return corr / sqrt(norm);
*/
}
double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm)
{
// call usual calcCrossCorr function because SSE does not show big benefit of
// accumulating "norm" value, and also the "norm" rolling algorithm would get
// complicated due to SSE-specific alignment-vs-nonexact correlation rules.
return calcCrossCorr(pV1, pV2, norm);
}
//////////////////////////////////////////////////////////////////////////////
//
// implementation of SSE optimized functions of class 'FIRFilter'
//
//////////////////////////////////////////////////////////////////////////////
#include "FIRFilter.h"
FIRFilterSSE::FIRFilterSSE() : FIRFilter()
{
filterCoeffsAlign = NULL;
filterCoeffsUnalign = NULL;
}
FIRFilterSSE::~FIRFilterSSE()
{
delete[] filterCoeffsUnalign;
filterCoeffsAlign = NULL;
filterCoeffsUnalign = NULL;
}
// (overloaded) Calculates filter coefficients for SSE routine
void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor)
{
uint i;
float fDivider;
FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
// Scale the filter coefficients so that it won't be necessary to scale the filtering result
// also rearrange coefficients suitably for SSE
// Ensure that filter coeffs array is aligned to 16-byte boundary
delete[] filterCoeffsUnalign;
filterCoeffsUnalign = new float[2 * newLength + 4];
filterCoeffsAlign = (float *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);
fDivider = (float)resultDivider;
// rearrange the filter coefficients for mmx routines
for (i = 0; i < newLength; i ++)
{
filterCoeffsAlign[2 * i + 0] =
filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider;
}
}
// SSE-optimized version of the filter routine for stereo sound
uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint numSamples) const
{
int count = (int)((numSamples - length) & (uint)-2);
int j;
assert(count % 2 == 0);
if (count < 2) return 0;
assert(source != NULL);
assert(dest != NULL);
assert((length % 8) == 0);
assert(filterCoeffsAlign != NULL);
assert(((ulongptr)filterCoeffsAlign) % 16 == 0);
// filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
#pragma omp parallel for
for (j = 0; j < count; j += 2)
{
const float *pSrc;
float *pDest;
const __m128 *pFil;
__m128 sum1, sum2;
uint i;
pSrc = (const float*)source + j * 2; // source audio data
pDest = dest + j * 2; // destination audio data
pFil = (const __m128*)filterCoeffsAlign; // filter coefficients. NOTE: Assumes coefficients
// are aligned to 16-byte boundary
sum1 = sum2 = _mm_setzero_ps();
for (i = 0; i < length / 8; i ++)
{
// Unroll loop for efficiency & calculate filter for 2*2 stereo samples
// at each pass
// sum1 is accu for 2*2 filtered stereo sound data at the primary sound data offset
// sum2 is accu for 2*2 filtered stereo sound data for the next sound sample offset.
sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc) , pFil[0]));
sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 2), pFil[0]));
sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 4), pFil[1]));
sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 6), pFil[1]));
sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 8) , pFil[2]));
sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 10), pFil[2]));
sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 12), pFil[3]));
sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 14), pFil[3]));
pSrc += 16;
pFil += 4;
}
// Now sum1 and sum2 both have a filtered 2-channel sample each, but we still need
// to sum the two hi- and lo-floats of these registers together.
// post-shuffle & add the filtered values and store to dest.
_mm_storeu_ps(pDest, _mm_add_ps(
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)), // s2_1 s2_0 s1_3 s1_2
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0)) // s2_3 s2_2 s1_1 s1_0
));
}
// Ideas for further improvement:
// 1. If it could be guaranteed that 'source' were always aligned to 16-byte
// boundary, a faster aligned '_mm_load_ps' instruction could be used.
// 2. If it could be guaranteed that 'dest' were always aligned to 16-byte
// boundary, a faster '_mm_store_ps' instruction could be used.
return (uint)count;
/* original routine in C-language. please notice the C-version has differently
organized coefficients though.
double suml1, suml2;
double sumr1, sumr2;
uint i, j;
for (j = 0; j < count; j += 2)
{
const float *ptr;
const float *pFil;
suml1 = sumr1 = 0.0;
suml2 = sumr2 = 0.0;
ptr = src;
pFil = filterCoeffs;
for (i = 0; i < lengthLocal; i ++)
{
// unroll loop for efficiency.
suml1 += ptr[0] * pFil[0] +
ptr[2] * pFil[2] +
ptr[4] * pFil[4] +
ptr[6] * pFil[6];
sumr1 += ptr[1] * pFil[1] +
ptr[3] * pFil[3] +
ptr[5] * pFil[5] +
ptr[7] * pFil[7];
suml2 += ptr[8] * pFil[0] +
ptr[10] * pFil[2] +
ptr[12] * pFil[4] +
ptr[14] * pFil[6];
sumr2 += ptr[9] * pFil[1] +
ptr[11] * pFil[3] +
ptr[13] * pFil[5] +
ptr[15] * pFil[7];
ptr += 16;
pFil += 8;
}
dest[0] = (float)suml1;
dest[1] = (float)sumr1;
dest[2] = (float)suml2;
dest[3] = (float)sumr2;
src += 4;
dest += 4;
}
*/
}
#endif // SOUNDTOUCH_ALLOW_SSE