diff options
Diffstat (limited to 'media/libsoundtouch/src/FIRFilter.cpp')
-rw-r--r-- | media/libsoundtouch/src/FIRFilter.cpp | 103 |
1 files changed, 52 insertions, 51 deletions
diff --git a/media/libsoundtouch/src/FIRFilter.cpp b/media/libsoundtouch/src/FIRFilter.cpp index dc7c4aa0ba..201395bd51 100644 --- a/media/libsoundtouch/src/FIRFilter.cpp +++ b/media/libsoundtouch/src/FIRFilter.cpp @@ -2,22 +2,21 @@ /// /// General FIR digital filter routines with MMX optimization. /// -/// Note : MMX optimized functions reside in a separate, platform-specific file, +/// Notes : MMX optimized functions reside in a separate, platform-specific file, /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' /// +/// This source file contains OpenMP optimizations that allow speeding up the +/// corss-correlation algorithm by executing it in several threads / CPU cores +/// in parallel. See the following article link for more detailed discussion +/// about SoundTouch OpenMP optimizations: +/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices +/// /// Author : Copyright (c) Olli Parviainen /// Author e-mail : oparviai 'at' iki.fi /// SoundTouch WWW: http://www.surina.net/soundtouch /// //////////////////////////////////////////////////////////////////////////////// // -// Last changed : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $ -// File revision : $Revision: 4 $ -// -// $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $ -// -//////////////////////////////////////////////////////////////////////////////// -// // License : // // SoundTouch audio processing library @@ -61,14 +60,17 @@ FIRFilter::FIRFilter() length = 0; lengthDiv8 = 0; filterCoeffs = NULL; + filterCoeffsStereo = NULL; } FIRFilter::~FIRFilter() { delete[] filterCoeffs; + delete[] filterCoeffsStereo; } + // Usual C-version of the filter routine for stereo sound uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const { @@ -78,35 +80,26 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui // because division is much slower operation than multiplying. double dScaler = 1.0 / (double)resultDivider; #endif + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = length & -8; - assert(length != 0); - assert(src != NULL); - assert(dest != NULL); - assert(filterCoeffs != NULL); + assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL)); - end = 2 * (numSamples - length); + end = 2 * (numSamples - ilength); #pragma omp parallel for for (j = 0; j < end; j += 2) { const SAMPLETYPE *ptr; LONG_SAMPLETYPE suml, sumr; - uint i; suml = sumr = 0; ptr = src + j; - for (i = 0; i < length; i += 4) + for (int i = 0; i < ilength; i ++) { - // loop is unrolled by factor of 4 here for efficiency - suml += ptr[2 * i + 0] * filterCoeffs[i + 0] + - ptr[2 * i + 2] * filterCoeffs[i + 1] + - ptr[2 * i + 4] * filterCoeffs[i + 2] + - ptr[2 * i + 6] * filterCoeffs[i + 3]; - sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] + - ptr[2 * i + 3] * filterCoeffs[i + 1] + - ptr[2 * i + 5] * filterCoeffs[i + 2] + - ptr[2 * i + 7] * filterCoeffs[i + 3]; + suml += ptr[2 * i] * filterCoeffsStereo[2 * i]; + sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1]; } #ifdef SOUNDTOUCH_INTEGER_SAMPLES @@ -116,19 +109,14 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml; // saturate to 16 bit integer limits sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr; -#else - suml *= dScaler; - sumr *= dScaler; #endif // SOUNDTOUCH_INTEGER_SAMPLES dest[j] = (SAMPLETYPE)suml; dest[j + 1] = (SAMPLETYPE)sumr; } - return numSamples - length; + return numSamples - ilength; } - - // Usual C-version of the filter routine for mono sound uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const { @@ -139,31 +127,28 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint double dScaler = 1.0 / (double)resultDivider; #endif - assert(length != 0); + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = length & -8; - end = numSamples - length; + assert(ilength != 0); + + end = numSamples - ilength; #pragma omp parallel for - for (j = 0; j < end; j ++) + for (j = 0; j < end; j ++) { const SAMPLETYPE *pSrc = src + j; LONG_SAMPLETYPE sum; - uint i; + int i; sum = 0; - for (i = 0; i < length; i += 4) + for (i = 0; i < ilength; i ++) { - // loop is unrolled by factor of 4 here for efficiency - sum += pSrc[i + 0] * filterCoeffs[i + 0] + - pSrc[i + 1] * filterCoeffs[i + 1] + - pSrc[i + 2] * filterCoeffs[i + 2] + - pSrc[i + 3] * filterCoeffs[i + 3]; + sum += pSrc[i] * filterCoeffs[i]; } #ifdef SOUNDTOUCH_INTEGER_SAMPLES sum >>= resultDivFactor; // saturate to 16 bit integer limits sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum; -#else - sum *= dScaler; #endif // SOUNDTOUCH_INTEGER_SAMPLES dest[j] = (SAMPLETYPE)sum; } @@ -187,14 +172,18 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin assert(filterCoeffs != NULL); assert(numChannels < 16); - end = numChannels * (numSamples - length); + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = length & -8; + + end = numChannels * (numSamples - ilength); #pragma omp parallel for for (j = 0; j < end; j += numChannels) { const SAMPLETYPE *ptr; LONG_SAMPLETYPE sums[16]; - uint c, i; + uint c; + int i; for (c = 0; c < numChannels; c ++) { @@ -203,7 +192,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin ptr = src + j; - for (i = 0; i < length; i ++) + for (i = 0; i < ilength; i ++) { SAMPLETYPE coef=filterCoeffs[i]; for (c = 0; c < numChannels; c ++) @@ -217,13 +206,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin { #ifdef SOUNDTOUCH_INTEGER_SAMPLES sums[c] >>= resultDivFactor; -#else - sums[c] *= dScaler; #endif // SOUNDTOUCH_INTEGER_SAMPLES dest[j+c] = (SAMPLETYPE)sums[c]; } } - return numSamples - length; + return numSamples - ilength; } @@ -235,6 +222,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u assert(newLength > 0); if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8"); + #ifdef SOUNDTOUCH_FLOAT_SAMPLES + // scale coefficients already here if using floating samples + double scale = 1.0 / resultDivider; + #else + short scale = 1; + #endif + lengthDiv8 = newLength / 8; length = lengthDiv8 * 8; assert(length == newLength); @@ -244,7 +238,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u delete[] filterCoeffs; filterCoeffs = new SAMPLETYPE[length]; - memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE)); + delete[] filterCoeffsStereo; + filterCoeffsStereo = new SAMPLETYPE[length*2]; + for (uint i = 0; i < length; i ++) + { + filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale); + // create also stereo set of filter coefficients: this allows compiler + // to autovectorize filter evaluation much more efficiently + filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale); + filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale); + } } @@ -254,7 +257,6 @@ uint FIRFilter::getLength() const } - // Applies the filter to the given sequence of samples. // // Note : The amount of outputted samples is by value of 'filter_length' @@ -284,7 +286,6 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample } - // Operator 'new' is overloaded so that it automatically creates a suitable instance // depending on if we've a MMX-capable CPU available or not. void * FIRFilter::operator new(size_t s) |