diff options
Diffstat (limited to 'media/libsoundtouch/src/mmx_optimized.cpp')
-rw-r--r-- | media/libsoundtouch/src/mmx_optimized.cpp | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/media/libsoundtouch/src/mmx_optimized.cpp b/media/libsoundtouch/src/mmx_optimized.cpp index 9062026399..0bc7fe86f7 100644 --- a/media/libsoundtouch/src/mmx_optimized.cpp +++ b/media/libsoundtouch/src/mmx_optimized.cpp @@ -20,13 +20,6 @@ /// //////////////////////////////////////////////////////////////////////////////// // -// Last changed : $Date: 2015-02-22 15:10:38 +0000 (Sun, 22 Feb 2015) $ -// File revision : $Revision: 4 $ -// -// $Id: mmx_optimized.cpp 206 2015-02-22 15:10:38Z oparviai $ -// -//////////////////////////////////////////////////////////////////////////////// -// // License : // // SoundTouch audio processing library @@ -68,7 +61,7 @@ using namespace soundtouch; // Calculates cross correlation of two buffers -double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const +double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) { const __m64 *pVec1, *pVec2; __m64 shifter; @@ -79,7 +72,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d pVec1 = (__m64*)pV1; pVec2 = (__m64*)pV2; - shifter = _m_from_int(overlapDividerBits); + shifter = _m_from_int(overlapDividerBitsNorm); normaccu = accu = _mm_setzero_si64(); // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples @@ -123,6 +116,16 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d // Clear MMS state _m_empty(); + if (norm > (long)maxnorm) + { + // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode + #pragma omp critical + if (norm > (long)maxnorm) + { + maxnorm = norm; + } + } + // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation dnorm = (double)norm; @@ -134,7 +137,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value -double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const +double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) { const __m64 *pVec1, *pVec2; __m64 shifter; @@ -146,13 +149,13 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, lnorm = 0; for (i = 1; i <= channels; i ++) { - lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits; + lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm; } pVec1 = (__m64*)pV1; pVec2 = (__m64*)pV2; - shifter = _m_from_int(overlapDividerBits); + shifter = _m_from_int(overlapDividerBitsNorm); accu = _mm_setzero_si64(); // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples @@ -191,10 +194,15 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, pV1 = (short *)pVec1; for (int j = 1; j <= channels; j ++) { - lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits; + lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm; } dnorm += (double)lnorm; + if (lnorm > (long)maxnorm) + { + maxnorm = lnorm; + } + // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm); @@ -209,7 +217,6 @@ void TDStretchMMX::clearCrossCorrState() } - // MMX-optimized version of the function overlapStereo void TDStretchMMX::overlapStereo(short *output, const short *input) const { @@ -233,7 +240,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const // Overlaplength-division by shifter. "+1" is to account for "-1" deduced in // overlapDividerBits calculation earlier. - shifter = _m_from_int(overlapDividerBits + 1); + shifter = _m_from_int(overlapDividerBitsPure + 1); for (i = 0; i < overlapLength / 4; i ++) { @@ -325,7 +332,6 @@ void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uRe } - // mmx-optimized version of the filter routine for stereo sound uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const { @@ -382,4 +388,9 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS return (numSamples & 0xfffffffe) - length; } +#else + +// workaround to not complain about empty module +bool _dontcomplain_mmx_empty; + #endif // SOUNDTOUCH_ALLOW_MMX |