1 files changed, 27 insertions, 16 deletions
diff --git a/media/libsoundtouch/src/mmx_optimized.cpp b/media/libsoundtouch/src/mmx_optimized.cpp
index 9062026399..0bc7fe86f7 100644
--- a/media/libsoundtouch/src/mmx_optimized.cpp
+++ b/media/libsoundtouch/src/mmx_optimized.cpp
@@ -20,13 +20,6 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2015-02-22 15:10:38 +0000 (Sun, 22 Feb 2015) $
-// File revision : $Revision: 4 $
-//
-// $Id: mmx_optimized.cpp 206 2015-02-22 15:10:38Z oparviai $
-//
-////////////////////////////////////////////////////////////////////////////////
-//
 // License :
 //
 //  SoundTouch audio processing library
@@ -68,7 +61,7 @@ using namespace soundtouch;
 
 
 // Calculates cross correlation of two buffers
-double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const
+double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm)
 {
     const __m64 *pVec1, *pVec2;
     __m64 shifter;
@@ -79,7 +72,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
     pVec1 = (__m64*)pV1;
     pVec2 = (__m64*)pV2;
 
-    shifter = _m_from_int(overlapDividerBits);
+    shifter = _m_from_int(overlapDividerBitsNorm);
     normaccu = accu = _mm_setzero_si64();
 
     // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples 
@@ -123,6 +116,16 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
     // Clear MMS state
     _m_empty();
 
+    if (norm > (long)maxnorm)
+    {
+        // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
+        #pragma omp critical
+        if (norm > (long)maxnorm)
+        {
+            maxnorm = norm;
+        }
+    }
+
     // Normalize result by dividing by sqrt(norm) - this step is easiest 
     // done using floating point operation
     dnorm = (double)norm;
@@ -134,7 +137,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
 
 
 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
-double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const
+double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm)
 {
     const __m64 *pVec1, *pVec2;
     __m64 shifter;
@@ -146,13 +149,13 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
     lnorm = 0;
     for (i = 1; i <= channels; i ++)
     {
-        lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits;
+        lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm;
     }
 
     pVec1 = (__m64*)pV1;
     pVec2 = (__m64*)pV2;
 
-    shifter = _m_from_int(overlapDividerBits);
+    shifter = _m_from_int(overlapDividerBitsNorm);
     accu = _mm_setzero_si64();
 
     // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples 
@@ -191,10 +194,15 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
     pV1 = (short *)pVec1;
     for (int j = 1; j <= channels; j ++)
     {
-        lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits;
+        lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm;
     }
     dnorm += (double)lnorm;
 
+    if (lnorm > (long)maxnorm)
+    {
+        maxnorm = lnorm;
+    }
+
     // Normalize result by dividing by sqrt(norm) - this step is easiest 
     // done using floating point operation
     return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm);
@@ -209,7 +217,6 @@ void TDStretchMMX::clearCrossCorrState()
 }
 
 
-
 // MMX-optimized version of the function overlapStereo
 void TDStretchMMX::overlapStereo(short *output, const short *input) const
 {
@@ -233,7 +240,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
 
     // Overlaplength-division by shifter. "+1" is to account for "-1" deduced in
     // overlapDividerBits calculation earlier.
-    shifter = _m_from_int(overlapDividerBits + 1);
+    shifter = _m_from_int(overlapDividerBitsPure + 1);
 
     for (i = 0; i < overlapLength / 4; i ++)
     {
@@ -325,7 +332,6 @@ void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uRe
 }
 
 
-
 // mmx-optimized version of the filter routine for stereo sound
 uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
 {
@@ -382,4 +388,9 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS
     return (numSamples & 0xfffffffe) - length;
 }
 
+#else
+
+// workaround to not complain about empty module
+bool _dontcomplain_mmx_empty;
+
 #endif  // SOUNDTOUCH_ALLOW_MMX