summaryrefslogtreecommitdiff
path: root/media/libsoundtouch/src/mmx_optimized.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'media/libsoundtouch/src/mmx_optimized.cpp')
-rw-r--r--media/libsoundtouch/src/mmx_optimized.cpp43
1 files changed, 27 insertions, 16 deletions
diff --git a/media/libsoundtouch/src/mmx_optimized.cpp b/media/libsoundtouch/src/mmx_optimized.cpp
index 9062026399..0bc7fe86f7 100644
--- a/media/libsoundtouch/src/mmx_optimized.cpp
+++ b/media/libsoundtouch/src/mmx_optimized.cpp
@@ -20,13 +20,6 @@
///
////////////////////////////////////////////////////////////////////////////////
//
-// Last changed : $Date: 2015-02-22 15:10:38 +0000 (Sun, 22 Feb 2015) $
-// File revision : $Revision: 4 $
-//
-// $Id: mmx_optimized.cpp 206 2015-02-22 15:10:38Z oparviai $
-//
-////////////////////////////////////////////////////////////////////////////////
-//
// License :
//
// SoundTouch audio processing library
@@ -68,7 +61,7 @@ using namespace soundtouch;
// Calculates cross correlation of two buffers
-double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const
+double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm)
{
const __m64 *pVec1, *pVec2;
__m64 shifter;
@@ -79,7 +72,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2;
- shifter = _m_from_int(overlapDividerBits);
+ shifter = _m_from_int(overlapDividerBitsNorm);
normaccu = accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
@@ -123,6 +116,16 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
// Clear MMS state
_m_empty();
+ if (norm > (long)maxnorm)
+ {
+ // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
+ #pragma omp critical
+ if (norm > (long)maxnorm)
+ {
+ maxnorm = norm;
+ }
+ }
+
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
dnorm = (double)norm;
@@ -134,7 +137,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
-double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const
+double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm)
{
const __m64 *pVec1, *pVec2;
__m64 shifter;
@@ -146,13 +149,13 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
lnorm = 0;
for (i = 1; i <= channels; i ++)
{
- lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits;
+ lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm;
}
pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2;
- shifter = _m_from_int(overlapDividerBits);
+ shifter = _m_from_int(overlapDividerBitsNorm);
accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
@@ -191,10 +194,15 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
pV1 = (short *)pVec1;
for (int j = 1; j <= channels; j ++)
{
- lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits;
+ lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm;
}
dnorm += (double)lnorm;
+ if (lnorm > (long)maxnorm)
+ {
+ maxnorm = lnorm;
+ }
+
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm);
@@ -209,7 +217,6 @@ void TDStretchMMX::clearCrossCorrState()
}
-
// MMX-optimized version of the function overlapStereo
void TDStretchMMX::overlapStereo(short *output, const short *input) const
{
@@ -233,7 +240,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
// Overlaplength-division by shifter. "+1" is to account for "-1" deduced in
// overlapDividerBits calculation earlier.
- shifter = _m_from_int(overlapDividerBits + 1);
+ shifter = _m_from_int(overlapDividerBitsPure + 1);
for (i = 0; i < overlapLength / 4; i ++)
{
@@ -325,7 +332,6 @@ void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uRe
}
-
// mmx-optimized version of the filter routine for stereo sound
uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
{
@@ -382,4 +388,9 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS
return (numSamples & 0xfffffffe) - length;
}
+#else
+
+// workaround to not complain about empty module
+bool _dontcomplain_mmx_empty;
+
#endif // SOUNDTOUCH_ALLOW_MMX