1 files changed, 52 insertions, 51 deletions
diff --git a/media/libsoundtouch/src/FIRFilter.cpp b/media/libsoundtouch/src/FIRFilter.cpp
index dc7c4aa0ba..201395bd51 100644
--- a/media/libsoundtouch/src/FIRFilter.cpp
+++ b/media/libsoundtouch/src/FIRFilter.cpp
@@ -2,22 +2,21 @@
 ///
 /// General FIR digital filter routines with MMX optimization. 
 ///
-/// Note : MMX optimized functions reside in a separate, platform-specific file, 
+/// Notes : MMX optimized functions reside in a separate, platform-specific file, 
 /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
 ///
+/// This source file contains OpenMP optimizations that allow speeding up the
+/// corss-correlation algorithm by executing it in several threads / CPU cores 
+/// in parallel. See the following article link for more detailed discussion 
+/// about SoundTouch OpenMP optimizations:
+/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
+///
 /// Author        : Copyright (c) Olli Parviainen
 /// Author e-mail : oparviai 'at' iki.fi
 /// SoundTouch WWW: http://www.surina.net/soundtouch
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $
-// File revision : $Revision: 4 $
-//
-// $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $
-//
-////////////////////////////////////////////////////////////////////////////////
-//
 // License :
 //
 //  SoundTouch audio processing library
@@ -61,14 +60,17 @@ FIRFilter::FIRFilter()
     length = 0;
     lengthDiv8 = 0;
     filterCoeffs = NULL;
+    filterCoeffsStereo = NULL;
 }
 
 
 FIRFilter::~FIRFilter()
 {
     delete[] filterCoeffs;
+    delete[] filterCoeffsStereo;
 }
 
+
 // Usual C-version of the filter routine for stereo sound
 uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
@@ -78,35 +80,26 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
     // because division is much slower operation than multiplying.
     double dScaler = 1.0 / (double)resultDivider;
 #endif
+    // hint compiler autovectorization that loop length is divisible by 8
+    int ilength = length & -8;
 
-    assert(length != 0);
-    assert(src != NULL);
-    assert(dest != NULL);
-    assert(filterCoeffs != NULL);
+    assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL));
 
-    end = 2 * (numSamples - length);
+    end = 2 * (numSamples - ilength);
 
     #pragma omp parallel for
     for (j = 0; j < end; j += 2) 
     {
         const SAMPLETYPE *ptr;
         LONG_SAMPLETYPE suml, sumr;
-        uint i;
 
         suml = sumr = 0;
         ptr = src + j;
 
-        for (i = 0; i < length; i += 4) 
+        for (int i = 0; i < ilength; i ++)
         {
-            // loop is unrolled by factor of 4 here for efficiency
-            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 6] * filterCoeffs[i + 3];
-            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+            suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
+            sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
         }
 
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
@@ -116,19 +109,14 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
         suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
         // saturate to 16 bit integer limits
         sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
-#else
-        suml *= dScaler;
-        sumr *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
         dest[j] = (SAMPLETYPE)suml;
         dest[j + 1] = (SAMPLETYPE)sumr;
     }
-    return numSamples - length;
+    return numSamples - ilength;
 }
 
 
-
-
 // Usual C-version of the filter routine for mono sound
 uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
@@ -139,31 +127,28 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
     double dScaler = 1.0 / (double)resultDivider;
 #endif
 
-    assert(length != 0);
+    // hint compiler autovectorization that loop length is divisible by 8
+    int ilength = length & -8;
 
-    end = numSamples - length;
+    assert(ilength != 0);
+
+    end = numSamples - ilength;
     #pragma omp parallel for
-    for (j = 0; j < end; j ++) 
+    for (j = 0; j < end; j ++)
     {
         const SAMPLETYPE *pSrc = src + j;
         LONG_SAMPLETYPE sum;
-        uint i;
+        int i;
 
         sum = 0;
-        for (i = 0; i < length; i += 4) 
+        for (i = 0; i < ilength; i ++)
         {
-            // loop is unrolled by factor of 4 here for efficiency
-            sum += pSrc[i + 0] * filterCoeffs[i + 0] + 
-                   pSrc[i + 1] * filterCoeffs[i + 1] + 
-                   pSrc[i + 2] * filterCoeffs[i + 2] + 
-                   pSrc[i + 3] * filterCoeffs[i + 3];
+            sum += pSrc[i] * filterCoeffs[i];
         }
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
         sum >>= resultDivFactor;
         // saturate to 16 bit integer limits
         sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
-#else
-        sum *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
         dest[j] = (SAMPLETYPE)sum;
     }
@@ -187,14 +172,18 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
     assert(filterCoeffs != NULL);
     assert(numChannels < 16);
 
-    end = numChannels * (numSamples - length);
+    // hint compiler autovectorization that loop length is divisible by 8
+    int ilength = length & -8;
+
+    end = numChannels * (numSamples - ilength);
 
     #pragma omp parallel for
     for (j = 0; j < end; j += numChannels)
     {
         const SAMPLETYPE *ptr;
         LONG_SAMPLETYPE sums[16];
-        uint c, i;
+        uint c;
+        int i;
 
         for (c = 0; c < numChannels; c ++)
         {
@@ -203,7 +192,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
 
         ptr = src + j;
 
-        for (i = 0; i < length; i ++)
+        for (i = 0; i < ilength; i ++)
         {
             SAMPLETYPE coef=filterCoeffs[i];
             for (c = 0; c < numChannels; c ++)
@@ -217,13 +206,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
         {
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
             sums[c] >>= resultDivFactor;
-#else
-            sums[c] *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
             dest[j+c] = (SAMPLETYPE)sums[c];
         }
     }
-    return numSamples - length;
+    return numSamples - ilength;
 }
 
 
@@ -235,6 +222,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
     assert(newLength > 0);
     if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
 
+    #ifdef SOUNDTOUCH_FLOAT_SAMPLES
+        // scale coefficients already here if using floating samples
+        double scale = 1.0 / resultDivider;
+    #else
+        short scale = 1;
+    #endif
+
     lengthDiv8 = newLength / 8;
     length = lengthDiv8 * 8;
     assert(length == newLength);
@@ -244,7 +238,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
 
     delete[] filterCoeffs;
     filterCoeffs = new SAMPLETYPE[length];
-    memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
+    delete[] filterCoeffsStereo;
+    filterCoeffsStereo = new SAMPLETYPE[length*2];
+    for (uint i = 0; i < length; i ++)
+    {
+        filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
+        // create also stereo set of filter coefficients: this allows compiler
+        // to autovectorize filter evaluation much more efficiently
+        filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
+        filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
+    }
 }
 
 
@@ -254,7 +257,6 @@ uint FIRFilter::getLength() const
 }
 
 
-
 // Applies the filter to the given sequence of samples. 
 //
 // Note : The amount of outputted samples is by value of 'filter_length' 
@@ -284,7 +286,6 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample
 }
 
 
-
 // Operator 'new' is overloaded so that it automatically creates a suitable instance 
 // depending on if we've a MMX-capable CPU available or not.
 void * FIRFilter::operator new(size_t s)