summary | refs | log | tree | commit | diff
path: root/media/libyuv/source/compare_win.cc
diff options
context:
space:
mode:
Diffstat (limited to 'media/libyuv/source/compare_win.cc')
-rw-r--r-- media/libyuv/source/compare_win.cc 131
1 files changed, 75 insertions, 56 deletions
diff --git a/media/libyuv/source/compare_win.cc b/media/libyuv/source/compare_win.cc
index dc86fe25b1..9bb27f1dd1 100644
--- a/media/libyuv/source/compare_win.cc
+++ b/media/libyuv/source/compare_win.cc
@@ -13,20 +13,40 @@
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
+#if defined(_MSC_VER)
+#include <intrin.h> // For __popcnt
+#endif
+
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
-// This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+// This module is for 32 bit Visual C x86
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
+ !defined(__clang__) && defined(_M_IX86)
+
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ int i;
+ for (i = 0; i < count - 3; i += 4) {
+ uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
+ src_a += 4;
+ src_b += 4;
+ diff += __popcnt(x);
+ }
+ return diff;
+}
-__declspec(naked)
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
+__declspec(naked) uint32_t
+ SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
+ mov eax, [esp + 4] // src_a
+ mov edx, [esp + 8] // src_b
+ mov ecx, [esp + 12] // count
pxor xmm0, xmm0
pxor xmm5, xmm5
@@ -58,16 +78,15 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
}
}
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
+#ifdef HAS_SUMSQUAREERROR_AVX2
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
-#pragma warning(disable: 4752)
-__declspec(naked)
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
+#pragma warning(disable : 4752)
+__declspec(naked) uint32_t
+ SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
+ mov eax, [esp + 4] // src_a
+ mov edx, [esp + 8] // src_b
+ mov ecx, [esp + 12] // count
vpxor ymm0, ymm0, ymm0 // sum
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
@@ -99,67 +118,67 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
ret
}
}
-#endif // _MSC_VER >= 1700
+#endif // HAS_SUMSQUAREERROR_AVX2
-uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
+uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
uvec32 kHashMul0 = {
- 0x0c3525e1, // 33 ^ 15
- 0xa3476dc1, // 33 ^ 14
- 0x3b4039a1, // 33 ^ 13
- 0x4f5f0981, // 33 ^ 12
+ 0x0c3525e1, // 33 ^ 15
+ 0xa3476dc1, // 33 ^ 14
+ 0x3b4039a1, // 33 ^ 13
+ 0x4f5f0981, // 33 ^ 12
};
uvec32 kHashMul1 = {
- 0x30f35d61, // 33 ^ 11
- 0x855cb541, // 33 ^ 10
- 0x040a9121, // 33 ^ 9
- 0x747c7101, // 33 ^ 8
+ 0x30f35d61, // 33 ^ 11
+ 0x855cb541, // 33 ^ 10
+ 0x040a9121, // 33 ^ 9
+ 0x747c7101, // 33 ^ 8
};
uvec32 kHashMul2 = {
- 0xec41d4e1, // 33 ^ 7
- 0x4cfa3cc1, // 33 ^ 6
- 0x025528a1, // 33 ^ 5
- 0x00121881, // 33 ^ 4
+ 0xec41d4e1, // 33 ^ 7
+ 0x4cfa3cc1, // 33 ^ 6
+ 0x025528a1, // 33 ^ 5
+ 0x00121881, // 33 ^ 4
};
uvec32 kHashMul3 = {
- 0x00008c61, // 33 ^ 3
- 0x00000441, // 33 ^ 2
- 0x00000021, // 33 ^ 1
- 0x00000001, // 33 ^ 0
+ 0x00008c61, // 33 ^ 3
+ 0x00000441, // 33 ^ 2
+ 0x00000021, // 33 ^ 1
+ 0x00000001, // 33 ^ 0
};
-__declspec(naked)
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
+__declspec(naked) uint32_t
+ HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
__asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
+ mov eax, [esp + 4] // src
+ mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
- pxor xmm7, xmm7 // constant 0 for unpck
+ pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, xmmword ptr kHash16x33
wloop:
- movdqu xmm1, [eax] // src[0-15]
+ movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
- pmulld xmm0, xmm6 // hash *= 33 ^ 16
+ pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
- punpcklbw xmm2, xmm7 // src[0-7]
+ punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
- punpcklwd xmm3, xmm7 // src[0-3]
+ punpcklwd xmm3, xmm7 // src[0-3]
pmulld xmm3, xmm5
movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
- punpckhwd xmm4, xmm7 // src[4-7]
+ punpckhwd xmm4, xmm7 // src[4-7]
pmulld xmm4, xmm5
movdqa xmm5, xmmword ptr kHashMul2
- punpckhbw xmm1, xmm7 // src[8-15]
+ punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
- punpcklwd xmm2, xmm7 // src[8-11]
+ punpcklwd xmm2, xmm7 // src[8-11]
pmulld xmm2, xmm5
movdqa xmm5, xmmword ptr kHashMul3
- punpckhwd xmm1, xmm7 // src[12-15]
+ punpckhwd xmm1, xmm7 // src[12-15]
pmulld xmm1, xmm5
- paddd xmm3, xmm4 // add 16 results
+ paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
paddd xmm1, xmm3
@@ -171,18 +190,18 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
sub ecx, 16
jg wloop
- movd eax, xmm0 // return hash
+ movd eax, xmm0 // return hash
ret
}
}
// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-__declspec(naked)
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
+#ifdef HAS_HASHDJB2_AVX2
+__declspec(naked) uint32_t
+ HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
__asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
+ mov eax, [esp + 4] // src
+ mov ecx, [esp + 8] // count
vmovd xmm0, [esp + 12] // seed
wloop:
@@ -196,7 +215,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
- vpaddd xmm3, xmm3, xmm4 // add 16 results
+ vpaddd xmm3, xmm3, xmm4 // add 16 results
vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
@@ -207,12 +226,12 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
sub ecx, 16
jg wloop
- vmovd eax, xmm0 // return hash
+ vmovd eax, xmm0 // return hash
vzeroupper
ret
}
}
-#endif // _MSC_VER >= 1700
+#endif // HAS_HASHDJB2_AVX2
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)