diff options
Diffstat (limited to 'media/libopus/celt/x86/x86cpu.h')
-rw-r--r-- | media/libopus/celt/x86/x86cpu.h | 32 |
1 files changed, 2 insertions, 30 deletions
diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h index 04fd48aac4..0de8df3556 100644 --- a/media/libopus/celt/x86/x86cpu.h +++ b/media/libopus/celt/x86/x86cpu.h @@ -56,38 +56,10 @@ int opus_select_arch(void); # endif -/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() - or _mm_cvtepi16_epi32() when optimizations are disabled, even though the - actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory - reference, these require 16-byte alignment and load a full 16 bytes (instead - of 4 or 8), possibly reading out of bounds. - - We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or - _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 - reference in the PMOVSXWD instruction itself, but gcc is not smart enough to - optimize this out when optimizations ARE enabled. - - Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 - (which is fair, since technically the compiler is always allowed to do the - dereference before invoking the function implementing the intrinsic). - However, it is smart enough to eliminate the extra MOVD instruction. - For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out - the extra MOVQ if it's specified explicitly */ - -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI8_EPI32_M32(x) \ +#define OP_CVTEPI8_EPI32_M32(x) \ (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) -# else -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(*(__m128i *)(x))) -#endif -# if !defined(__OPTIMIZE__) -# define OP_CVTEPI16_EPI32_M64(x) \ +#define OP_CVTEPI16_EPI32_M64(x) \ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) -# else -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(*(__m128i *)(x))) -# endif #endif |