summary | refs | log | tree | commit | diff
path: root/media/libaom/src/av1/common/x86/reconinter_avx2.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/libaom/src/av1/common/x86/reconinter_avx2.c')
-rw-r--r-- media/libaom/src/av1/common/x86/reconinter_avx2.c | 48
1 files changed, 24 insertions, 24 deletions
diff --git a/media/libaom/src/av1/common/x86/reconinter_avx2.c b/media/libaom/src/av1/common/x86/reconinter_avx2.c
index f645e0454..a38bd8317 100644
--- a/media/libaom/src/av1/common/x86/reconinter_avx2.c
+++ b/media/libaom/src/av1/common/x86/reconinter_avx2.c
@@ -28,8 +28,8 @@ static INLINE __m256i calc_mask_avx2(const __m256i mask_base, const __m256i s0,
}
void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
DIFFWTD_MASK_TYPE mask_type,
- const uint8_t *src0, int stride0,
- const uint8_t *src1, int stride1,
+ const uint8_t *src0, int src0_stride,
+ const uint8_t *src1, int src1_stride,
int h, int w) {
const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0;
const __m256i y_mask_base = _mm256_set1_epi16(38 - mb);
@@ -37,18 +37,18 @@ void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
if (4 == w) {
do {
const __m128i s0A = xx_loadl_32(src0);
- const __m128i s0B = xx_loadl_32(src0 + stride0);
- const __m128i s0C = xx_loadl_32(src0 + stride0 * 2);
- const __m128i s0D = xx_loadl_32(src0 + stride0 * 3);
+ const __m128i s0B = xx_loadl_32(src0 + src0_stride);
+ const __m128i s0C = xx_loadl_32(src0 + src0_stride * 2);
+ const __m128i s0D = xx_loadl_32(src0 + src0_stride * 3);
const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
const __m128i s0CD = _mm_unpacklo_epi32(s0C, s0D);
const __m128i s0ABCD = _mm_unpacklo_epi64(s0AB, s0CD);
const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD);
const __m128i s1A = xx_loadl_32(src1);
- const __m128i s1B = xx_loadl_32(src1 + stride1);
- const __m128i s1C = xx_loadl_32(src1 + stride1 * 2);
- const __m128i s1D = xx_loadl_32(src1 + stride1 * 3);
+ const __m128i s1B = xx_loadl_32(src1 + src1_stride);
+ const __m128i s1C = xx_loadl_32(src1 + src1_stride * 2);
+ const __m128i s1D = xx_loadl_32(src1 + src1_stride * 3);
const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
const __m128i s1CD = _mm_unpacklo_epi32(s1C, s1D);
const __m128i s1ABCD = _mm_unpacklo_epi64(s1AB, s1CD);
@@ -58,40 +58,40 @@ void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
const __m128i x_m8 =
_mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8));
xx_storeu_128(mask, x_m8);
- src0 += (stride0 << 2);
- src1 += (stride1 << 2);
+ src0 += (src0_stride << 2);
+ src1 += (src1_stride << 2);
mask += 16;
i += 4;
} while (i < h);
} else if (8 == w) {
do {
const __m128i s0A = xx_loadl_64(src0);
- const __m128i s0B = xx_loadl_64(src0 + stride0);
- const __m128i s0C = xx_loadl_64(src0 + stride0 * 2);
- const __m128i s0D = xx_loadl_64(src0 + stride0 * 3);
+ const __m128i s0B = xx_loadl_64(src0 + src0_stride);
+ const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2);
+ const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3);
const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C));
const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D));
const __m128i s1A = xx_loadl_64(src1);
- const __m128i s1B = xx_loadl_64(src1 + stride1);
- const __m128i s1C = xx_loadl_64(src1 + stride1 * 2);
- const __m128i s1D = xx_loadl_64(src1 + stride1 * 3);
+ const __m128i s1B = xx_loadl_64(src1 + src1_stride);
+ const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2);
+ const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3);
const __m256i s1AB_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1A, s1C));
const __m256i s1CD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1B, s1D));
const __m256i m16AC = calc_mask_avx2(y_mask_base, s0AC_w, s1AB_w);
const __m256i m16BD = calc_mask_avx2(y_mask_base, s0BD_w, s1CD_w);
const __m256i m8 = _mm256_packus_epi16(m16AC, m16BD);
yy_storeu_256(mask, m8);
- src0 += stride0 << 2;
- src1 += stride1 << 2;
+ src0 += src0_stride << 2;
+ src1 += src1_stride << 2;
mask += 32;
i += 4;
} while (i < h);
} else if (16 == w) {
do {
const __m128i s0A = xx_load_128(src0);
- const __m128i s0B = xx_load_128(src0 + stride0);
+ const __m128i s0B = xx_load_128(src0 + src0_stride);
const __m128i s1A = xx_load_128(src1);
- const __m128i s1B = xx_load_128(src1 + stride1);
+ const __m128i s1B = xx_load_128(src1 + src1_stride);
const __m256i s0AL = _mm256_cvtepu8_epi16(s0A);
const __m256i s0BL = _mm256_cvtepu8_epi16(s0B);
const __m256i s1AL = _mm256_cvtepu8_epi16(s1A);
@@ -103,8 +103,8 @@ void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
const __m256i m8 =
_mm256_permute4x64_epi64(_mm256_packus_epi16(m16AL, m16BL), 0xd8);
yy_storeu_256(mask, m8);
- src0 += stride0 << 1;
- src1 += stride1 << 1;
+ src0 += src0_stride << 1;
+ src1 += src1_stride << 1;
mask += 32;
i += 2;
} while (i < h);
@@ -127,8 +127,8 @@ void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
yy_storeu_256(mask + j, m8);
j += 32;
} while (j < w);
- src0 += stride0;
- src1 += stride1;
+ src0 += src0_stride;
+ src1 += src1_stride;
mask += w;
i += 1;
} while (i < h);