diff options
author | Matt A. Tobin <email@mattatobin.com> | 2020-04-07 23:30:51 -0400 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2020-04-07 23:30:51 -0400 |
commit | 5545a8983ff0ef1fb52e64aef8e66fa9b13c1cbb (patch) | |
tree | 45d55e3e5e73c4255c4d71258d9be5b2d004d28f /third_party/aom/aom_dsp/x86/subtract_avx2.c | |
parent | 50f1986697a7412e4160976fa5e11217b4ef1f44 (diff) | |
download | uxp-5545a8983ff0ef1fb52e64aef8e66fa9b13c1cbb.tar.gz |
Move aom source to a sub-directory under media/libaom
There is no damned reason to treat this differently than any other media lib given its license and there never was.
Diffstat (limited to 'third_party/aom/aom_dsp/x86/subtract_avx2.c')
-rw-r--r-- | third_party/aom/aom_dsp/x86/subtract_avx2.c | 108 |
1 files changed, 0 insertions, 108 deletions
diff --git a/third_party/aom/aom_dsp/x86/subtract_avx2.c b/third_party/aom/aom_dsp/x86/subtract_avx2.c deleted file mode 100644 index 4389d123db..0000000000 --- a/third_party/aom/aom_dsp/x86/subtract_avx2.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include <immintrin.h> - -#include "config/aom_dsp_rtcd.h" - -static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr, - const uint8_t *pred_ptr) { - __m256i s = _mm256_lddqu_si256((__m256i *)(src_ptr)); - __m256i p = _mm256_lddqu_si256((__m256i *)(pred_ptr)); - __m256i s_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s)); - __m256i s_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s, 1)); - __m256i p_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(p)); - __m256i p_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(p, 1)); - const __m256i d_0 = _mm256_sub_epi16(s_0, p_0); - const __m256i d_1 = _mm256_sub_epi16(s_1, p_1); - _mm256_store_si256((__m256i *)(diff_ptr), d_0); - _mm256_store_si256((__m256i *)(diff_ptr + 16), d_1); -} - -static INLINE void aom_subtract_block_16xn_avx2( - int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, - ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { - for (int32_t j = 0; j < rows; ++j) { - __m128i s = _mm_lddqu_si128((__m128i *)(src_ptr)); - __m128i p = _mm_lddqu_si128((__m128i *)(pred_ptr)); - __m256i s_0 = _mm256_cvtepu8_epi16(s); - __m256i p_0 = _mm256_cvtepu8_epi16(p); - const __m256i d_0 = _mm256_sub_epi16(s_0, p_0); - _mm256_store_si256((__m256i *)(diff_ptr), d_0); - src_ptr += src_stride; - pred_ptr += pred_stride; - diff_ptr += diff_stride; - } -} - -static INLINE void aom_subtract_block_32xn_avx2( - int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, - ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { - for (int32_t j = 0; j < rows; ++j) { - subtract32_avx2(diff_ptr, src_ptr, pred_ptr); - src_ptr += src_stride; - pred_ptr += pred_stride; - diff_ptr += diff_stride; - } -} - -static INLINE void aom_subtract_block_64xn_avx2( - int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, - ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { - for (int32_t j = 0; j < rows; ++j) { - subtract32_avx2(diff_ptr, src_ptr, pred_ptr); - subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32); - src_ptr += src_stride; - pred_ptr += pred_stride; - diff_ptr += diff_stride; - } -} - -static INLINE void aom_subtract_block_128xn_avx2( - int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, - ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { - for (int32_t j = 0; j < rows; ++j) { - subtract32_avx2(diff_ptr, src_ptr, pred_ptr); - subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32); - subtract32_avx2(diff_ptr + 64, src_ptr + 64, pred_ptr + 64); - subtract32_avx2(diff_ptr + 96, src_ptr + 96, pred_ptr + 96); - src_ptr += src_stride; - pred_ptr += pred_stride; - diff_ptr += diff_stride; - } -} - -void aom_subtract_block_avx2(int rows, int cols, int16_t *diff_ptr, - ptrdiff_t diff_stride, const uint8_t *src_ptr, - ptrdiff_t src_stride, const uint8_t *pred_ptr, - ptrdiff_t pred_stride) { - switch (cols) { - case 16: - aom_subtract_block_16xn_avx2(rows, diff_ptr, diff_stride, src_ptr, - src_stride, pred_ptr, pred_stride); - break; - case 32: - aom_subtract_block_32xn_avx2(rows, diff_ptr, diff_stride, src_ptr, - src_stride, pred_ptr, pred_stride); - break; - case 64: - aom_subtract_block_64xn_avx2(rows, diff_ptr, diff_stride, src_ptr, - src_stride, pred_ptr, pred_stride); - break; - case 128: - aom_subtract_block_128xn_avx2(rows, diff_ptr, diff_stride, src_ptr, - src_stride, pred_ptr, pred_stride); - break; - default: - aom_subtract_block_sse2(rows, cols, diff_ptr, diff_stride, src_ptr, - src_stride, pred_ptr, pred_stride); - break; - } -} |