diff options
Diffstat (limited to 'media/ffvpx/libavcodec/x86/h264_intrapred.asm')
-rw-r--r-- | media/ffvpx/libavcodec/x86/h264_intrapred.asm | 102 |
1 files changed, 71 insertions, 31 deletions
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred.asm b/media/ffvpx/libavcodec/x86/h264_intrapred.asm index c88d91b49e..f3aa3172f0 100644 --- a/media/ffvpx/libavcodec/x86/h264_intrapred.asm +++ b/media/ffvpx/libavcodec/x86/h264_intrapred.asm @@ -49,7 +49,7 @@ cextern pw_17 cextern pw_32 ;----------------------------------------------------------------------------- -; void ff_pred16x16_vertical_8(uint8_t *src, int stride) +; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmx @@ -85,7 +85,7 @@ cglobal pred16x16_vertical_8, 2,3 REP_RET ;----------------------------------------------------------------------------- -; void ff_pred16x16_horizontal_8(uint8_t *src, int stride) +; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED16x16_H 0 @@ -126,7 +126,7 @@ INIT_XMM ssse3 PRED16x16_H ;----------------------------------------------------------------------------- -; void ff_pred16x16_dc_8(uint8_t *src, int stride) +; void ff_pred16x16_dc_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED16x16_DC 0 @@ -188,7 +188,7 @@ INIT_XMM ssse3 PRED16x16_DC ;----------------------------------------------------------------------------- -; void ff_pred16x16_tm_vp8_8(uint8_t *src, int stride) +; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED16x16_TM 0 @@ -268,8 +268,45 @@ cglobal pred16x16_tm_vp8_8, 2,6,6 jg .loop REP_RET +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration + sub dstq, strideq + pmovzxbw m0, [dstq] + vpbroadcastb xm1, [r0-1] + pmovzxbw m1, xm1 + psubw m0, m1 + mov iterationd, 4 + lea stride3q, [strideq*3] +.loop: + vpbroadcastb xm1, [dstq+strideq*1-1] + vpbroadcastb xm2, [dstq+strideq*2-1] + vpbroadcastb xm3, [dstq+stride3q-1] + vpbroadcastb xm4, [dstq+strideq*4-1] + pmovzxbw m1, xm1 + pmovzxbw m2, xm2 + pmovzxbw m3, xm3 + pmovzxbw m4, xm4 + paddw m1, m0 + paddw m2, m0 + paddw m3, m0 + paddw m4, m0 + vpackuswb m1, m1, m2 + vpackuswb m3, m3, m4 + vpermq m1, m1, q3120 + vpermq m3, m3, q3120 + movdqa [dstq+strideq*1], xm1 + vextracti128 [dstq+strideq*2], m1, 1 + movdqa [dstq+stride3q*1], xm3 + vextracti128 [dstq+strideq*4], m3, 1 + lea dstq, [dstq+strideq*4] + dec iterationd + jg .loop + REP_RET +%endif + ;----------------------------------------------------------------------------- -; void ff_pred16x16_plane_*_8(uint8_t *src, int stride) +; void ff_pred16x16_plane_*_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro H264_PRED16x16_PLANE 1 @@ -550,7 +587,7 @@ H264_PRED16x16_PLANE rv40 H264_PRED16x16_PLANE svq3 ;----------------------------------------------------------------------------- -; void ff_pred8x8_plane_8(uint8_t *src, int stride) +; void ff_pred8x8_plane_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro H264_PRED8x8_PLANE 0 @@ -724,7 +761,7 @@ INIT_XMM ssse3 H264_PRED8x8_PLANE ;----------------------------------------------------------------------------- -; void ff_pred8x8_vertical_8(uint8_t *src, int stride) +; void ff_pred8x8_vertical_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmx @@ -741,7 +778,7 @@ cglobal pred8x8_vertical_8, 2,2 RET ;----------------------------------------------------------------------------- -; void ff_pred8x8_horizontal_8(uint8_t *src, int stride) +; void ff_pred8x8_horizontal_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8_H 0 @@ -769,7 +806,7 @@ INIT_MMX ssse3 PRED8x8_H ;----------------------------------------------------------------------------- -; void ff_pred8x8_top_dc_8_mmxext(uint8_t *src, int stride) +; void ff_pred8x8_top_dc_8_mmxext(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext cglobal pred8x8_top_dc_8, 2,5 @@ -803,7 +840,7 @@ cglobal pred8x8_top_dc_8, 2,5 RET ;----------------------------------------------------------------------------- -; void ff_pred8x8_dc_8_mmxext(uint8_t *src, int stride) +; void ff_pred8x8_dc_8_mmxext(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -864,7 +901,7 @@ cglobal pred8x8_dc_8, 2,5 RET ;----------------------------------------------------------------------------- -; void ff_pred8x8_dc_rv40_8(uint8_t *src, int stride) +; void ff_pred8x8_dc_rv40_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -901,7 +938,7 @@ cglobal pred8x8_dc_rv40_8, 2,7 REP_RET ;----------------------------------------------------------------------------- -; void ff_pred8x8_tm_vp8_8(uint8_t *src, int stride) +; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8_TM 0 @@ -1014,7 +1051,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6 ;----------------------------------------------------------------------------- ; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_TOP_DC 0 cglobal pred8x8l_top_dc_8, 4,4 @@ -1070,7 +1107,7 @@ PRED8x8L_TOP_DC ;----------------------------------------------------------------------------- ; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_DC 0 @@ -1174,7 +1211,7 @@ PRED8x8L_DC ;----------------------------------------------------------------------------- ; void ff_pred8x8l_horizontal_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_HORIZONTAL 0 @@ -1246,7 +1283,7 @@ PRED8x8L_HORIZONTAL ;----------------------------------------------------------------------------- ; void ff_pred8x8l_vertical_8(uint8_t *src, int has_topleft, int has_topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_VERTICAL 0 @@ -1297,7 +1334,7 @@ PRED8x8L_VERTICAL ;----------------------------------------------------------------------------- ; void ff_pred8x8l_down_left_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -1498,7 +1535,7 @@ PRED8x8L_DOWN_LEFT ;----------------------------------------------------------------------------- ; void ff_pred8x8l_down_right_8_mmxext(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -1750,7 +1787,7 @@ PRED8x8L_DOWN_RIGHT ;----------------------------------------------------------------------------- ; void ff_pred8x8l_vertical_right_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -1978,7 +2015,7 @@ PRED8x8L_VERTICAL_RIGHT ;----------------------------------------------------------------------------- ; void ff_pred8x8l_vertical_left_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_VERTICAL_LEFT 0 @@ -2068,7 +2105,7 @@ PRED8x8L_VERTICAL_LEFT ;----------------------------------------------------------------------------- ; void ff_pred8x8l_horizontal_up_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED8x8L_HORIZONTAL_UP 0 @@ -2156,7 +2193,7 @@ PRED8x8L_HORIZONTAL_UP ;----------------------------------------------------------------------------- ; void ff_pred8x8l_horizontal_down_8(uint8_t *src, int has_topleft, -; int has_topright, int stride) +; int has_topright, ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -2404,7 +2441,8 @@ INIT_MMX ssse3 PRED8x8L_HORIZONTAL_DOWN ;------------------------------------------------------------------------------- -; void ff_pred4x4_dc_8_mmxext(uint8_t *src, const uint8_t *topright, int stride) +; void ff_pred4x4_dc_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) ;------------------------------------------------------------------------------- INIT_MMX mmxext @@ -2435,7 +2473,7 @@ cglobal pred4x4_dc_8, 3,5 ;----------------------------------------------------------------------------- ; void ff_pred4x4_tm_vp8_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro PRED4x4_TM 0 @@ -2514,7 +2552,7 @@ cglobal pred4x4_tm_vp8_8, 3,3 ;----------------------------------------------------------------------------- ; void ff_pred4x4_vertical_vp8_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -2535,7 +2573,7 @@ cglobal pred4x4_vertical_vp8_8, 3,3 ;----------------------------------------------------------------------------- ; void ff_pred4x4_down_left_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext cglobal pred4x4_down_left_8, 3,3 @@ -2562,7 +2600,7 @@ cglobal pred4x4_down_left_8, 3,3 ;------------------------------------------------------------------------------ ; void ff_pred4x4_vertical_left_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;------------------------------------------------------------------------------ INIT_MMX mmxext @@ -2588,7 +2626,7 @@ cglobal pred4x4_vertical_left_8, 3,3 ;------------------------------------------------------------------------------ ; void ff_pred4x4_horizontal_up_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;------------------------------------------------------------------------------ INIT_MMX mmxext @@ -2622,7 +2660,8 @@ cglobal pred4x4_horizontal_up_8, 3,3 ;------------------------------------------------------------------------------ ; void ff_pred4x4_horizontal_down_8_mmxext(uint8_t *src, -; const uint8_t *topright, int stride) +; const uint8_t *topright, +; ptrdiff_t stride) ;------------------------------------------------------------------------------ INIT_MMX mmxext @@ -2658,7 +2697,8 @@ cglobal pred4x4_horizontal_down_8, 3,3 ;----------------------------------------------------------------------------- ; void ff_pred4x4_vertical_right_8_mmxext(uint8_t *src, -; const uint8_t *topright, int stride) +; const uint8_t *topright, +; ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext @@ -2689,7 +2729,7 @@ cglobal pred4x4_vertical_right_8, 3,3 ;----------------------------------------------------------------------------- ; void ff_pred4x4_down_right_8_mmxext(uint8_t *src, const uint8_t *topright, -; int stride) +; ptrdiff_t stride) ;----------------------------------------------------------------------------- INIT_MMX mmxext |