diff options
Diffstat (limited to 'media/libjpeg/simd/jcsample-sse2.asm')
-rw-r--r-- | media/libjpeg/simd/jcsample-sse2.asm | 350 |
1 files changed, 0 insertions, 350 deletions
diff --git a/media/libjpeg/simd/jcsample-sse2.asm b/media/libjpeg/simd/jcsample-sse2.asm deleted file mode 100644 index 83c9d152a7..0000000000 --- a/media/libjpeg/simd/jcsample-sse2.asm +++ /dev/null @@ -1,350 +0,0 @@ -; -; jcsample.asm - downsampling (SSE2) -; -; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Downsample pixel values of a single component. -; This version handles the common case of 2:1 horizontal and 1:1 vertical, -; without smoothing. -; -; GLOBAL(void) -; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v1_downsample_sse2) - -EXTN(jsimd_h2v1_downsample_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 -.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v1_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00010000 ; bias pattern - movd xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - alignx 16,7 - -.columnloop_r8: - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - pxor xmm1,xmm1 - mov ecx, SIZEOF_XMMWORD - jmp short .downsample - alignx 16,7 - -.columnloop: - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm2,xmm0 - movdqa xmm3,xmm1 - - pand xmm0,xmm6 - psrlw xmm2,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm3,BYTE_BIT - - paddw xmm0,xmm2 - paddw xmm1,xmm3 - paddw xmm0,xmm7 - paddw xmm1,xmm7 - psrlw xmm0,1 - psrlw xmm1,1 - - packuswb xmm0,xmm1 - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - - sub ecx, byte SIZEOF_XMMWORD ; outcol - add esi, byte 2*SIZEOF_XMMWORD ; inptr - add edi, byte 1*SIZEOF_XMMWORD ; outptr - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - test ecx,ecx - jnz short .columnloop_r8 - - pop esi - pop edi - pop ecx - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Downsample pixel values of a single component. -; This version handles the standard case of 2:1 horizontal and 2:1 vertical, -; without smoothing. -; -; GLOBAL(void) -; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v2_downsample_sse2) - -EXTN(jsimd_h2v2_downsample_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 -.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v2_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00020001 ; bias pattern - movd xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 - mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 - mov edi, JSAMPROW [edi] ; outptr - - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - alignx 16,7 - -.columnloop_r8: - movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] - pxor xmm2,xmm2 - pxor xmm3,xmm3 - mov ecx, SIZEOF_XMMWORD - jmp short .downsample - alignx 16,7 - -.columnloop: - movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD] - movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm4,xmm0 - movdqa xmm5,xmm1 - pand xmm0,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm0,xmm4 - paddw xmm1,xmm5 - - movdqa xmm4,xmm2 - movdqa xmm5,xmm3 - pand xmm2,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm3,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm2,xmm4 - paddw xmm3,xmm5 - - paddw xmm0,xmm1 - paddw xmm2,xmm3 - paddw xmm0,xmm7 - paddw xmm2,xmm7 - psrlw xmm0,2 - psrlw xmm2,2 - - packuswb xmm0,xmm2 - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - - sub ecx, byte SIZEOF_XMMWORD ; outcol - add edx, byte 2*SIZEOF_XMMWORD ; inptr0 - add esi, byte 2*SIZEOF_XMMWORD ; inptr1 - add edi, byte 1*SIZEOF_XMMWORD ; outptr - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - test ecx,ecx - jnz near .columnloop_r8 - - pop esi - pop edi - pop ecx - - add esi, byte 2*SIZEOF_JSAMPROW ; input_data - add edi, byte 1*SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 |