diff options
Diffstat (limited to 'media/libjpeg/simd/jcsample-mmx.asm')
-rw-r--r-- | media/libjpeg/simd/jcsample-mmx.asm | 323 |
1 files changed, 0 insertions, 323 deletions
diff --git a/media/libjpeg/simd/jcsample-mmx.asm b/media/libjpeg/simd/jcsample-mmx.asm deleted file mode 100644 index 6cd544e74d..0000000000 --- a/media/libjpeg/simd/jcsample-mmx.asm +++ /dev/null @@ -1,323 +0,0 @@ -; -; jcsample.asm - downsampling (MMX) -; -; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Downsample pixel values of a single component. -; This version handles the common case of 2:1 horizontal and 1:1 vertical, -; without smoothing. 
;
; GLOBAL(void)
; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
;                            JDIMENSION v_samp_factor, JDIMENSION width_blocks,
;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; Arguments are read from the stack relative to ebp (offsets +8 .. +28,
; see the %define list below).
;
; Work done, in order:
;   1. output_cols = width_blocks << 3.  Return immediately if that is zero.
;   2. Right-edge padding: when (output_cols * 2 - image_width) > 0 and
;      max_v_samp_factor > 0, each of the first max_v_samp_factor rows of
;      input_data gets its sample at index image_width-1 replicated into
;      the following (output_cols * 2 - image_width) bytes.
;   3. Downsampling: for each of v_samp_factor rows, every pair of
;      horizontally adjacent input samples is summed, a bias alternating
;      0,1 is added, and the sum is shifted right by one to give one output
;      sample.
;
; Register use: ebp, esi and edi are saved and restored; eax, ecx, edx,
; mm0-mm3, mm6 and mm7 are overwritten.  The MMX state is cleared with
; emms only on the path that actually ran the MMX loop.
;
; NOTE(review): the column loop counts output_cols down in steps of
; SIZEOF_MMWORD and exits only on exactly zero, so it relies on output_cols
; being a multiple of SIZEOF_MMWORD (presumably 8; the constant comes from
; jsimdext.inc) -- confirm.
;

%define img_width(b)    (b)+8           ; JDIMENSION image_width
%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
%define output_data(b)  (b)+28          ; JSAMPARRAY output_data

        align   16
        global  EXTN(jsimd_h2v1_downsample_mmx)

EXTN(jsimd_h2v1_downsample_mmx):
        push    ebp
        mov     ebp, esp
        ; ebx is never touched here; ecx and edx are used without being
        ; saved (the original author notes they need not be preserved).
        push    esi
        push    edi

        mov     ecx, JDIMENSION [width_blks(ebp)]
        shl     ecx, 3                  ; ecx = output_cols (width_blocks * DCTSIZE)
        jz      near .done              ; zero blocks: nothing to do

        mov     edx, JDIMENSION [img_width(ebp)]

        ; ---- step 2: pad the right edge of the input rows ----

        push    ecx                     ; keep output_cols for step 3
        shl     ecx, 1                  ; output_cols * 2
        sub     ecx, edx                ; ecx = number of bytes to pad per row
        jle     short .pad_done         ; rows are already wide enough

        mov     eax, INT [max_v_samp(ebp)]      ; eax = rows left to pad
        test    eax, eax
        jle     short .pad_done

        cld                             ; rep stosb must walk upwards
        mov     esi, JSAMPARRAY [input_data(ebp)]       ; esi -> current row pointer
        alignx  16,7
.pad_row:
        push    eax                     ; al is about to carry the fill sample
        push    ecx                     ; rep stosb consumes ecx

        mov     edi, JSAMPROW [esi]
        add     edi, edx                ; edi -> first byte past the real samples
        mov     al, JSAMPLE [edi-1]     ; al = last real sample of the row

        rep stosb                       ; replicate it ecx times

        pop     ecx
        pop     eax

        add     esi, byte SIZEOF_JSAMPROW
        dec     eax
        jg      short .pad_row

.pad_done:
        pop     ecx                     ; ecx = output_cols again

        ; ---- step 3: 2:1 horizontal downsample ----

        mov     eax, JDIMENSION [v_samp(ebp)]   ; eax = rows left to produce
        test    eax, eax
        jle     near .done

        mov     edx, 0x00010000         ; low word 0, high word 1
        movd    mm7, edx
        pcmpeqw mm6, mm6                ; mm6 = all ones
        punpckldq mm7, mm7              ; mm7 = words {0, 1, 0, 1} (rounding bias)
        psrlw   mm6, BYTE_BIT           ; mm6 = low-byte mask in every word

        mov     esi, JSAMPARRAY [input_data(ebp)]       ; esi -> input row pointer
        mov     edi, JSAMPARRAY [output_data(ebp)]      ; edi -> output row pointer
        alignx  16,7
.next_row:
        push    ecx                     ; the inner loop destroys all three
        push    edi
        push    esi

        mov     esi, JSAMPROW [esi]     ; esi = read cursor in the input row
        mov     edi, JSAMPROW [edi]     ; edi = write cursor in the output row
        alignx  16,7
.next_group:

        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
        movq    mm1, MMWORD [esi+1*SIZEOF_MMWORD]
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, mm6                ; even-indexed samples, one per word
        psrlw   mm2, BYTE_BIT           ; odd-indexed samples, one per word
        pand    mm1, mm6
        psrlw   mm3, BYTE_BIT

        paddw   mm0, mm2                ; pair sums
        paddw   mm1, mm3
        paddw   mm0, mm7                ; + alternating bias
        paddw   mm1, mm7
        psrlw   mm0, 1                  ; / 2
        psrlw   mm1, 1

        packuswb mm0, mm1               ; words back to bytes

        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0

        add     esi, byte 2*SIZEOF_MMWORD       ; two input quadwords consumed
        add     edi, byte 1*SIZEOF_MMWORD       ; one output quadword produced
        sub     ecx, byte SIZEOF_MMWORD         ; output columns remaining
        jnz     short .next_group

        pop     esi
        pop     edi
        pop     ecx

        add     esi, byte SIZEOF_JSAMPROW       ; next input row pointer
        add     edi, byte SIZEOF_JSAMPROW       ; next output row pointer
        dec     eax
        jg      short .next_row

        emms                            ; leave MMX state empty

.done:
        pop     edi
        pop     esi
        pop     ebp
        ret

; --------------------------------------------------------------------------
;
; Downsample pixel values of a single component.
; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
;                            JDIMENSION v_samp_factor, JDIMENSION width_blocks,
;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; Arguments are read from the stack relative to ebp (offsets +8 .. +28,
; see the %define list below).
;
; Work done, in order:
;   1. output_cols = width_blocks << 3.  Return immediately if that is zero.
;   2. Right-edge padding: when (output_cols * 2 - image_width) > 0 and
;      max_v_samp_factor > 0, each of the first max_v_samp_factor rows of
;      input_data gets its sample at index image_width-1 replicated into
;      the following (output_cols * 2 - image_width) bytes.
;   3. Downsampling: each output row is built from two consecutive input
;      rows.  Every 2x2 group of input samples is summed, a bias
;      alternating 1,2 is added, and the sum is shifted right by two to
;      give one output sample.  The loop runs v_samp_factor times and
;      advances two input rows per output row.
;
; Register use: ebp, esi and edi are saved and restored; eax, ecx, edx,
; mm0-mm7 are overwritten.  The MMX state is cleared with emms only on the
; path that actually ran the MMX loop.
;
; NOTE(review): the column loop counts output_cols down in steps of
; SIZEOF_MMWORD and exits only on exactly zero, so it relies on output_cols
; being a multiple of SIZEOF_MMWORD (presumably 8; the constant comes from
; jsimdext.inc) -- confirm.
;

%define img_width(b)    (b)+8           ; JDIMENSION image_width
%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
%define output_data(b)  (b)+28          ; JSAMPARRAY output_data

        align   16
        global  EXTN(jsimd_h2v2_downsample_mmx)

EXTN(jsimd_h2v2_downsample_mmx):
        push    ebp
        mov     ebp, esp
        ; ebx is never touched here; ecx and edx are used without being
        ; saved (the original author notes they need not be preserved).
        push    esi
        push    edi

        mov     ecx, JDIMENSION [width_blks(ebp)]
        shl     ecx, 3                  ; ecx = output_cols (width_blocks * DCTSIZE)
        jz      near .done              ; zero blocks: nothing to do

        mov     edx, JDIMENSION [img_width(ebp)]

        ; ---- step 2: pad the right edge of the input rows ----

        push    ecx                     ; keep output_cols for step 3
        shl     ecx, 1                  ; output_cols * 2
        sub     ecx, edx                ; ecx = number of bytes to pad per row
        jle     short .pad_done         ; rows are already wide enough

        mov     eax, INT [max_v_samp(ebp)]      ; eax = rows left to pad
        test    eax, eax
        jle     short .pad_done

        cld                             ; rep stosb must walk upwards
        mov     esi, JSAMPARRAY [input_data(ebp)]       ; esi -> current row pointer
        alignx  16,7
.pad_row:
        push    eax                     ; al is about to carry the fill sample
        push    ecx                     ; rep stosb consumes ecx

        mov     edi, JSAMPROW [esi]
        add     edi, edx                ; edi -> first byte past the real samples
        mov     al, JSAMPLE [edi-1]     ; al = last real sample of the row

        rep stosb                       ; replicate it ecx times

        pop     ecx
        pop     eax

        add     esi, byte SIZEOF_JSAMPROW
        dec     eax
        jg      short .pad_row

.pad_done:
        pop     ecx                     ; ecx = output_cols again

        ; ---- step 3: 2:1 horizontal, 2:1 vertical downsample ----

        mov     eax, JDIMENSION [v_samp(ebp)]   ; eax = output rows left
        test    eax, eax
        jle     near .done

        mov     edx, 0x00020001         ; low word 1, high word 2
        movd    mm7, edx
        pcmpeqw mm6, mm6                ; mm6 = all ones
        punpckldq mm7, mm7              ; mm7 = words {1, 2, 1, 2} (rounding bias)
        psrlw   mm6, BYTE_BIT           ; mm6 = low-byte mask in every word

        mov     esi, JSAMPARRAY [input_data(ebp)]       ; esi -> input row pointers
        mov     edi, JSAMPARRAY [output_data(ebp)]      ; edi -> output row pointer
        alignx  16,7
.next_row:
        push    ecx                     ; the inner loop destroys all three
        push    edi
        push    esi

        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; edx = cursor in upper row
        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; esi = cursor in lower row
        mov     edi, JSAMPROW [edi]                     ; edi = write cursor
        alignx  16,7
.next_group:

        movq    mm0, MMWORD [edx+0*SIZEOF_MMWORD]
        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]
        movq    mm2, MMWORD [edx+1*SIZEOF_MMWORD]
        movq    mm3, MMWORD [esi+1*SIZEOF_MMWORD]

        ; first quadword of each row: horizontal pair sums
        movq    mm4, mm0
        movq    mm5, mm1
        pand    mm0, mm6                ; even-indexed samples
        psrlw   mm4, BYTE_BIT           ; odd-indexed samples
        pand    mm1, mm6
        psrlw   mm5, BYTE_BIT
        paddw   mm0, mm4
        paddw   mm1, mm5

        ; second quadword of each row: horizontal pair sums
        movq    mm4, mm2
        movq    mm5, mm3
        pand    mm2, mm6
        psrlw   mm4, BYTE_BIT
        pand    mm3, mm6
        psrlw   mm5, BYTE_BIT
        paddw   mm2, mm4
        paddw   mm3, mm5

        paddw   mm0, mm1                ; add upper and lower rows
        paddw   mm2, mm3
        paddw   mm0, mm7                ; + alternating bias
        paddw   mm2, mm7
        psrlw   mm0, 2                  ; / 4
        psrlw   mm2, 2

        packuswb mm0, mm2               ; words back to bytes

        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0

        add     edx, byte 2*SIZEOF_MMWORD       ; upper-row cursor
        add     esi, byte 2*SIZEOF_MMWORD       ; lower-row cursor
        add     edi, byte 1*SIZEOF_MMWORD       ; output cursor
        sub     ecx, byte SIZEOF_MMWORD         ; output columns remaining
        jnz     near .next_group

        pop     esi
        pop     edi
        pop     ecx

        add     esi, byte 2*SIZEOF_JSAMPROW     ; skip the two rows just read
        add     edi, byte 1*SIZEOF_JSAMPROW     ; next output row pointer
        dec     eax
        jg      near .next_row

        emms                            ; leave MMX state empty

.done:
        pop     edi
        pop     esi
        pop     ebp
        ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
        align   16