diff options
Diffstat (limited to 'media/libtheora/lib/arm')
-rw-r--r--[-rwxr-xr-x] | media/libtheora/lib/arm/arm2gnu.pl | 55 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armbits.s | 8 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armcpu.c | 40 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armfrag.s | 11 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armidct.s | 177 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armloop.s | 8 | ||||
-rw-r--r-- | media/libtheora/lib/arm/armopts.s | 2 |
7 files changed, 160 insertions, 141 deletions
diff --git a/media/libtheora/lib/arm/arm2gnu.pl b/media/libtheora/lib/arm/arm2gnu.pl index 8cb68e4a9f..5831bd81e2 100755..100644 --- a/media/libtheora/lib/arm/arm2gnu.pl +++ b/media/libtheora/lib/arm/arm2gnu.pl @@ -23,7 +23,6 @@ $\ = "\n"; # automatically add newline on print $n=0; $thumb = 0; # ARM mode by default, not Thumb. -@proc_stack = (); LINE: while (<>) { @@ -86,19 +85,13 @@ while (<>) { # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there. # if ( /\bAREA\b/ ) { - my $align; - $align = "2"; - if ( /ALIGN=(\d+)/ ) { - $align = $1; - } if ( /CODE/ ) { $nxstack = 1; } s/^(.+)CODE(.+)READONLY(.*)/ .text/; - s/^(.+)DATA(.+)READONLY(.*)/ .section .rdata/; - s/^(.+)\|\|\.data\|\|(.+)/ .data/; + s/^(.+)DATA(.+)READONLY(.*)/ .section .rdata\n .align 2/; + s/^(.+)\|\|\.data\|\|(.+)/ .data\n .align 2/; s/^(.+)\|\|\.bss\|\|(.+)/ .bss/; - s/$/; .p2align $align/; } s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/; # ||.constdata$3|| @@ -112,30 +105,12 @@ while (<>) { s/\bCODE16\b/.code 16/ && do {$thumb = 1}; if (/\bPROC\b/) { - my $prefix; - my $proc; - /^([A-Za-z_\.]\w+)\b/; - $proc = $1; - $prefix = ""; - if ($proc) - { - $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc); - push(@proc_stack, $proc); - s/^[A-Za-z_\.]\w+/$&:/; - } - $prefix = $prefix."\t.thumb_func; " if ($thumb); + print " .thumb_func" if ($thumb); s/\bPROC\b/@ $&/; - $_ = $prefix.$_; } s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/; s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/; - if (/\bENDP\b/) - { - my $proc; - s/\bENDP\b/@ $&/; - $proc = pop(@proc_stack); - $_ = "\t.size $proc, .-$proc".$_ if ($proc); - } + s/\bENDP\b/@ $&/; s/\bSUBT\b/@ $&/; s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25 s/\bKEEP\b/@ $&/; @@ -248,7 +223,6 @@ while (<>) { { my $cmd=$_; my $value; - my $prefix; my $w1; my $w2; my $w3; @@ -267,22 +241,25 @@ while (<>) { if( $bigend ne "") { # big endian - $prefix = "\t.byte\t0x".$w1.";". - "\t.byte\t0x".$w2.";". - "\t.byte\t0x".$w3.";". - "\t.byte\t0x".$w4."; "; + + print " .byte 0x".$w1; + print " .byte 0x".$w2; + print " .byte 0x".$w3; + print " .byte 0x".$w4; } else { # little endian - $prefix = "\t.byte\t0x".$w4.";". - "\t.byte\t0x".$w3.";". - "\t.byte\t0x".$w2.";". - "\t.byte\t0x".$w1."; "; + + print " .byte 0x".$w4; + print " .byte 0x".$w3; + print " .byte 0x".$w2; + print " .byte 0x".$w1; } - $_=$prefix.$_; + } + if ( /\badrl\b/i ) { s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i; diff --git a/media/libtheora/lib/arm/armbits.s b/media/libtheora/lib/arm/armbits.s index 9400722543..0fdb6fdd37 100644 --- a/media/libtheora/lib/arm/armbits.s +++ b/media/libtheora/lib/arm/armbits.s @@ -11,12 +11,18 @@ ;******************************************************************** ; ; function: -; last mod: $Id$ +; last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $ ; ;******************************************************************** AREA |.text|, CODE, READONLY + ; Explicitly specifying alignment here because some versions of + ; gas don't align code correctly. See + ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html + ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992 + ALIGN + EXPORT oc_pack_read_arm EXPORT oc_pack_read1_arm EXPORT oc_huff_token_decode_arm diff --git a/media/libtheora/lib/arm/armcpu.c b/media/libtheora/lib/arm/armcpu.c index f1941bdc15..8b0f9a8574 100644 --- a/media/libtheora/lib/arm/armcpu.c +++ b/media/libtheora/lib/arm/armcpu.c @@ -20,7 +20,7 @@ #include "armcpu.h" #if !defined(OC_ARM_ASM)|| \ - !defined(OC_ARM_ASM_EDSP)&&!defined(OC_ARM_ASM_MEDIA)&& \ + !defined(OC_ARM_ASM_EDSP)&&!defined(OC_ARM_ASM_ARMV6)&& \ !defined(OC_ARM_ASM_NEON) ogg_uint32_t oc_cpu_flags_get(void){ return 0; @@ -107,44 +107,6 @@ ogg_uint32_t oc_cpu_flags_get(void){ return flags; } -#elif defined(__riscos__) -#include <kernel.h> -#include <swis.h> - -ogg_uint32_t oc_cpu_flags_get(void) { - ogg_uint32_t flags = 0; - -#if defined(OC_ARM_ASM_EDSP) || defined(OC_ARM_ASM_MEDIA) - - if (_swi(OS_Byte,_IN(0)|_IN(2)|_RETURN(1), 129, 0xFF) <= 0xA9) - _swix(OS_Module, _INR(0,1), 1, "System:Modules.CallASWI"); - - ogg_uint32_t features; - _kernel_oserror* test = _swix(OS_PlatformFeatures, _IN(0)|_OUT(0), 0, &features); - if (test == NULL) { -#if defined(OC_ARM_ASM_EDSP) - if((features>>10 & 1) == 1)flags|=OC_CPU_ARM_EDSP; -#endif - -#if defined(OC_ARM_ASM_MEDIA) - if ((features>>31 & 1) == 1) { - ogg_uint32_t shadd = 0; - test =_swix(OS_PlatformFeatures, _INR(0,1)|_OUT(0), 34, 29, &shadd); - if (test==NULL && shadd==1)flags|=OC_CPU_ARM_MEDIA; - } -#endif - } -#endif - -#if defined(OC_ARM_ASM_NEON) - ogg_uint32_t mvfr1; - test = _swix(VFPSupport_Features, _IN(0)|_OUT(2), 0, &mvfr1); - if (test==NULL && (mvfr1 & 0xFFF00)==0x11100)flags|=OC_CPU_ARM_NEON; -#endif - - return flags; -} - #else /*The feature registers which can tell us what the processor supports are accessible in priveleged modes only, so we can't have a general user-space diff --git a/media/libtheora/lib/arm/armfrag.s b/media/libtheora/lib/arm/armfrag.s index 38627ed669..e20579eee4 100644 --- a/media/libtheora/lib/arm/armfrag.s +++ b/media/libtheora/lib/arm/armfrag.s @@ -11,11 +11,17 @@ ;******************************************************************** ; Original implementation: ; Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd -; last mod: $Id$ +; last mod: $Id: armfrag.s 17481 2010-10-03 22:49:42Z tterribe $ ;******************************************************************** AREA |.text|, CODE, READONLY + ; Explicitly specifying alignment here because some versions of + ; gas don't align code correctly. See + ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html + ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992 + ALIGN + GET armopts.s ; Vanilla ARM v4 versions @@ -510,7 +516,8 @@ oc_frag_recon_intra_neon PROC ; r0 = unsigned char *_dst ; r1 = int _ystride ; r2 = const ogg_int16_t _residue[64] - VMOV.I16 Q0, #128 + MOV r3, #128 + VDUP.S16 Q0, r3 VLDMIA r2, {D16-D31} ; D16= 3333222211110000 etc ; 9(8) cycles VQADD.S16 Q8, Q8, Q0 VQADD.S16 Q9, Q9, Q0 diff --git a/media/libtheora/lib/arm/armidct.s b/media/libtheora/lib/arm/armidct.s index 68530c7140..babd846ecd 100644 --- a/media/libtheora/lib/arm/armidct.s +++ b/media/libtheora/lib/arm/armidct.s @@ -11,11 +11,17 @@ ;******************************************************************** ; Original implementation: ; Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd -; last mod: $Id$ +; last mod: $Id: armidct.s 17481 2010-10-03 22:49:42Z tterribe $ ;******************************************************************** AREA |.text|, CODE, READONLY + ; Explicitly specifying alignment here because some versions of + ; gas don't align code correctly. See + ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html + ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992 + ALIGN + GET armopts.s EXPORT oc_idct8x8_1_arm @@ -64,8 +70,11 @@ oc_idct8x8_slow_arm BL idct8core_arm BL idct8core_arm LDR r0, [r13], #4 ; Write to the final destination. + ; Clear input data for next block (decoder only). SUB r2, r1, #8*16 - ; Clear input data for next block. + CMP r0, r2 + MOV r1, r13 ; And read from temp storage. + BEQ oc_idct8x8_slow_arm_cols MOV r4, #0 MOV r5, #0 MOV r6, #0 @@ -78,7 +87,7 @@ oc_idct8x8_slow_arm STMIA r2!,{r4,r5,r6,r7} STMIA r2!,{r4,r5,r6,r7} STMIA r2!,{r4,r5,r6,r7} - MOV r1, r13 ; And read from temp storage. +oc_idct8x8_slow_arm_cols ; Column transforms BL idct8core_down_arm BL idct8core_down_arm @@ -102,15 +111,18 @@ oc_idct8x8_10_arm PROC BL idct3core_arm BL idct2core_arm BL idct1core_arm - ; Clear input data for next block. - MOV r4, #0 - STR r4, [r1,#-4*16]! - STR r4, [r1,#4] - STR r4, [r1,#16] - STR r4, [r1,#20] - STR r4, [r1,#32] - STR r4, [r1,#48] + ; Clear input data for next block (decoder only). + SUB r0, r1, #4*16 + CMP r0, r2 MOV r1, r13 ; Read from temp storage. + BEQ oc_idct8x8_10_arm_cols + MOV r4, #0 + STR r4, [r0] + STR r4, [r0,#4] + STR r4, [r0,#16] + STR r4, [r0,#20] + STR r4, [r0,#32] + STR r4, [r0,#48] MOV r0, r2 ; Write to the final destination oc_idct8x8_10_arm_cols ; Column transforms @@ -135,14 +147,18 @@ oc_idct8x8_6_arm PROC BL idct3core_arm BL idct2core_arm BL idct1core_arm - ; Clear input data for next block. - MOV r4, #0 - STR r4, [r1,#-3*16]! - STR r4, [r1,#4] - STR r4, [r1,#16] - STR r4, [r1,#32] + ; Clear input data for next block (decoder only). + SUB r0, r1, #3*16 + CMP r0, r2 MOV r1, r13 ; Read from temp storage. + BEQ oc_idct8x8_6_arm_cols + MOV r4, #0 + STR r4, [r0] + STR r4, [r0,#4] + STR r4, [r0,#16] + STR r4, [r0,#32] MOV r0, r2 ; Write to the final destination +oc_idct8x8_6_arm_cols ; Column transforms BL idct3core_down_arm BL idct3core_down_arm @@ -164,12 +180,14 @@ oc_idct8x8_3_arm PROC MOV r0, r13 ; Write to temp storage. BL idct2core_arm BL idct1core_arm - ; Clear input data for next block. - MOV r4, #0 - STR r4, [r1,#-2*16]! - STR r4, [r1,#16] + ; Clear input data for next block (decoder only). + SUB r0, r1, #2*16 + CMP r0, r2 MOV r1, r13 ; Read from temp storage. - MOV r0, r2 ; Write to the final destination + MOVNE r4, #0 + STRNE r4, [r0] + STRNE r4, [r0,#16] + MOVNE r0, r2 ; Write to the final destination ; Column transforms BL idct2core_down_arm BL idct2core_down_arm @@ -787,26 +805,30 @@ oc_idct8x8_slow_v6 BL idct8_8core_v6 BL idct8_8core_v6 LDR r0, [r13], #4 ; Write to the final destination. - ; Clear input data for next block. + ; Clear input data for next block (decoder only). + SUB r2, r1, #8*16 + CMP r0, r2 + MOV r1, r13 ; And read from temp storage. + BEQ oc_idct8x8_slow_v6_cols MOV r4, #0 MOV r5, #0 - STRD r4, [r1,#-8*16]! - STRD r4, [r1,#8] - STRD r4, [r1,#16] - STRD r4, [r1,#24] - STRD r4, [r1,#32] - STRD r4, [r1,#40] - STRD r4, [r1,#48] - STRD r4, [r1,#56] - STRD r4, [r1,#64] - STRD r4, [r1,#72] - STRD r4, [r1,#80] - STRD r4, [r1,#88] - STRD r4, [r1,#96] - STRD r4, [r1,#104] - STRD r4, [r1,#112] - STRD r4, [r1,#120] - MOV r1, r13 ; And read from temp storage. + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 + STRD r4, [r2], #8 +oc_idct8x8_slow_v6_cols ; Column transforms BL idct8_8core_down_v6 BL idct8_8core_down_v6 @@ -827,16 +849,20 @@ oc_idct8x8_10_v6 PROC BL idct4_3core_v6 BL idct2_1core_v6 LDR r0, [r13], #4 ; Write to the final destination. - ; Clear input data for next block. + ; Clear input data for next block (decoder only). + SUB r2, r1, #4*16 + CMP r0, r2 + AND r1, r13,#4 ; Align the stack. + BEQ oc_idct8x8_10_v6_cols MOV r4, #0 MOV r5, #0 - STRD r4, [r1,#-4*16]! - STRD r4, [r1,#16] - STR r4, [r1,#32] - STR r4, [r1,#48] - AND r1, r13,#4 ; Align the stack. - ADD r1, r1, r13 ; And read from temp storage. + STRD r4, [r2] + STRD r4, [r2,#16] + STR r4, [r2,#32] + STR r4, [r2,#48] +oc_idct8x8_10_v6_cols ; Column transforms + ADD r1, r1, r13 ; And read from temp storage. BL idct4_4core_down_v6 BL idct4_4core_down_v6 BL idct4_4core_down_v6 @@ -852,12 +878,14 @@ oc_idct8x8_3_v6 PROC MOV r8, r0 MOV r0, r13 ; Write to temp storage. BL idct2_1core_v6 - ; Clear input data for next block. - MOV r4, #0 - STR r4, [r1,#-2*16]! - STR r4, [r1,#16] + ; Clear input data for next block (decoder only). + SUB r0, r1, #2*16 + CMP r0, r8 MOV r1, r13 ; Read from temp storage. - MOV r0, r8 ; Write to the final destination. + MOVNE r4, #0 + STRNE r4, [r0] + STRNE r4, [r0,#16] + MOVNE r0, r8 ; Write to the final destination. ; Column transforms BL idct2_2core_down_v6 BL idct2_2core_down_v6 @@ -1013,16 +1041,20 @@ oc_idct8x8_6_v6 PROC ADD r0, r0, r13 ; Write to temp storage. BL idct3_2core_v6 BL idct1core_v6 - ; Clear input data for next block. + ; Clear input data for next block (decoder only). + SUB r0, r1, #3*16 + CMP r0, r8 + AND r1, r13,#4 ; Align the stack. + BEQ oc_idct8x8_6_v6_cols MOV r4, #0 MOV r5, #0 - STRD r4, [r1,#-3*16]! - STR r4, [r1,#16] - STR r4, [r1,#32] - AND r1, r13,#4 ; Align the stack. + STRD r4, [r0] + STR r4, [r0,#16] + STR r4, [r0,#32] MOV r0, r8 ; Write to the final destination. - ADD r1, r1, r13 ; And read from temp storage. +oc_idct8x8_6_v6_cols ; Column transforms + ADD r1, r1, r13 ; And read from temp storage. BL idct3_3core_down_v6 BL idct3_3core_down_v6 BL idct3_3core_down_v6 @@ -1564,6 +1596,7 @@ oc_idct8x8_slow_neon VSWP D23,D30 ; Column transforms BL oc_idct8x8_stage123_neon + CMP r0,r1 ; We have to put the return address back in the LR, or the branch ; predictor will not recognize the function return and mis-predict the ; entire call stack. @@ -1577,6 +1610,7 @@ oc_idct8x8_slow_neon VADD.S16 Q10,Q10,Q5 ; Q10 = y[2]=t[2]'+t[5]'' VSUB.S16 Q12,Q11,Q4 ; Q12 = y[4]=t[3]'-t[4]' VADD.S16 Q11,Q11,Q4 ; Q11 = y[3]=t[3]'+t[4]' + BEQ oc_idct8x8_slow_neon_noclear VMOV.I8 Q2,#0 VPOP {D8-D15} VMOV.I8 Q3,#0 @@ -1594,6 +1628,19 @@ oc_idct8x8_slow_neon VRSHR.S16 Q15,Q15,#4 ; Q15 = y[7]+8>>4 VSTMIA r0, {D16-D31} MOV PC, r14 + +oc_idct8x8_slow_neon_noclear + VPOP {D8-D15} + VRSHR.S16 Q8, Q8, #4 ; Q8 = y[0]+8>>4 + VRSHR.S16 Q9, Q9, #4 ; Q9 = y[1]+8>>4 + VRSHR.S16 Q10,Q10,#4 ; Q10 = y[2]+8>>4 + VRSHR.S16 Q11,Q11,#4 ; Q11 = y[3]+8>>4 + VRSHR.S16 Q12,Q12,#4 ; Q12 = y[4]+8>>4 + VRSHR.S16 Q13,Q13,#4 ; Q13 = y[5]+8>>4 + VRSHR.S16 Q14,Q14,#4 ; Q14 = y[6]+8>>4 + VRSHR.S16 Q15,Q15,#4 ; Q15 = y[7]+8>>4 + VSTMIA r0, {D16-D31} + MOV PC, r14 ENDP oc_idct8x8_stage123_neon PROC @@ -1824,6 +1871,7 @@ oc_idct8x8_10_neon PROC VADD.S16 Q10,Q1, Q2 ; Q10= t[1]'=t[0]+t[2] VSUB.S16 Q2, Q1, Q2 ; Q2 = t[2]'=t[0]-t[2] ; Stage 4 + CMP r0, r1 VADD.S16 Q8, Q11,Q15 ; Q8 = y[0]=t[0]'+t[7]' VADD.S16 Q9, Q10,Q14 ; Q9 = y[1]=t[1]'+t[6]'' VSUB.S16 Q15,Q11,Q15 ; Q15 = y[7]=t[0]'-t[7]' @@ -1832,6 +1880,7 @@ oc_idct8x8_10_neon PROC VADD.S16 Q11,Q3, Q12 ; Q11 = y[3]=t[3]'+t[4]' VSUB.S16 Q12,Q3, Q12 ; Q12 = y[4]=t[3]'-t[4]' VSUB.S16 Q13,Q2, Q13 ; Q13 = y[5]=t[2]'-t[5]'' + BEQ oc_idct8x8_10_neon_noclear VMOV.I8 D2, #0 VRSHR.S16 Q8, Q8, #4 ; Q8 = y[0]+8>>4 VST1.64 {D2}, [r1@64], r12 @@ -1847,6 +1896,18 @@ oc_idct8x8_10_neon PROC VRSHR.S16 Q15,Q15,#4 ; Q15 = y[7]+8>>4 VSTMIA r0, {D16-D31} MOV PC, r14 + +oc_idct8x8_10_neon_noclear + VRSHR.S16 Q8, Q8, #4 ; Q8 = y[0]+8>>4 + VRSHR.S16 Q9, Q9, #4 ; Q9 = y[1]+8>>4 + VRSHR.S16 Q10,Q10,#4 ; Q10 = y[2]+8>>4 + VRSHR.S16 Q11,Q11,#4 ; Q11 = y[3]+8>>4 + VRSHR.S16 Q12,Q12,#4 ; Q12 = y[4]+8>>4 + VRSHR.S16 Q13,Q13,#4 ; Q13 = y[5]+8>>4 + VRSHR.S16 Q14,Q14,#4 ; Q14 = y[6]+8>>4 + VRSHR.S16 Q15,Q15,#4 ; Q15 = y[7]+8>>4 + VSTMIA r0, {D16-D31} + MOV PC, r14 ENDP ] diff --git a/media/libtheora/lib/arm/armloop.s b/media/libtheora/lib/arm/armloop.s index bbd4d630ed..0a1d4705e7 100644 --- a/media/libtheora/lib/arm/armloop.s +++ b/media/libtheora/lib/arm/armloop.s @@ -11,11 +11,17 @@ ;******************************************************************** ; Original implementation: ; Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd -; last mod: $Id$ +; last mod: $Id: armloop.s 17481 2010-10-03 22:49:42Z tterribe $ ;******************************************************************** AREA |.text|, CODE, READONLY + ; Explicitly specifying alignment here because some versions of + ; gas don't align code correctly. See + ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html + ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992 + ALIGN + GET armopts.s EXPORT oc_loop_filter_frag_rows_arm diff --git a/media/libtheora/lib/arm/armopts.s b/media/libtheora/lib/arm/armopts.s index 4dfdca9608..e4da429e47 100644 --- a/media/libtheora/lib/arm/armopts.s +++ b/media/libtheora/lib/arm/armopts.s @@ -11,7 +11,7 @@ ;******************************************************************** ; Original implementation: ; Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd -; last mod: $Id$ +; last mod: $Id: armopts.s.in 17430 2010-09-22 21:54:09Z tterribe $ ;******************************************************************** ; Set the following to 1 if we have EDSP instructions |