55 files changed, 0 insertions, 16769 deletions
diff --git a/media/libtheora/lib/apiwrapper.c b/media/libtheora/lib/apiwrapper.c
deleted file mode 100644
index dc959b8d1..000000000
--- a/media/libtheora/lib/apiwrapper.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: apiwrapper.c 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "apiwrapper.h"
-
-
-
-const char *theora_version_string(void){
-  return th_version_string();
-}
-
-ogg_uint32_t theora_version_number(void){
-  return th_version_number();
-}
-
-void theora_info_init(theora_info *_ci){
-  memset(_ci,0,sizeof(*_ci));
-}
-
-void theora_info_clear(theora_info *_ci){
-  th_api_wrapper *api;
-  api=(th_api_wrapper *)_ci->codec_setup;
-  memset(_ci,0,sizeof(*_ci));
-  if(api!=NULL){
-    if(api->clear!=NULL)(*api->clear)(api);
-    _ogg_free(api);
-  }
-}
-
-void theora_clear(theora_state *_th){
-  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
-  if(_th->internal_decode!=NULL){
-    (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th);
-  }
-  if(_th->internal_encode!=NULL){
-    (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th);
-  }
-  if(_th->i!=NULL)theora_info_clear(_th->i);
-  memset(_th,0,sizeof(*_th));
-}
-
-int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
-  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
-  if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th,
-     _req,_buf,_buf_sz);
-  }
-  else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th,
-     _req,_buf,_buf_sz);
-  }
-  else return TH_EINVAL;
-}
-
-ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
-  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
-  if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)(
-     _th,_gp);
-  }
-  else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)(
-     _th,_gp);
-  }
-  else return -1;
-}
-
-double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
-  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
-  if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)(
-     _th,_gp);
-  }
-  else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)(
-     _th,_gp);
-  }
-  else return -1;
-}
-
-void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
-  _info->version_major=_ci->version_major;
-  _info->version_minor=_ci->version_minor;
-  _info->version_subminor=_ci->version_subminor;
-  _info->frame_width=_ci->width;
-  _info->frame_height=_ci->height;
-  _info->pic_width=_ci->frame_width;
-  _info->pic_height=_ci->frame_height;
-  _info->pic_x=_ci->offset_x;
-  _info->pic_y=_ci->offset_y;
-  _info->fps_numerator=_ci->fps_numerator;
-  _info->fps_denominator=_ci->fps_denominator;
-  _info->aspect_numerator=_ci->aspect_numerator;
-  _info->aspect_denominator=_ci->aspect_denominator;
-  switch(_ci->colorspace){
-    case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break;
-    case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break;
-    default:_info->colorspace=TH_CS_UNSPECIFIED;break;
-  }
-  switch(_ci->pixelformat){
-    case OC_PF_420:_info->pixel_fmt=TH_PF_420;break;
-    case OC_PF_422:_info->pixel_fmt=TH_PF_422;break;
-    case OC_PF_444:_info->pixel_fmt=TH_PF_444;break;
-    default:_info->pixel_fmt=TH_PF_RSVD;
-  }
-  _info->target_bitrate=_ci->target_bitrate;
-  _info->quality=_ci->quality;
-  _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0?
-   OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0;
-}
-
-int theora_packet_isheader(ogg_packet *_op){
-  return th_packet_isheader(_op);
-}
-
-int theora_packet_iskeyframe(ogg_packet *_op){
-  return th_packet_iskeyframe(_op);
-}
-
-int theora_granule_shift(theora_info *_ci){
-  /*This breaks when keyframe_frequency_force is not positive or is larger than
-     2**31 (if your int is more than 32 bits), but that's what the original
-     function does.*/
-  return oc_ilog(_ci->keyframe_frequency_force-1);
-}
-
-void theora_comment_init(theora_comment *_tc){
-  th_comment_init((th_comment *)_tc);
-}
-
-char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
-  return th_comment_query((th_comment *)_tc,_tag,_count);
-}
-
-int theora_comment_query_count(theora_comment *_tc,char *_tag){
-  return th_comment_query_count((th_comment *)_tc,_tag);
-}
-
-void theora_comment_clear(theora_comment *_tc){
-  th_comment_clear((th_comment *)_tc);
-}
-
-void theora_comment_add(theora_comment *_tc,char *_comment){
-  th_comment_add((th_comment *)_tc,_comment);
-}
-
-void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
-  th_comment_add_tag((th_comment *)_tc,_tag,_value);
-}
diff --git a/media/libtheora/lib/apiwrapper.h b/media/libtheora/lib/apiwrapper.h
deleted file mode 100644
index ff45e0a4d..000000000
--- a/media/libtheora/lib/apiwrapper.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_apiwrapper_H)
-# define _apiwrapper_H (1)
-# include <ogg/ogg.h>
-# include <theora/theora.h>
-# include "theora/theoradec.h"
-# include "theora/theoraenc.h"
-# include "state.h"
-
-typedef struct th_api_wrapper th_api_wrapper;
-typedef struct th_api_info    th_api_info;
-
-/*Provide an entry point for the codec setup to clear itself in case we ever
-   want to break pieces off into a common base library shared by encoder and
-   decoder.
-  In addition, this makes several other pieces of the API wrapper cleaner.*/
-typedef void (*oc_setup_clear_func)(void *_ts);
-
-/*Generally only one of these pointers will be non-NULL in any given instance.
-  Technically we do not even really need this struct, since we should be able
-   to figure out which one from "context", but doing it this way makes sure we
-   don't flub it up.*/
-struct th_api_wrapper{
-  oc_setup_clear_func  clear;
-  th_setup_info       *setup;
-  th_dec_ctx          *decode;
-  th_enc_ctx          *encode;
-};
-
-struct th_api_info{
-  th_api_wrapper api;
-  theora_info    info;
-};
-
-
-void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
-
-#endif
diff --git a/media/libtheora/lib/arm/arm2gnu.pl b/media/libtheora/lib/arm/arm2gnu.pl
deleted file mode 100644
index 5831bd81e..000000000
--- a/media/libtheora/lib/arm/arm2gnu.pl
+++ /dev/null
@@ -1,281 +0,0 @@
-#!/usr/bin/perl
-
-my $bigend;  # little/big endian
-my $nxstack;
-
-$nxstack = 0;
-
-eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
-    if $running_under_some_shell;
-
-while ($ARGV[0] =~ /^-/) {
-    $_ = shift;
-  last if /^--/;
-    if (/^-n/) {
-    $nflag++;
-    next;
-    }
-    die "I don't recognize this switch: $_\\n";
-}
-$printit++ unless $nflag;
-
-$\ = "\n";      # automatically add newline on print
-$n=0;
-
-$thumb = 0;     # ARM mode by default, not Thumb.
-
-LINE:
-while (<>) {
-
-    # For ADRLs we need to add a new line after the substituted one.
-    $addPadding = 0;
-
-    # First, we do not dare to touch *anything* inside double quotes, do we?
-    # Second, if you want a dollar character in the string,
-    # insert two of them -- that's how ARM C and assembler treat strings.
-    s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1:   .ascii \"/   && do { s/\$\$/\$/g; next };
-    s/\bDCB\b[ \t]*\"/.ascii \"/                          && do { s/\$\$/\$/g; next };
-    s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/                    && do { s/\$\$/\$/g; next };
-    # If there's nothing on a line but a comment, don't try to apply any further
-    #  substitutions (this is a cheap hack to avoid mucking up the license header)
-    s/^([ \t]*);/$1@/                                     && do { s/\$\$/\$/g; next };
-    # If substituted -- leave immediately !
-
-    s/@/,:/;
-    s/;/@/;
-    while ( /@.*'/ ) {
-      s/(@.*)'/$1/g;
-    }
-    s/\{FALSE\}/0/g;
-    s/\{TRUE\}/1/g;
-    s/\{(\w\w\w\w+)\}/$1/g;
-    s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
-    s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
-    s/\bIMPORT\b/.extern/;
-    s/\bEXPORT\b/.global/;
-    s/^(\s+)\[/$1IF/;
-    s/^(\s+)\|/$1ELSE/;
-    s/^(\s+)\]/$1ENDIF/;
-    s/IF *:DEF:/ .ifdef/;
-    s/IF *:LNOT: *:DEF:/ .ifndef/;
-    s/ELSE/ .else/;
-    s/ENDIF/ .endif/;
-
-    if( /\bIF\b/ ) {
-      s/\bIF\b/ .if/;
-      s/=/==/;
-    }
-    if ( $n == 2) {
-        s/\$/\\/g;
-    }
-    if ($n == 1) {
-        s/\$//g;
-        s/label//g;
-    $n = 2;
-      }
-    if ( /MACRO/ ) {
-      s/MACRO *\n/.macro/;
-      $n=1;
-    }
-    if ( /\bMEND\b/ ) {
-      s/\bMEND\b/.endm/;
-      $n=0;
-    }
-
-    # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
-    #
-    if ( /\bAREA\b/ ) {
-        if ( /CODE/ ) {
-            $nxstack = 1;
-        }
-        s/^(.+)CODE(.+)READONLY(.*)/    .text/;
-        s/^(.+)DATA(.+)READONLY(.*)/    .section .rdata\n    .align 2/;
-        s/^(.+)\|\|\.data\|\|(.+)/    .data\n    .align 2/;
-        s/^(.+)\|\|\.bss\|\|(.+)/    .bss/;
-    }
-
-    s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/;       # ||.constdata$3||
-    s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/;               # ||.bss$2||
-    s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/;             # ||.data$2||
-    s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
-    s/^(\s+)\%(\s)/    .space $1/;
-
-    s/\|(.+)\.(\d+)\|/\.$1_$2/;                     # |L80.123| -> .L80_123
-    s/\bCODE32\b/.code 32/ && do {$thumb = 0};
-    s/\bCODE16\b/.code 16/ && do {$thumb = 1};
-    if (/\bPROC\b/)
-    {
-        print "    .thumb_func" if ($thumb);
-        s/\bPROC\b/@ $&/;
-    }
-    s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
-    s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
-    s/\bENDP\b/@ $&/;
-    s/\bSUBT\b/@ $&/;
-    s/\bDATA\b/@ $&/;   # DATA directive is deprecated -- Asm guide, p.7-25
-    s/\bKEEP\b/@ $&/;
-    s/\bEXPORTAS\b/@ $&/;
-    s/\|\|(.)+\bEQU\b/@ $&/;
-    s/\|\|([\w\$]+)\|\|/$1/;
-    s/\bENTRY\b/@ $&/;
-    s/\bASSERT\b/@ $&/;
-    s/\bGBLL\b/@ $&/;
-    s/\bGBLA\b/@ $&/;
-    s/^\W+OPT\b/@ $&/;
-    s/:OR:/|/g;
-    s/:SHL:/<</g;
-    s/:SHR:/>>/g;
-    s/:AND:/&/g;
-    s/:LAND:/&&/g;
-    s/CPSR/cpsr/;
-    s/SPSR/spsr/;
-    s/ALIGN$/.balign 4/;
-    s/ALIGN\s+([0-9x]+)$/.balign $1/;
-    s/psr_cxsf/psr_all/;
-    s/LTORG/.ltorg/;
-    s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
-    s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
-    s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
-    s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
-
-    #  {PC} + 0xdeadfeed  -->  . + 0xdeadfeed
-    s/\{PC\} \+/ \. +/;
-
-    # Single hex constant on the line !
-    #
-    # >>> NOTE <<<
-    #   Double-precision floats in gcc are always mixed-endian, which means
-    #   bytes in two words are little-endian, but words are big-endian.
-    #   So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
-    #   and 0xfeed0000 at high address.
-    #
-    s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
-    # Only decimal constants on the line, no hex !
-    s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
-
-    # Single hex constant on the line !
-#    s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
-    # Only decimal constants on the line, no hex !
-#    s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
-    s/\bDCFS[ \t]+0x/.word 0x/;
-    s/\bDCFS\b/.float/;
-
-    s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
-    s/\bDCD\b/.word/;
-    s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
-    s/\bDCW\b/.short/;
-    s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
-    s/\bDCB\b/.byte/;
-    s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
-    s/^[A-Za-z_\.]\w+/$&:/;
-    s/^(\d+)/$1:/;
-    s/\%(\d+)/$1b_or_f/;
-    s/\%[Bb](\d+)/$1b/;
-    s/\%[Ff](\d+)/$1f/;
-    s/\%[Ff][Tt](\d+)/$1f/;
-    s/&([\dA-Fa-f]+)/0x$1/;
-    if ( /\b2_[01]+\b/ ) {
-      s/\b2_([01]+)\b/conv$1&&&&/g;
-      while ( /[01][01][01][01]&&&&/ ) {
-        s/0000&&&&/&&&&0/g;
-        s/0001&&&&/&&&&1/g;
-        s/0010&&&&/&&&&2/g;
-        s/0011&&&&/&&&&3/g;
-        s/0100&&&&/&&&&4/g;
-        s/0101&&&&/&&&&5/g;
-        s/0110&&&&/&&&&6/g;
-        s/0111&&&&/&&&&7/g;
-        s/1000&&&&/&&&&8/g;
-        s/1001&&&&/&&&&9/g;
-        s/1010&&&&/&&&&A/g;
-        s/1011&&&&/&&&&B/g;
-        s/1100&&&&/&&&&C/g;
-        s/1101&&&&/&&&&D/g;
-        s/1110&&&&/&&&&E/g;
-        s/1111&&&&/&&&&F/g;
-      }
-      s/000&&&&/&&&&0/g;
-      s/001&&&&/&&&&1/g;
-      s/010&&&&/&&&&2/g;
-      s/011&&&&/&&&&3/g;
-      s/100&&&&/&&&&4/g;
-      s/101&&&&/&&&&5/g;
-      s/110&&&&/&&&&6/g;
-      s/111&&&&/&&&&7/g;
-      s/00&&&&/&&&&0/g;
-      s/01&&&&/&&&&1/g;
-      s/10&&&&/&&&&2/g;
-      s/11&&&&/&&&&3/g;
-      s/0&&&&/&&&&0/g;
-      s/1&&&&/&&&&1/g;
-      s/conv&&&&/0x/g;
-    }
-
-    if ( /commandline/)
-    {
-        if( /-bigend/)
-        {
-            $bigend=1;
-        }
-    }
-
-    if ( /\bDCDU\b/ )
-    {
-        my $cmd=$_;
-        my $value;
-        my $w1;
-        my $w2;
-        my $w3;
-        my $w4;
-
-        s/\s+DCDU\b/@ $&/;
-
-        $cmd =~ /\bDCDU\b\s+0x(\d+)/;
-        $value = $1;
-        $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
-        $w1 = $1;
-        $w2 = $2;
-        $w3 = $3;
-        $w4 = $4;
-
-        if( $bigend ne "")
-        {
-            # big endian
-
-            print "        .byte      0x".$w1;
-            print "        .byte      0x".$w2;
-            print "        .byte      0x".$w3;
-            print "        .byte      0x".$w4;
-        }
-        else
-        {
-            # little endian
-
-            print "        .byte      0x".$w4;
-            print "        .byte      0x".$w3;
-            print "        .byte      0x".$w2;
-            print "        .byte      0x".$w1;
-        }
-
-    }
-
-
-    if ( /\badrl\b/i )
-    {
-        s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
-        $addPadding = 1;
-    }
-    s/\bEND\b/@ END/;
-} continue {
-    printf ("%s", $_) if $printit;
-    if ($addPadding != 0)
-    {
-        printf ("   mov r0,r0\n");
-        $addPadding = 0;
-    }
-}
-#If we had a code section, mark that this object doesn't need an executable
-# stack.
-if ($nxstack) {
-    printf ("    .section\t.note.GNU-stack,\"\",\%\%progbits\n");
-}
diff --git a/media/libtheora/lib/arm/armbits.h b/media/libtheora/lib/arm/armbits.h
deleted file mode 100644
index 1540d7eb5..000000000
--- a/media/libtheora/lib/arm/armbits.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
-
- ********************************************************************/
-#if !defined(_arm_armbits_H)
-# define _arm_armbits_H (1)
-# include "../bitpack.h"
-# include "armcpu.h"
-
-# if defined(OC_ARM_ASM)
-#  define oc_pack_read oc_pack_read_arm
-#  define oc_pack_read1 oc_pack_read1_arm
-#  define oc_huff_token_decode oc_huff_token_decode_arm
-# endif
-
-long oc_pack_read_arm(oc_pack_buf *_b,int _bits);
-int oc_pack_read1_arm(oc_pack_buf *_b);
-int oc_huff_token_decode_arm(oc_pack_buf *_b,const ogg_int16_t *_tree);
-
-#endif
diff --git a/media/libtheora/lib/arm/armbits.s b/media/libtheora/lib/arm/armbits.s
deleted file mode 100644
index 0fdb6fdd3..000000000
--- a/media/libtheora/lib/arm/armbits.s
+++ /dev/null
@@ -1,236 +0,0 @@
-;********************************************************************
-;*                                                                  *
-;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
-;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
-;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
-;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
-;*                                                                  *
-;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
-;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
-;*                                                                  *
-;********************************************************************
-;
-; function:
-;   last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
-;
-;********************************************************************
-
-	AREA	|.text|, CODE, READONLY
-
-	; Explicitly specifying alignment here because some versions of
-	; gas don't align code correctly. See
-	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
-	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
-	ALIGN
-
-	EXPORT oc_pack_read_arm
-	EXPORT oc_pack_read1_arm
-	EXPORT oc_huff_token_decode_arm
-
-oc_pack_read1_arm PROC
-	; r0 = oc_pack_buf *_b
-	ADD r12,r0,#8
-	LDMIA r12,{r2,r3}      ; r2 = window
-	; Stall...             ; r3 = available
-	; Stall...
-	SUBS r3,r3,#1          ; r3 = available-1, available<1 => LT
-	BLT oc_pack_read1_refill
-	MOV r0,r2,LSR #31      ; r0 = window>>31
-	MOV r2,r2,LSL #1       ; r2 = window<<=1
-	STMIA r12,{r2,r3}      ; window = r2
-	                       ; available = r3
-	MOV PC,r14
-	ENDP
-
-oc_pack_read_arm PROC
-	; r0 = oc_pack_buf *_b
-	; r1 = int          _bits
-	ADD r12,r0,#8
-	LDMIA r12,{r2,r3}      ; r2 = window
-	; Stall...             ; r3 = available
-	; Stall...
-	SUBS r3,r3,r1          ; r3 = available-_bits, available<_bits => LT
-	BLT oc_pack_read_refill
-	RSB r0,r1,#32          ; r0 = 32-_bits
-	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
-	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
-	STMIA r12,{r2,r3}      ; window = r2
-	                       ; available = r3
-	MOV PC,r14
-
-; We need to refill window.
-oc_pack_read1_refill
-	MOV r1,#1
-oc_pack_read_refill
-	STMFD r13!,{r10,r11,r14}
-	LDMIA r0,{r10,r11}     ; r10 = stop
-	                       ; r11 = ptr
-	RSB r0,r1,#32          ; r0 = 32-_bits
-	RSB r3,r3,r0           ; r3 = 32-available
-; We can use unsigned compares for both the pointers and for available
-;  (allowing us to chain condition codes) because available will never be
-;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
-;  negative.
-	CMP r10,r11            ; ptr<stop => HI
-	CMPHI r3,#7            ;   available<=24 => HI
-	LDRHIB r14,[r11],#1    ;     r14 = *ptr++
-	SUBHI r3,#8            ;     available += 8
-	; (HI) Stall...
-	ORRHI r2,r14,LSL r3    ;     r2 = window|=r14<<32-available
-	CMPHI r10,r11          ;     ptr<stop => HI
-	CMPHI r3,#7            ;       available<=24 => HI
-	LDRHIB r14,[r11],#1    ;         r14 = *ptr++
-	SUBHI r3,#8            ;         available += 8
-	; (HI) Stall...
-	ORRHI r2,r14,LSL r3    ;         r2 = window|=r14<<32-available
-	CMPHI r10,r11          ;         ptr<stop => HI
-	CMPHI r3,#7            ;           available<=24 => HI
-	LDRHIB r14,[r11],#1    ;             r14 = *ptr++
-	SUBHI r3,#8            ;             available += 8
-	; (HI) Stall...
-	ORRHI r2,r14,LSL r3    ;             r2 = window|=r14<<32-available
-	CMPHI r10,r11          ;             ptr<stop => HI
-	CMPHI r3,#7            ;               available<=24 => HI
-	LDRHIB r14,[r11],#1    ;                 r14 = *ptr++
-	SUBHI r3,#8            ;                 available += 8
-	; (HI) Stall...
-	ORRHI r2,r14,LSL r3    ;                 r2 = window|=r14<<32-available
-	SUBS r3,r0,r3          ; r3 = available-=_bits, available<bits => GT
-	BLT oc_pack_read_refill_last
-	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
-	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
-	STR r11,[r12,#-4]      ; ptr = r11
-	STMIA r12,{r2,r3}      ; window = r2
-	                       ; available = r3
-	LDMFD r13!,{r10,r11,PC}
-
-; Either we wanted to read more than 24 bits and didn't have enough room to
-;  stuff the last byte into the window, or we hit the end of the packet.
-oc_pack_read_refill_last
-	CMP r11,r10            ; ptr<stop => LO
-; If we didn't hit the end of the packet, then pull enough of the next byte to
-;  to fill up the window.
-	LDRLOB r14,[r11]       ; (LO) r14 = *ptr
-; Otherwise, set the EOF flag and pretend we have lots of available bits.
-	MOVHS r14,#1           ; (HS) r14 = 1
-	ADDLO r10,r3,r1        ; (LO) r10 = available
-	STRHS r14,[r12,#8]     ; (HS) eof = 1
-	ANDLO r10,r10,#7       ; (LO) r10 = available&7
-	MOVHS r3,#1<<30        ; (HS) available = OC_LOTS_OF_BITS
-	ORRLO r2,r14,LSL r10   ; (LO) r2 = window|=*ptr>>(available&7)
-	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
-	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
-	STR r11,[r12,#-4]      ; ptr = r11
-	STMIA r12,{r2,r3}      ; window = r2
-	                       ; available = r3
-	LDMFD r13!,{r10,r11,PC}
-	ENDP
-
-
-
-oc_huff_token_decode_arm PROC
-	; r0 = oc_pack_buf       *_b
-	; r1 = const ogg_int16_t *_tree
-	STMFD r13!,{r4,r5,r10,r14}
-	LDRSH r10,[r1]         ; r10 = n=_tree[0]
-	LDMIA r0,{r2-r5}       ; r2 = stop
-	; Stall...             ; r3 = ptr
-	; Stall...             ; r4 = window
-	                       ; r5 = available
-	CMP r10,r5             ; n>available => GT
-	BGT oc_huff_token_decode_refill0
-	RSB r14,r10,#32        ; r14 = 32-n
-	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
-	ADD r14,r1,r14,LSL #1  ; r14 = _tree+bits
-	LDRSH r12,[r14,#2]     ; r12 = node=_tree[1+bits]
-	; Stall...
-	; Stall...
-	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
-	BMI oc_huff_token_decode_continue
-	MOV r10,r14,LSR #8     ; r10 = n=node>>8
-	MOV r4,r4,LSL r10      ; r4 = window<<=n
-	SUB r5,r10             ; r5 = available-=n
-	STMIB r0,{r3-r5}       ; ptr = r3
-	                       ; window = r4
-	                       ; available = r5
-	AND r0,r14,#255        ; r0 = node&255
-	LDMFD r13!,{r4,r5,r10,pc}
-
-; The first tree node wasn't enough to reach a leaf, read another
-oc_huff_token_decode_continue
-	ADD r12,r1,r12,LSL #1  ; r12 = _tree+node
-	MOV r4,r4,LSL r10      ; r4 = window<<=n
-	SUB r5,r5,r10          ; r5 = available-=n
-	LDRSH r10,[r12],#2     ; r10 = n=_tree[node]
-	; Stall...             ; r12 = _tree+node+1
-	; Stall...
-	CMP r10,r5             ; n>available => GT
-	BGT oc_huff_token_decode_refill
-	RSB r14,r10,#32        ; r14 = 32-n
-	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
-	ADD r12,r12,r14        ;
-	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
-	; Stall...
-	; Stall...
-	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
-	BMI oc_huff_token_decode_continue
-	MOV r10,r14,LSR #8     ; r10 = n=node>>8
-	MOV r4,r4,LSL r10      ; r4 = window<<=n
-	SUB r5,r10             ; r5 = available-=n
-	STMIB r0,{r3-r5}       ; ptr = r3
-	                       ; window = r4
-	                       ; available = r5
-	AND r0,r14,#255        ; r0 = node&255
-	LDMFD r13!,{r4,r5,r10,pc}
-
-oc_huff_token_decode_refill0
-	ADD r12,r1,#2          ; r12 = _tree+1
-oc_huff_token_decode_refill
-; We can't possibly need more than 15 bits, so available must be <= 15.
-; Therefore we can load at least two bytes without checking it.
-	CMP r2,r3              ; ptr<stop => HI
-	LDRHIB r14,[r3],#1     ;   r14 = *ptr++
-	RSBHI r5,r5,#24        ; (HI) available = 32-(available+=8)
-	RSBLS r5,r5,#32        ; (LS) r5 = 32-available
-	ORRHI r4,r14,LSL r5    ;   r4 = window|=r14<<32-available
-	CMPHI r2,r3            ;   ptr<stop => HI
-	LDRHIB r14,[r3],#1     ;     r14 = *ptr++
-	SUBHI r5,#8            ;     available += 8
-	; (HI) Stall...
-	ORRHI r4,r14,LSL r5    ;     r4 = window|=r14<<32-available
-; We can use unsigned compares for both the pointers and for available
-;  (allowing us to chain condition codes) because available will never be
-;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
-;  negative.
-	CMPHI r2,r3            ;     ptr<stop => HI
-	CMPHI r5,#7            ;       available<=24 => HI
-	LDRHIB r14,[r3],#1     ;         r14 = *ptr++
-	SUBHI r5,#8            ;         available += 8
-	; (HI) Stall...
-	ORRHI r4,r14,LSL r5    ;         r4 = window|=r14<<32-available
-	CMP r2,r3              ; ptr<stop => HI
-	MOVLS r5,#-1<<30       ; (LS) available = OC_LOTS_OF_BITS+32
-	CMPHI r5,#7            ; (HI) available<=24 => HI
-	LDRHIB r14,[r3],#1     ; (HI)   r14 = *ptr++
-	SUBHI r5,#8            ; (HI)   available += 8
-	; (HI) Stall...
-	ORRHI r4,r14,LSL r5    ; (HI)   r4 = window|=r14<<32-available
-	RSB r14,r10,#32        ; r14 = 32-n
-	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
-	ADD r12,r12,r14        ;
-	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
-	RSB r5,r5,#32          ; r5 = available
-	; Stall...
-	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
-	BMI oc_huff_token_decode_continue
-	MOV r10,r14,LSR #8     ; r10 = n=node>>8
-	MOV r4,r4,LSL r10      ; r4 = window<<=n
-	SUB r5,r10             ; r5 = available-=n
-	STMIB r0,{r3-r5}       ; ptr = r3
-	                       ; window = r4
-	                       ; available = r5
-	AND r0,r14,#255        ; r0 = node&255
-	LDMFD r13!,{r4,r5,r10,pc}
-	ENDP
-
-	END
diff --git a/media/libtheora/lib/arm/armcpu.c b/media/libtheora/lib/arm/armcpu.c
deleted file mode 100644
index 8b0f9a857..000000000
--- a/media/libtheora/lib/arm/armcpu.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
- CPU capability detection for ARM processors.
-
- function:
-  last mod: $Id: cpu.c 17344 2010-07-21 01:42:18Z tterribe $
-
- ********************************************************************/
-
-#include "armcpu.h"
-
-#if !defined(OC_ARM_ASM)|| \
- !defined(OC_ARM_ASM_EDSP)&&!defined(OC_ARM_ASM_ARMV6)&& \
- !defined(OC_ARM_ASM_NEON)
-ogg_uint32_t oc_cpu_flags_get(void){
-  return 0;
-}
-
-#elif defined(_MSC_VER)
-/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
-# define WIN32_LEAN_AND_MEAN
-# define WIN32_EXTRA_LEAN
-# include <windows.h>
-
-ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t flags;
-  flags=0;
-  /*MSVC has no inline __asm support for ARM, but it does let you __emit
-     instructions via their assembled hex code.
-    All of these instructions should be essentially nops.*/
-# if defined(OC_ARM_ASM_EDSP)
-  __try{
-    /*PLD [r13]*/
-    __emit(0xF5DDF000);
-    flags|=OC_CPU_ARM_EDSP;
-  }
-  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
-    /*Ignore exception.*/
-  }
-#  if defined(OC_ARM_ASM_MEDIA)
-  __try{
-    /*SHADD8 r3,r3,r3*/
-    __emit(0xE6333F93);
-    flags|=OC_CPU_ARM_MEDIA;
-  }
-  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
-    /*Ignore exception.*/
-  }
-#   if defined(OC_ARM_ASM_NEON)
-  __try{
-    /*VORR q0,q0,q0*/
-    __emit(0xF2200150);
-    flags|=OC_CPU_ARM_NEON;
-  }
-  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
-    /*Ignore exception.*/
-  }
-#   endif
-#  endif
-# endif
-  return flags;
-}
-
-#elif defined(__linux__)
-# include <stdio.h>
-# include <stdlib.h>
-# include <string.h>
-
-ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t  flags;
-  FILE         *fin;
-  flags=0;
-  /*Reading /proc/self/auxv would be easier, but that doesn't work reliably on
-     Android.
-    This also means that detection will fail in Scratchbox.*/
-  fin=fopen("/proc/cpuinfo","r");
-  if(fin!=NULL){
-    /*512 should be enough for anybody (it's even enough for all the flags that
-       x86 has accumulated... so far).*/
-    char buf[512];
-    while(fgets(buf,511,fin)!=NULL){
-      if(memcmp(buf,"Features",8)==0){
-        char *p;
-        p=strstr(buf," edsp");
-        if(p!=NULL&&(p[5]==' '||p[5]=='\n'))flags|=OC_CPU_ARM_EDSP;
-        p=strstr(buf," neon");
-        if(p!=NULL&&(p[5]==' '||p[5]=='\n'))flags|=OC_CPU_ARM_NEON;
-      }
-      if(memcmp(buf,"CPU architecture:",17)==0){
-        int version;
-        version=atoi(buf+17);
-        if(version>=6)flags|=OC_CPU_ARM_MEDIA;
-      }
-    }
-    fclose(fin);
-  }
-  return flags;
-}
-
-#else
-/*The feature registers which can tell us what the processor supports are
-   accessible in priveleged modes only, so we can't have a general user-space
-   detection method like on x86.*/
-# error "Configured to use ARM asm but no CPU detection method available for " \
- "your platform.  Reconfigure with --disable-asm (or send patches)."
-#endif
diff --git a/media/libtheora/lib/arm/armcpu.h b/media/libtheora/lib/arm/armcpu.h
deleted file mode 100644
index 18dd95821..000000000
--- a/media/libtheora/lib/arm/armcpu.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
- function:
-    last mod: $Id: cpu.h 17344 2010-07-21 01:42:18Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_arm_armcpu_H)
-# define _arm_armcpu_H (1)
-#include "../internal.h"
-
-/*"Parallel instructions" from ARM v6 and above.*/
-#define OC_CPU_ARM_MEDIA    (1<<24)
-/*Flags chosen to match arch/arm/include/asm/hwcap.h in the Linux kernel.*/
-#define OC_CPU_ARM_EDSP     (1<<7)
-#define OC_CPU_ARM_NEON     (1<<12)
-
-ogg_uint32_t oc_cpu_flags_get(void);
-
-#endif
diff --git a/media/libtheora/lib/arm/armfrag.s b/media/libtheora/lib/arm/armfrag.s
deleted file mode 100644
index e20579eee..000000000
--- a/media/libtheora/lib/arm/armfrag.s
+++ /dev/null
@@ -1,662 +0,0 @@
-;********************************************************************
-;*                                                                  *
-;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
-;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
-;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
-;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
-;*                                                                  *
-;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
-;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
-;*                                                                  *
-;********************************************************************
-; Original implementation:
-;  Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
-; last mod: $Id: armfrag.s 17481 2010-10-03 22:49:42Z tterribe $
-;********************************************************************
-
-	AREA	|.text|, CODE, READONLY
-
-	; Explicitly specifying alignment here because some versions of
-	; gas don't align code correctly. See
-	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
-	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
-	ALIGN
-
-	GET	armopts.s
-
-; Vanilla ARM v4 versions
-	EXPORT	oc_frag_copy_list_arm
-	EXPORT	oc_frag_recon_intra_arm
-	EXPORT	oc_frag_recon_inter_arm
-	EXPORT	oc_frag_recon_inter2_arm
-
-oc_frag_copy_list_arm PROC
-	; r0 = _dst_frame
-	; r1 = _src_frame
-	; r2 = _ystride
-	; r3 = _fragis
-	; <> = _nfragis
-	; <> = _frag_buf_offs
-	LDR	r12,[r13]		; r12 = _nfragis
-	STMFD	r13!,{r4-r6,r11,r14}
-	SUBS	r12, r12, #1
-	LDR	r4,[r3],#4		; r4 = _fragis[fragii]
-	LDRGE	r14,[r13,#4*6]		; r14 = _frag_buf_offs
-	BLT	ofcl_arm_end
-	SUB	r2, r2, #4
-ofcl_arm_lp
-	LDR	r11,[r14,r4,LSL #2]	; r11 = _frag_buf_offs[_fragis[fragii]]
-	SUBS	r12, r12, #1
-	; Stall (on XScale)
-	ADD	r4, r1, r11		; r4 = _src_frame+frag_buf_off
-	LDR	r6, [r4], #4
-	ADD	r11,r0, r11		; r11 = _dst_frame+frag_buf_off
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4], r2
-	STR	r6, [r11],#4
-	LDR	r6, [r4], #4
-	STR	r5, [r11],r2
-	LDR	r5, [r4]
-	LDRGE	r4,[r3],#4		; r4 = _fragis[fragii]
-	STR	r6, [r11],#4
-	STR	r5, [r11]
-	BGE	ofcl_arm_lp
-ofcl_arm_end
-	LDMFD	r13!,{r4-r6,r11,PC}
-oc_frag_recon_intra_arm
-	; r0 =       unsigned char *_dst
-	; r1 =       int            _ystride
-	; r2 = const ogg_int16_t    _residue[64]
-	STMFD	r13!,{r4,r5,r14}
-	MOV	r14,#8
-	MOV	r5, #255
-	SUB	r1, r1, #7
-ofrintra_lp_arm
-	LDRSH	r3, [r2], #2
-	LDRSH	r4, [r2], #2
-	LDRSH	r12,[r2], #2
-	ADDS	r3, r3, #128
-	CMPGT	r5, r3
-	EORLT	r3, r5, r3, ASR #32
-	STRB	r3, [r0], #1
-	ADDS	r4, r4, #128
-	CMPGT	r5, r4
-	EORLT	r4, r5, r4, ASR #32
-	LDRSH	r3, [r2], #2
-	STRB	r4, [r0], #1
-	ADDS	r12,r12,#128
-	CMPGT	r5, r12
-	EORLT	r12,r5, r12,ASR #32
-	LDRSH	r4, [r2], #2
-	STRB	r12,[r0], #1
-	ADDS	r3, r3, #128
-	CMPGT	r5, r3
-	EORLT	r3, r5, r3, ASR #32
-	LDRSH	r12,[r2], #2
-	STRB	r3, [r0], #1
-	ADDS	r4, r4, #128
-	CMPGT	r5, r4
-	EORLT	r4, r5, r4, ASR #32
-	LDRSH	r3, [r2], #2
-	STRB	r4, [r0], #1
-	ADDS	r12,r12,#128
-	CMPGT	r5, r12
-	EORLT	r12,r5, r12,ASR #32
-	LDRSH	r4, [r2], #2
-	STRB	r12,[r0], #1
-	ADDS	r3, r3, #128
-	CMPGT	r5, r3
-	EORLT	r3, r5, r3, ASR #32
-	STRB	r3, [r0], #1
-	ADDS	r4, r4, #128
-	CMPGT	r5, r4
-	EORLT	r4, r5, r4, ASR #32
-	STRB	r4, [r0], r1
-	SUBS	r14,r14,#1
-	BGT	ofrintra_lp_arm
-	LDMFD	r13!,{r4,r5,PC}
-	ENDP
-
-oc_frag_recon_inter_arm PROC
-	; r0 =       unsigned char *dst
-	; r1 = const unsigned char *src
-	; r2 =       int            ystride
-	; r3 = const ogg_int16_t    residue[64]
-	STMFD	r13!,{r5,r9-r11,r14}
-	MOV	r9, #8
-	MOV	r5, #255
-	SUB	r2, r2, #7
-ofrinter_lp_arm
-	LDRSH	r12,[r3], #2
-	LDRB	r14,[r1], #1
-	LDRSH	r11,[r3], #2
-	LDRB	r10,[r1], #1
-	ADDS	r12,r12,r14
-	CMPGT	r5, r12
-	EORLT	r12,r5, r12,ASR #32
-	STRB	r12,[r0], #1
-	ADDS	r11,r11,r10
-	CMPGT	r5, r11
-	LDRSH	r12,[r3], #2
-	LDRB	r14,[r1], #1
-	EORLT	r11,r5, r11,ASR #32
-	STRB	r11,[r0], #1
-	ADDS	r12,r12,r14
-	CMPGT	r5, r12
-	LDRSH	r11,[r3], #2
-	LDRB	r10,[r1], #1
-	EORLT	r12,r5, r12,ASR #32
-	STRB	r12,[r0], #1
-	ADDS	r11,r11,r10
-	CMPGT	r5, r11
-	LDRSH	r12,[r3], #2
-	LDRB	r14,[r1], #1
-	EORLT	r11,r5, r11,ASR #32
-	STRB	r11,[r0], #1
-	ADDS	r12,r12,r14
-	CMPGT	r5, r12
-	LDRSH	r11,[r3], #2
-	LDRB	r10,[r1], #1
-	EORLT	r12,r5, r12,ASR #32
-	STRB	r12,[r0], #1
-	ADDS	r11,r11,r10
-	CMPGT	r5, r11
-	LDRSH	r12,[r3], #2
-	LDRB	r14,[r1], #1
-	EORLT	r11,r5, r11,ASR #32
-	STRB	r11,[r0], #1
-	ADDS	r12,r12,r14
-	CMPGT	r5, r12
-	LDRSH	r11,[r3], #2
-	LDRB	r10,[r1], r2
-	EORLT	r12,r5, r12,ASR #32
-	STRB	r12,[r0], #1
-	ADDS	r11,r11,r10
-	CMPGT	r5, r11
-	EORLT	r11,r5, r11,ASR #32
-	STRB	r11,[r0], r2
-	SUBS	r9, r9, #1
-	BGT	ofrinter_lp_arm
-	LDMFD	r13!,{r5,r9-r11,PC}
-	ENDP
-
-oc_frag_recon_inter2_arm PROC
-	; r0 =       unsigned char *dst
-	; r1 = const unsigned char *src1
-	; r2 = const unsigned char *src2
-	; r3 =       int            ystride
-	LDR	r12,[r13]
-	; r12= const ogg_int16_t    residue[64]
-	STMFD	r13!,{r4-r8,r14}
-	MOV	r14,#8
-	MOV	r8, #255
-	SUB	r3, r3, #7
-ofrinter2_lp_arm
-	LDRB	r5, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	LDRB	r7, [r1], #1
-	ADD	r5, r5, r6
-	ADDS	r5, r4, r5, LSR #1
-	CMPGT	r8, r5
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r5, r8, r5, ASR #32
-	STRB	r5, [r0], #1
-	ADD	r7, r7, r6
-	ADDS	r7, r4, r7, LSR #1
-	CMPGT	r8, r7
-	LDRB	r5, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r7, r8, r7, ASR #32
-	STRB	r7, [r0], #1
-	ADD	r5, r5, r6
-	ADDS	r5, r4, r5, LSR #1
-	CMPGT	r8, r5
-	LDRB	r7, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r5, r8, r5, ASR #32
-	STRB	r5, [r0], #1
-	ADD	r7, r7, r6
-	ADDS	r7, r4, r7, LSR #1
-	CMPGT	r8, r7
-	LDRB	r5, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r7, r8, r7, ASR #32
-	STRB	r7, [r0], #1
-	ADD	r5, r5, r6
-	ADDS	r5, r4, r5, LSR #1
-	CMPGT	r8, r5
-	LDRB	r7, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r5, r8, r5, ASR #32
-	STRB	r5, [r0], #1
-	ADD	r7, r7, r6
-	ADDS	r7, r4, r7, LSR #1
-	CMPGT	r8, r7
-	LDRB	r5, [r1], #1
-	LDRB	r6, [r2], #1
-	LDRSH	r4, [r12],#2
-	EORLT	r7, r8, r7, ASR #32
-	STRB	r7, [r0], #1
-	ADD	r5, r5, r6
-	ADDS	r5, r4, r5, LSR #1
-	CMPGT	r8, r5
-	LDRB	r7, [r1], r3
-	LDRB	r6, [r2], r3
-	LDRSH	r4, [r12],#2
-	EORLT	r5, r8, r5, ASR #32
-	STRB	r5, [r0], #1
-	ADD	r7, r7, r6
-	ADDS	r7, r4, r7, LSR #1
-	CMPGT	r8, r7
-	EORLT	r7, r8, r7, ASR #32
-	STRB	r7, [r0], r3
-	SUBS	r14,r14,#1
-	BGT	ofrinter2_lp_arm
-	LDMFD	r13!,{r4-r8,PC}
-	ENDP
-
- [ OC_ARM_ASM_EDSP
-	EXPORT	oc_frag_copy_list_edsp
-
-oc_frag_copy_list_edsp PROC
-	; r0 = _dst_frame
-	; r1 = _src_frame
-	; r2 = _ystride
-	; r3 = _fragis
-	; <> = _nfragis
-	; <> = _frag_buf_offs
-	LDR	r12,[r13]		; r12 = _nfragis
-	STMFD	r13!,{r4-r11,r14}
-	SUBS	r12, r12, #1
-	LDRGE	r5, [r3],#4		; r5 = _fragis[fragii]
-	LDRGE	r14,[r13,#4*10]		; r14 = _frag_buf_offs
-	BLT	ofcl_edsp_end
-ofcl_edsp_lp
-	MOV	r4, r1
-	LDR	r5, [r14,r5, LSL #2]	; r5 = _frag_buf_offs[_fragis[fragii]]
-	SUBS	r12, r12, #1
-	; Stall (on XScale)
-	LDRD	r6, [r4, r5]!		; r4 = _src_frame+frag_buf_off
-	LDRD	r8, [r4, r2]!
-	; Stall
-	STRD	r6, [r5, r0]!		; r5 = _dst_frame+frag_buf_off
-	STRD	r8, [r5, r2]!
-	; Stall
-	LDRD	r6, [r4, r2]!	; On Xscale at least, doing 3 consecutive
-	LDRD	r8, [r4, r2]!	; loads causes a stall, but that's no worse
-	LDRD	r10,[r4, r2]!	; than us only doing 2, and having to do
-				; another pair of LDRD/STRD later on.
-	; Stall
-	STRD	r6, [r5, r2]!
-	STRD	r8, [r5, r2]!
-	STRD	r10,[r5, r2]!
-	LDRD	r6, [r4, r2]!
-	LDRD	r8, [r4, r2]!
-	LDRD	r10,[r4, r2]!
-	STRD	r6, [r5, r2]!
-	STRD	r8, [r5, r2]!
-	STRD	r10,[r5, r2]!
-	LDRGE	r5, [r3],#4		; r5 = _fragis[fragii]
-	BGE	ofcl_edsp_lp
-ofcl_edsp_end
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
- ]
-
- [ OC_ARM_ASM_MEDIA
-	EXPORT	oc_frag_recon_intra_v6
-	EXPORT	oc_frag_recon_inter_v6
-	EXPORT	oc_frag_recon_inter2_v6
-
-oc_frag_recon_intra_v6 PROC
-	; r0 =       unsigned char *_dst
-	; r1 =       int            _ystride
-	; r2 = const ogg_int16_t    _residue[64]
-	STMFD	r13!,{r4-r6,r14}
-	MOV	r14,#8
-	MOV	r12,r2
-	LDR	r6, =0x00800080
-ofrintra_v6_lp
-	LDRD	r2, [r12],#8	; r2 = 11110000 r3 = 33332222
-	LDRD	r4, [r12],#8	; r4 = 55554444 r5 = 77776666
-	SUBS	r14,r14,#1
-	QADD16	r2, r2, r6
-	QADD16	r3, r3, r6
-	QADD16	r4, r4, r6
-	QADD16	r5, r5, r6
-	USAT16	r2, #8, r2		; r2 = __11__00
-	USAT16	r3, #8, r3		; r3 = __33__22
-	USAT16	r4, #8, r4		; r4 = __55__44
-	USAT16	r5, #8, r5		; r5 = __77__66
-	ORR	r2, r2, r2, LSR #8	; r2 = __111100
-	ORR	r3, r3, r3, LSR #8	; r3 = __333322
-	ORR	r4, r4, r4, LSR #8	; r4 = __555544
-	ORR	r5, r5, r5, LSR #8	; r5 = __777766
-	PKHBT   r2, r2, r3, LSL #16     ; r2 = 33221100
-	PKHBT   r3, r4, r5, LSL #16     ; r3 = 77665544
-	STRD	r2, [r0], r1
-	BGT	ofrintra_v6_lp
-	LDMFD	r13!,{r4-r6,PC}
-	ENDP
-
-oc_frag_recon_inter_v6 PROC
-	; r0 =       unsigned char *_dst
-	; r1 = const unsigned char *_src
-	; r2 =       int            _ystride
-	; r3 = const ogg_int16_t    _residue[64]
-	STMFD	r13!,{r4-r7,r14}
-	MOV	r14,#8
-ofrinter_v6_lp
-	LDRD	r6, [r3], #8		; r6 = 11110000 r7 = 33332222
-	SUBS	r14,r14,#1
- [ OC_ARM_CAN_UNALIGN_LDRD
-	LDRD	r4, [r1], r2	; Unaligned ; r4 = 33221100 r5 = 77665544
- |
-	LDR	r5, [r1, #4]
-	LDR	r4, [r1], r2
- ]
-	PKHBT	r12,r6, r7, LSL #16	; r12= 22220000
-	PKHTB	r7, r7, r6, ASR #16	; r7 = 33331111
-	UXTB16	r6,r4			; r6 = __22__00
-	UXTB16	r4,r4, ROR #8		; r4 = __33__11
-	QADD16	r12,r12,r6		; r12= xx22xx00
-	QADD16	r4, r7, r4		; r4 = xx33xx11
-	LDRD	r6, [r3], #8		; r6 = 55554444 r7 = 77776666
-	USAT16	r4, #8, r4		; r4 = __33__11
-	USAT16	r12,#8,r12		; r12= __22__00
-	ORR	r4, r12,r4, LSL #8	; r4 = 33221100
-	PKHBT	r12,r6, r7, LSL #16	; r12= 66664444
-	PKHTB	r7, r7, r6, ASR #16	; r7 = 77775555
-	UXTB16	r6,r5			; r6 = __66__44
-	UXTB16	r5,r5, ROR #8		; r5 = __77__55
-	QADD16	r12,r12,r6		; r12= xx66xx44
-	QADD16	r5, r7, r5		; r5 = xx77xx55
-	USAT16	r12,#8, r12		; r12= __66__44
-	USAT16	r5, #8, r5		; r4 = __77__55
-	ORR	r5, r12,r5, LSL #8	; r5 = 33221100
-	STRD	r4, [r0], r2
-	BGT	ofrinter_v6_lp
-	LDMFD	r13!,{r4-r7,PC}
-	ENDP
-
-oc_frag_recon_inter2_v6 PROC
-	; r0 =       unsigned char *_dst
-	; r1 = const unsigned char *_src1
-	; r2 = const unsigned char *_src2
-	; r3 =       int            _ystride
-	LDR	r12,[r13]
-	; r12= const ogg_int16_t    _residue[64]
-	STMFD	r13!,{r4-r9,r14}
-	MOV	r14,#8
-ofrinter2_v6_lp
-	LDRD	r6, [r12,#8]	; r6 = 55554444 r7 = 77776666
-	SUBS	r14,r14,#1
-	LDR	r4, [r1, #4]	; Unaligned	; r4 = src1[1] = 77665544
-	LDR	r5, [r2, #4]	; Unaligned	; r5 = src2[1] = 77665544
-	PKHBT	r8, r6, r7, LSL #16	; r8 = 66664444
-	PKHTB	r9, r7, r6, ASR #16	; r9 = 77775555
-	UHADD8	r4, r4, r5	; r4 = (src1[7,6,5,4] + src2[7,6,5,4])>>1
-	UXTB16	r5, r4			; r5 = __66__44
-	UXTB16	r4, r4, ROR #8		; r4 = __77__55
-	QADD16	r8, r8, r5		; r8 = xx66xx44
-	QADD16	r9, r9, r4		; r9 = xx77xx55
-	LDRD	r6,[r12],#16	; r6 = 33332222 r7 = 11110000
-	USAT16	r8, #8, r8		; r8 = __66__44
-	LDR	r4, [r1], r3	; Unaligned	; r4 = src1[0] = 33221100
-	USAT16	r9, #8, r9		; r9 = __77__55
-	LDR	r5, [r2], r3	; Unaligned	; r5 = src2[0] = 33221100
-	ORR	r9, r8, r9, LSL #8	; r9 = 77665544
-	PKHBT	r8, r6, r7, LSL #16	; r8 = 22220000
-	UHADD8	r4, r4, r5	; r4 = (src1[3,2,1,0] + src2[3,2,1,0])>>1
-	PKHTB	r7, r7, r6, ASR #16	; r7 = 33331111
-	UXTB16	r5, r4			; r5 = __22__00
-	UXTB16	r4, r4, ROR #8		; r4 = __33__11
-	QADD16	r8, r8, r5		; r8 = xx22xx00
-	QADD16	r7, r7, r4		; r7 = xx33xx11
-	USAT16	r8, #8, r8		; r8 = __22__00
-	USAT16	r7, #8, r7		; r7 = __33__11
-	ORR	r8, r8, r7, LSL #8	; r8 = 33221100
-	STRD	r8, [r0], r3
-	BGT	ofrinter2_v6_lp
-	LDMFD	r13!,{r4-r9,PC}
-	ENDP
- ]
-
- [ OC_ARM_ASM_NEON
-	EXPORT	oc_frag_copy_list_neon
-	EXPORT	oc_frag_recon_intra_neon
-	EXPORT	oc_frag_recon_inter_neon
-	EXPORT	oc_frag_recon_inter2_neon
-
-oc_frag_copy_list_neon PROC
-	; r0 = _dst_frame
-	; r1 = _src_frame
-	; r2 = _ystride
-	; r3 = _fragis
-	; <> = _nfragis
-	; <> = _frag_buf_offs
-	LDR	r12,[r13]		; r12 = _nfragis
-	STMFD	r13!,{r4-r7,r14}
-	CMP	r12, #1
-	LDRGE	r6, [r3]		; r6 = _fragis[fragii]
-	LDRGE	r14,[r13,#4*6]		; r14 = _frag_buf_offs
-	BLT	ofcl_neon_end
-	; Stall (2 on Xscale)
-	LDR	r6, [r14,r6, LSL #2]	; r6 = _frag_buf_offs[_fragis[fragii]]
-	; Stall (on XScale)
-	MOV	r7, r6			; Guarantee PLD points somewhere valid.
-ofcl_neon_lp
-	ADD	r4, r1, r6
-	VLD1.64	{D0}, [r4@64], r2
-	ADD	r5, r0, r6
-	VLD1.64	{D1}, [r4@64], r2
-	SUBS	r12, r12, #1
-	VLD1.64	{D2}, [r4@64], r2
-	LDRGT	r6, [r3,#4]!		; r6 = _fragis[fragii]
-	VLD1.64	{D3}, [r4@64], r2
-	LDRGT	r6, [r14,r6, LSL #2]	; r6 = _frag_buf_offs[_fragis[fragii]]
-	VLD1.64	{D4}, [r4@64], r2
-	ADDGT	r7, r1, r6
-	VLD1.64	{D5}, [r4@64], r2
-	PLD	[r7]
-	VLD1.64	{D6}, [r4@64], r2
-	PLD	[r7, r2]
-	VLD1.64	{D7}, [r4@64]
-	PLD	[r7, r2, LSL #1]
-	VST1.64	{D0}, [r5@64], r2
-	ADDGT	r7, r7, r2, LSL #2
-	VST1.64	{D1}, [r5@64], r2
-	PLD	[r7, -r2]
-	VST1.64	{D2}, [r5@64], r2
-	PLD	[r7]
-	VST1.64	{D3}, [r5@64], r2
-	PLD	[r7, r2]
-	VST1.64	{D4}, [r5@64], r2
-	PLD	[r7, r2, LSL #1]
-	VST1.64	{D5}, [r5@64], r2
-	ADDGT	r7, r7, r2, LSL #2
-	VST1.64	{D6}, [r5@64], r2
-	PLD	[r7, -r2]
-	VST1.64	{D7}, [r5@64]
-	BGT	ofcl_neon_lp
-ofcl_neon_end
-	LDMFD	r13!,{r4-r7,PC}
-	ENDP
-
-oc_frag_recon_intra_neon PROC
-	; r0 =       unsigned char *_dst
-	; r1 =       int            _ystride
-	; r2 = const ogg_int16_t    _residue[64]
-	MOV	r3, #128
-	VDUP.S16	Q0, r3
-	VLDMIA	r2,  {D16-D31}	; D16= 3333222211110000 etc	; 9(8) cycles
-	VQADD.S16	Q8, Q8, Q0
-	VQADD.S16	Q9, Q9, Q0
-	VQADD.S16	Q10,Q10,Q0
-	VQADD.S16	Q11,Q11,Q0
-	VQADD.S16	Q12,Q12,Q0
-	VQADD.S16	Q13,Q13,Q0
-	VQADD.S16	Q14,Q14,Q0
-	VQADD.S16	Q15,Q15,Q0
-	VQMOVUN.S16	D16,Q8	; D16= 7766554433221100		; 1 cycle
-	VQMOVUN.S16	D17,Q9	; D17= FFEEDDCCBBAA9988		; 1 cycle
-	VQMOVUN.S16	D18,Q10	; D18= NNMMLLKKJJIIHHGG		; 1 cycle
-	VST1.64	{D16},[r0@64], r1
-	VQMOVUN.S16	D19,Q11	; D19= VVUUTTSSRRQQPPOO		; 1 cycle
-	VST1.64	{D17},[r0@64], r1
-	VQMOVUN.S16	D20,Q12	; D20= ddccbbaaZZYYXXWW		; 1 cycle
-	VST1.64	{D18},[r0@64], r1
-	VQMOVUN.S16	D21,Q13	; D21= llkkjjiihhggffee		; 1 cycle
-	VST1.64	{D19},[r0@64], r1
-	VQMOVUN.S16	D22,Q14	; D22= ttssrrqqppoonnmm		; 1 cycle
-	VST1.64	{D20},[r0@64], r1
-	VQMOVUN.S16	D23,Q15	; D23= !!@@zzyyxxwwvvuu		; 1 cycle
-	VST1.64	{D21},[r0@64], r1
-	VST1.64	{D22},[r0@64], r1
-	VST1.64	{D23},[r0@64], r1
-	MOV	PC,R14
-	ENDP
-
-oc_frag_recon_inter_neon PROC
-	; r0 =       unsigned char *_dst
-	; r1 = const unsigned char *_src
-	; r2 =       int            _ystride
-	; r3 = const ogg_int16_t    _residue[64]
-	VLDMIA	r3, {D16-D31}	; D16= 3333222211110000 etc	; 9(8) cycles
-	VLD1.64	{D0}, [r1], r2
-	VLD1.64	{D2}, [r1], r2
-	VMOVL.U8	Q0, D0	; Q0 = __77__66__55__44__33__22__11__00
-	VLD1.64	{D4}, [r1], r2
-	VMOVL.U8	Q1, D2	; etc
-	VLD1.64	{D6}, [r1], r2
-	VMOVL.U8	Q2, D4
-	VMOVL.U8	Q3, D6
-	VQADD.S16	Q8, Q8, Q0
-	VLD1.64	{D0}, [r1], r2
-	VQADD.S16	Q9, Q9, Q1
-	VLD1.64	{D2}, [r1], r2
-	VQADD.S16	Q10,Q10,Q2
-	VLD1.64	{D4}, [r1], r2
-	VQADD.S16	Q11,Q11,Q3
-	VLD1.64	{D6}, [r1], r2
-	VMOVL.U8	Q0, D0
-	VMOVL.U8	Q1, D2
-	VMOVL.U8	Q2, D4
-	VMOVL.U8	Q3, D6
-	VQADD.S16	Q12,Q12,Q0
-	VQADD.S16	Q13,Q13,Q1
-	VQADD.S16	Q14,Q14,Q2
-	VQADD.S16	Q15,Q15,Q3
-	VQMOVUN.S16	D16,Q8
-	VQMOVUN.S16	D17,Q9
-	VQMOVUN.S16	D18,Q10
-	VST1.64	{D16},[r0@64], r2
-	VQMOVUN.S16	D19,Q11
-	VST1.64	{D17},[r0@64], r2
-	VQMOVUN.S16	D20,Q12
-	VST1.64	{D18},[r0@64], r2
-	VQMOVUN.S16	D21,Q13
-	VST1.64	{D19},[r0@64], r2
-	VQMOVUN.S16	D22,Q14
-	VST1.64	{D20},[r0@64], r2
-	VQMOVUN.S16	D23,Q15
-	VST1.64	{D21},[r0@64], r2
-	VST1.64	{D22},[r0@64], r2
-	VST1.64	{D23},[r0@64], r2
-	MOV	PC,R14
-	ENDP
-
-oc_frag_recon_inter2_neon PROC
-	; r0 =       unsigned char *_dst
-	; r1 = const unsigned char *_src1
-	; r2 = const unsigned char *_src2
-	; r3 =       int            _ystride
-	LDR	r12,[r13]
-	; r12= const ogg_int16_t    _residue[64]
-	VLDMIA	r12,{D16-D31}
-	VLD1.64	{D0}, [r1], r3
-	VLD1.64	{D4}, [r2], r3
-	VLD1.64	{D1}, [r1], r3
-	VLD1.64	{D5}, [r2], r3
-	VHADD.U8	Q2, Q0, Q2	; Q2 = FFEEDDCCBBAA99887766554433221100
-	VLD1.64	{D2}, [r1], r3
-	VLD1.64	{D6}, [r2], r3
-	VMOVL.U8	Q0, D4		; Q0 = __77__66__55__44__33__22__11__00
-	VLD1.64	{D3}, [r1], r3
-	VMOVL.U8	Q2, D5		; etc
-	VLD1.64	{D7}, [r2], r3
-	VHADD.U8	Q3, Q1, Q3
-	VQADD.S16	Q8, Q8, Q0
-	VQADD.S16	Q9, Q9, Q2
-	VLD1.64	{D0}, [r1], r3
-	VMOVL.U8	Q1, D6
-	VLD1.64	{D4}, [r2], r3
-	VMOVL.U8	Q3, D7
-	VLD1.64	{D1}, [r1], r3
-	VQADD.S16	Q10,Q10,Q1
-	VLD1.64	{D5}, [r2], r3
-	VQADD.S16	Q11,Q11,Q3
-	VLD1.64	{D2}, [r1], r3
-	VHADD.U8	Q2, Q0, Q2
-	VLD1.64	{D6}, [r2], r3
-	VLD1.64	{D3}, [r1], r3
-	VMOVL.U8	Q0, D4
-	VLD1.64	{D7}, [r2], r3
-	VMOVL.U8	Q2, D5
-	VHADD.U8	Q3, Q1, Q3
-	VQADD.S16	Q12,Q12,Q0
-	VQADD.S16	Q13,Q13,Q2
-	VMOVL.U8	Q1, D6
-	VMOVL.U8	Q3, D7
-	VQADD.S16	Q14,Q14,Q1
-	VQADD.S16	Q15,Q15,Q3
-	VQMOVUN.S16	D16,Q8
-	VQMOVUN.S16	D17,Q9
-	VQMOVUN.S16	D18,Q10
-	VST1.64	{D16},[r0@64], r3
-	VQMOVUN.S16	D19,Q11
-	VST1.64	{D17},[r0@64], r3
-	VQMOVUN.S16	D20,Q12
-	VST1.64	{D18},[r0@64], r3
-	VQMOVUN.S16	D21,Q13
-	VST1.64	{D19},[r0@64], r3
-	VQMOVUN.S16	D22,Q14
-	VST1.64	{D20},[r0@64], r3
-	VQMOVUN.S16	D23,Q15
-	VST1.64	{D21},[r0@64], r3
-	VST1.64	{D22},[r0@64], r3
-	VST1.64	{D23},[r0@64], r3
-	MOV	PC,R14
-	ENDP
- ]
-
-	END
diff --git a/media/libtheora/lib/arm/armidct.s b/media/libtheora/lib/arm/armidct.s
deleted file mode 100644
index babd846ec..000000000
--- a/media/libtheora/lib/arm/armidct.s
+++ /dev/null
@@ -1,1914 +0,0 @@
-;********************************************************************
-;*                                                                  *
-;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
-;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
-;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
-;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
-;*                                                                  *
-;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
-;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
-;*                                                                  *
-;********************************************************************
-; Original implementation:
-;  Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
-; last mod: $Id: armidct.s 17481 2010-10-03 22:49:42Z tterribe $
-;********************************************************************
-
-	AREA	|.text|, CODE, READONLY
-
-	; Explicitly specifying alignment here because some versions of
-	; gas don't align code correctly. See
-	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
-	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
-	ALIGN
-
-	GET	armopts.s
-
-	EXPORT	oc_idct8x8_1_arm
-	EXPORT	oc_idct8x8_arm
-
-oc_idct8x8_1_arm PROC
-	; r0 = ogg_int16_t  *_y
-	; r1 = ogg_uint16_t  _dc
-	ORR	r1, r1, r1, LSL #16
-	MOV	r2, r1
-	MOV	r3, r1
-	MOV	r12,r1
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	STMIA	r0!,{r1,r2,r3,r12}
-	MOV	PC, r14
-	ENDP
-
-oc_idct8x8_arm PROC
-	; r0 = ogg_int16_t *_y
-	; r1 = ogg_int16_t *_x
-	; r2 = int          _last_zzi
-	CMP	r2, #3
-	BLE	oc_idct8x8_3_arm
-	CMP	r2, #6
-	BLE	oc_idct8x8_6_arm
-	CMP	r2, #10
-	BLE	oc_idct8x8_10_arm
-oc_idct8x8_slow_arm
-	STMFD	r13!,{r4-r11,r14}
-	SUB	r13,r13,#64*2
-; Row transforms
-	STR	r0, [r13,#-4]!
-	ADD	r0, r13, #4	; Write to temp storage.
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	BL	idct8core_arm
-	LDR	r0, [r13], #4	; Write to the final destination.
-	; Clear input data for next block (decoder only).
-	SUB	r2, r1, #8*16
-	CMP	r0, r2
-	MOV	r1, r13		; And read from temp storage.
-	BEQ	oc_idct8x8_slow_arm_cols
-	MOV	r4, #0
-	MOV	r5, #0
-	MOV	r6, #0
-	MOV	r7, #0
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-	STMIA	r2!,{r4,r5,r6,r7}
-oc_idct8x8_slow_arm_cols
-; Column transforms
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	BL	idct8core_down_arm
-	ADD	r13,r13,#64*2
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-oc_idct8x8_10_arm PROC
-	STMFD	r13!,{r4-r11,r14}
-	SUB	r13,r13,#64*2
-; Row transforms
-	MOV	r2, r0
-	MOV	r0, r13		; Write to temp storage.
-	BL	idct4core_arm
-	BL	idct3core_arm
-	BL	idct2core_arm
-	BL	idct1core_arm
-	; Clear input data for next block (decoder only).
-	SUB	r0, r1, #4*16
-	CMP	r0, r2
-	MOV	r1, r13		; Read from temp storage.
-	BEQ	oc_idct8x8_10_arm_cols
-	MOV	r4, #0
-	STR	r4, [r0]
-	STR	r4, [r0,#4]
-	STR	r4, [r0,#16]
-	STR	r4, [r0,#20]
-	STR	r4, [r0,#32]
-	STR	r4, [r0,#48]
-	MOV	r0, r2		; Write to the final destination
-oc_idct8x8_10_arm_cols
-; Column transforms
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	BL	idct4core_down_arm
-	ADD	r13,r13,#64*2
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-oc_idct8x8_6_arm PROC
-	STMFD	r13!,{r4-r7,r9-r11,r14}
-	SUB	r13,r13,#64*2
-; Row transforms
-	MOV	r2, r0
-	MOV	r0, r13		; Write to temp storage.
-	BL	idct3core_arm
-	BL	idct2core_arm
-	BL	idct1core_arm
-	; Clear input data for next block (decoder only).
-	SUB	r0, r1, #3*16
-	CMP	r0, r2
-	MOV	r1, r13		; Read from temp storage.
-	BEQ	oc_idct8x8_6_arm_cols
-	MOV	r4, #0
-	STR	r4, [r0]
-	STR	r4, [r0,#4]
-	STR	r4, [r0,#16]
-	STR	r4, [r0,#32]
-	MOV	r0, r2		; Write to the final destination
-oc_idct8x8_6_arm_cols
-; Column transforms
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	BL	idct3core_down_arm
-	ADD	r13,r13,#64*2
-	LDMFD	r13!,{r4-r7,r9-r11,PC}
-	ENDP
-
-oc_idct8x8_3_arm PROC
-	STMFD	r13!,{r4-r7,r9-r11,r14}
-	SUB	r13,r13,#64*2
-; Row transforms
-	MOV	r2, r0
-	MOV	r0, r13		; Write to temp storage.
-	BL	idct2core_arm
-	BL	idct1core_arm
-	; Clear input data for next block (decoder only).
-	SUB	r0, r1, #2*16
-	CMP	r0, r2
-	MOV	r1, r13		; Read from temp storage.
-	MOVNE	r4, #0
-	STRNE	r4, [r0]
-	STRNE	r4, [r0,#16]
-	MOVNE	r0, r2		; Write to the final destination
-; Column transforms
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	BL	idct2core_down_arm
-	ADD	r13,r13,#64*2
-	LDMFD	r13!,{r4-r7,r9-r11,PC}
-	ENDP
-
-idct1core_arm PROC
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	LDRSH	r3, [r1], #16
-	MOV	r12,#0x05
-	ORR	r12,r12,#0xB500
-	MUL	r3, r12, r3
-	; Stall ?
-	MOV	r3, r3, ASR #16
-	STRH	r3, [r0], #2
-	STRH	r3, [r0, #14]
-	STRH	r3, [r0, #30]
-	STRH	r3, [r0, #46]
-	STRH	r3, [r0, #62]
-	STRH	r3, [r0, #78]
-	STRH	r3, [r0, #94]
-	STRH	r3, [r0, #110]
-	MOV	PC,R14
-	ENDP
-
-idct2core_arm PROC
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r12,OC_C4S4
-	LDRSH	r11,[r1, #-14]		; r11= x[1]
-	LDR	r3, OC_C7S1
-	MUL	r9, r12,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r10,OC_C1S7
-	MUL	r3, r11,r3		; r3 = t[4]<<16 = OC_C7S1*x[1]
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r11,r10,r11		; r11= t[7]<<16 = OC_C1S7*x[1]
-	MOV	r3, r3, ASR #16		; r3 = t[4]
-	MUL	r10,r12,r3		; r10= t[5]<<16 = OC_C4S4*t[4]
-	MOV	r11,r11,ASR #16		; r11= t[7]
-	MUL	r12,r11,r12		; r12= t[6]<<16 = OC_C4S4*t[7]
-	MOV	r10,r10,ASR #16		; r10= t[5]
-	ADD	r12,r9,r12,ASR #16	; r12= t[0]+t[6]
-	ADD	r12,r12,r10		; r12= t[0]+t2[6] = t[0]+t[6]+t[5]
-	SUB	r10,r12,r10,LSL #1	; r10= t[0]+t2[5] = t[0]+t[6]-t[5]
-	ADD	r3, r3, r9		; r3 = t[0]+t[4]
-	ADD	r11,r11,r9		; r11= t[0]+t[7]
-	STRH	r11,[r0], #2		; y[0] = t[0]+t[7]
-	STRH	r12,[r0, #14]		; y[1] = t[0]+t[6]
-	STRH	r10,[r0, #30]		; y[2] = t[0]+t[5]
-	STRH	r3, [r0, #46]		; y[3] = t[0]+t[4]
-	RSB	r3, r3, r9, LSL #1	; r3 = t[0]*2-(t[0]+t[4])=t[0]-t[4]
-	RSB	r10,r10,r9, LSL #1	; r10= t[0]*2-(t[0]+t[5])=t[0]-t[5]
-	RSB	r12,r12,r9, LSL #1	; r12= t[0]*2-(t[0]+t[6])=t[0]-t[6]
-	RSB	r11,r11,r9, LSL #1	; r1 = t[0]*2-(t[0]+t[7])=t[0]-t[7]
-	STRH	r3, [r0, #62]		; y[4] = t[0]-t[4]
-	STRH	r10,[r0, #78]		; y[5] = t[0]-t[5]
-	STRH	r12,[r0, #94]		; y[6] = t[0]-t[6]
-	STRH	r11,[r0, #110]		; y[7] = t[0]-t[7]
-	MOV	PC,r14
-	ENDP
-
-idct2core_down_arm PROC
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r12,OC_C4S4
-	LDRSH	r11,[r1, #-14]		; r11= x[1]
-	LDR	r3, OC_C7S1
-	MUL	r9, r12,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r10,OC_C1S7
-	MUL	r3, r11,r3		; r3 = t[4]<<16 = OC_C7S1*x[1]
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r11,r10,r11		; r11= t[7]<<16 = OC_C1S7*x[1]
-	ADD	r9, r9, #8		; r9 = t[0]+8
-	MOV	r3, r3, ASR #16		; r3 = t[4]
-	MUL	r10,r12,r3		; r10= t[5]<<16 = OC_C4S4*t[4]
-	MOV	r11,r11,ASR #16		; r11= t[7]
-	MUL	r12,r11,r12		; r12= t[6]<<16 = OC_C4S4*t[7]
-	MOV	r10,r10,ASR #16		; r10= t[5]
-	ADD	r12,r9,r12,ASR #16	; r12= t[0]+t[6]+8
-	ADD	r12,r12,r10		; r12= t[0]+t2[6] = t[0]+t[6]+t[5]+8
-	SUB	r10,r12,r10,LSL #1	; r10= t[0]+t2[5] = t[0]+t[6]-t[5]+8
-	ADD	r3, r3, r9		; r3 = t[0]+t[4]+8
-	ADD	r11,r11,r9		; r11= t[0]+t[7]+8
-	; TODO: This is wrong.
-	; The C code truncates to 16 bits by storing to RAM and doing the
-	;  shifts later; we've got an extra 4 bits here.
-	MOV	r4, r11,ASR #4
-	MOV	r5, r12,ASR #4
-	MOV	r6, r10,ASR #4
-	MOV	r7, r3, ASR #4
-	RSB	r3, r3, r9, LSL #1	;r3 =t[0]*2+8-(t[0]+t[4])=t[0]-t[4]+8
-	RSB	r10,r10,r9, LSL #1	;r10=t[0]*2+8-(t[0]+t[5])=t[0]-t[5]+8
-	RSB	r12,r12,r9, LSL #1	;r12=t[0]*2+8-(t[0]+t[6])=t[0]-t[6]+8
-	RSB	r11,r11,r9, LSL #1	;r11=t[0]*2+8-(t[0]+t[7])=t[0]-t[7]+8
-	MOV	r3, r3, ASR #4
-	MOV	r10,r10,ASR #4
-	MOV	r12,r12,ASR #4
-	MOV	r11,r11,ASR #4
-	STRH	r4, [r0], #2		; y[0] = t[0]+t[7]
-	STRH	r5, [r0, #14]		; y[1] = t[0]+t[6]
-	STRH	r6, [r0, #30]		; y[2] = t[0]+t[5]
-	STRH	r7, [r0, #46]		; y[3] = t[0]+t[4]
-	STRH	r3, [r0, #62]		; y[4] = t[0]-t[4]
-	STRH	r10,[r0, #78]		; y[5] = t[0]-t[5]
-	STRH	r12,[r0, #94]		; y[6] = t[0]-t[6]
-	STRH	r11,[r0, #110]		; y[7] = t[0]-t[7]
-	MOV	PC,r14
-	ENDP
-
-idct3core_arm PROC
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r12,OC_C4S4		; r12= OC_C4S4
-	LDRSH	r3, [r1, #-12]		; r3 = x[2]
-	LDR	r10,OC_C6S2		; r10= OC_C6S2
-	MUL	r9, r12,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r4, OC_C2S6		; r4 = OC_C2S6
-	MUL	r10,r3, r10		; r10= t[2]<<16 = OC_C6S2*x[2]
-	LDRSH	r11,[r1, #-14]		; r11= x[1]
-	MUL	r3, r4, r3		; r3 = t[3]<<16 = OC_C2S6*x[2]
-	LDR	r4, OC_C7S1		; r4 = OC_C7S1
-	LDR	r5, OC_C1S7		; r5 = OC_C1S7
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r4, r11,r4		; r4 = t[4]<<16 = OC_C7S1*x[1]
-	ADD	r3, r9, r3, ASR #16	; r3 = t[0]+t[3]
-	MUL	r11,r5, r11		; r11= t[7]<<16 = OC_C1S7*x[1]
-	MOV	r4, r4, ASR #16		; r4 = t[4]
-	MUL	r5, r12,r4		; r5 = t[5]<<16 = OC_C4S4*t[4]
-	MOV	r11,r11,ASR #16		; r11= t[7]
-	MUL	r12,r11,r12		; r12= t[6]<<16 = OC_C4S4*t[7]
-	ADD	r10,r9, r10,ASR #16	; r10= t[1] = t[0]+t[2]
-	RSB	r6, r10,r9, LSL #1	; r6 = t[2] = t[0]-t[2]
-					; r3 = t2[0] = t[0]+t[3]
-	RSB	r9, r3, r9, LSL #1	; r9 = t2[3] = t[0]-t[3]
-	MOV	r12,r12,ASR #16		; r12= t[6]
-	ADD	r5, r12,r5, ASR #16	; r5 = t2[6] = t[6]+t[5]
-	RSB	r12,r5, r12,LSL #1	; r12= t2[5] = t[6]-t[5]
-	ADD	r11,r3, r11		; r11= t2[0]+t[7]
-	ADD	r5, r10,r5		; r5 = t[1]+t2[6]
-	ADD	r12,r6, r12		; r12= t[2]+t2[5]
-	ADD	r4, r9, r4		; r4 = t2[3]+t[4]
-	STRH	r11,[r0], #2		; y[0] = t[0]+t[7]
-	STRH	r5, [r0, #14]		; y[1] = t[1]+t2[6]
-	STRH	r12,[r0, #30]		; y[2] = t[2]+t2[5]
-	STRH	r4, [r0, #46]		; y[3] = t2[3]+t[4]
-	RSB	r11,r11,r3, LSL #1	; r11= t2[0] - t[7]
-	RSB	r5, r5, r10,LSL #1	; r5 = t[1]  - t2[6]
-	RSB	r12,r12,r6, LSL #1	; r6 = t[2]  - t2[5]
-	RSB	r4, r4, r9, LSL #1	; r4 = t2[3] - t[4]
-	STRH	r4, [r0, #62]		; y[4] = t2[3]-t[4]
-	STRH	r12,[r0, #78]		; y[5] = t[2]-t2[5]
-	STRH	r5, [r0, #94]		; y[6] = t[1]-t2[6]
-	STRH	r11,[r0, #110]		; y[7] = t2[0]-t[7]
-	MOV	PC,R14
-	ENDP
-
-idct3core_down_arm PROC
-	; One 8-point inverse DCT pass that reads only x[0], x[1] and x[2];
-	;  every result has 8 added and is shifted right by 4 before the store.
-	; r0 = destination: eight halfwords are written 16 bytes apart, and r0
-	;      is advanced by 2 bytes on exit.
-	; r1 = source: three halfwords are read, and r1 is advanced by 16 bytes.
-	; Overwrites r3-r6 and r9-r12; returns through r14.
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r12,OC_C4S4		; r12= OC_C4S4
-	LDRSH	r3, [r1, #-12]		; r3 = x[2]
-	LDR	r10,OC_C6S2		; r10= OC_C6S2
-	MUL	r9, r12,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r4, OC_C2S6		; r4 = OC_C2S6
-	MUL	r10,r3, r10		; r10= t[2]<<16 = OC_C6S2*x[2]
-	LDRSH	r11,[r1, #-14]		; r11= x[1]
-	MUL	r3, r4, r3		; r3 = t[3]<<16 = OC_C2S6*x[2]
-	LDR	r4, OC_C7S1		; r4 = OC_C7S1
-	LDR	r5, OC_C1S7		; r5 = OC_C1S7
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r4, r11,r4		; r4 = t[4]<<16 = OC_C7S1*x[1]
-	ADD	r9, r9, #8		; r9 = t[0]+8
-	MUL	r11,r5, r11		; r11= t[7]<<16 = OC_C1S7*x[1]
-	ADD	r3, r9, r3, ASR #16	; r3 = t[0]+t[3]+8
-	MOV	r4, r4, ASR #16		; r4 = t[4]
-	MUL	r5, r12,r4		; r5 = t[5]<<16 = OC_C4S4*t[4]
-	MOV	r11,r11,ASR #16		; r11= t[7]
-	MUL	r12,r11,r12		; r12= t[6]<<16 = OC_C4S4*t[7]
-	ADD	r10,r9, r10,ASR #16	; r10= t[1]+8 = t[0]+t[2]+8
-	RSB	r6, r10,r9, LSL #1	; r6 = t[2]+8 = t[0]-t[2]+8
-					; r3 = t2[0]+8 = t[0]+t[3]+8
-	RSB	r9, r3, r9, LSL #1	; r9 = t2[3]+8 = t[0]-t[3]+8
-	MOV	r12,r12,ASR #16		; r12= t[6]
-	ADD	r5, r12,r5, ASR #16	; r5 = t2[6] = t[6]+t[5]
-	RSB	r12,r5, r12,LSL #1	; r12= t2[5] = t[6]-t[5]
-	ADD	r11,r3, r11		; r11= t2[0]+t[7] +8
-	ADD	r5, r10,r5		; r5 = t[1] +t2[6]+8
-	ADD	r12,r6, r12		; r12= t[2] +t2[5]+8
-	ADD	r4, r9, r4		; r4 = t2[3]+t[4] +8
-	RSB	r3, r11,r3, LSL #1	; r3 = t2[0] - t[7]  + 8 (2*r3 - sum)
-	RSB	r10,r5, r10,LSL #1	; r10= t[1]  - t2[6] + 8 (2*r10 - sum)
-	RSB	r6, r12,r6, LSL #1	; r6 = t[2]  - t2[5] + 8
-	RSB	r9, r4, r9, LSL #1	; r9 = t2[3] - t[4]  + 8 (2*r9 - sum)
-	; TODO: This is wrong.
-	; The C code truncates to 16 bits by storing to RAM and doing the
-	;  shifts later; we've got an extra 4 bits here.
-	MOV	r11,r11,ASR #4
-	MOV	r5, r5, ASR #4
-	MOV	r12,r12,ASR #4
-	MOV	r4, r4, ASR #4
-	MOV	r9, r9, ASR #4
-	MOV	r6, r6, ASR #4
-	MOV	r10,r10,ASR #4
-	MOV	r3, r3, ASR #4
-	; The first store post-increments r0 by 2, so the offsets below are
-	;  16*k-2 relative to the updated pointer.
-	STRH	r11,[r0], #2		; y[0] = t[0]+t[7]
-	STRH	r5, [r0, #14]		; y[1] = t[1]+t2[6]
-	STRH	r12,[r0, #30]		; y[2] = t[2]+t2[5]
-	STRH	r4, [r0, #46]		; y[3] = t2[3]+t[4]
-	STRH	r9, [r0, #62]		; y[4] = t2[3]-t[4]
-	STRH	r6, [r0, #78]		; y[5] = t[2]-t2[5]
-	STRH	r10,[r0, #94]		; y[6] = t[1]-t2[6]
-	STRH	r3, [r0, #110]		; y[7] = t2[0]-t[7]
-	MOV	PC,R14
-	ENDP
-
-idct4core_arm PROC
-	; One 8-point inverse DCT pass that reads only x[0]..x[3]; results are
-	;  stored without any rounding or shift.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight halfwords are written 16 bytes apart; r0 is advanced by 2.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 16 bytes.
-	; Overwrites r3-r12; returns through r14.
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r10,OC_C4S4		; r10= OC_C4S4
-	LDRSH	r12,[r1, #-12]		; r12= x[2]
-	LDR	r4, OC_C6S2		; r4 = OC_C6S2
-	MUL	r9, r10,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r5, OC_C2S6		; r5 = OC_C2S6
-	MUL	r4, r12,r4		; r4 = t[2]<<16 = OC_C6S2*x[2]
-	LDRSH	r3, [r1, #-14]		; r3 = x[1]
-	MUL	r5, r12,r5		; r5 = t[3]<<16 = OC_C2S6*x[2]
-	LDR	r6, OC_C7S1		; r6 = OC_C7S1
-	LDR	r12,OC_C1S7		; r12= OC_C1S7
-	LDRSH	r11,[r1, #-10]		; r11= x[3]
-	MUL	r6, r3, r6		; r6 = t[4]<<16 = OC_C7S1*x[1]
-	LDR	r7, OC_C5S3		; r7 = OC_C5S3
-	MUL	r3, r12,r3		; r3 = t[7]<<16 = OC_C1S7*x[1]
-	LDR	r8, OC_C3S5		; r8 = OC_C3S5
-	MUL	r7, r11,r7		; r7 = -t[5]<<16 = OC_C5S3*x[3]
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r11,r8, r11		; r11= t[6]<<16 = OC_C3S5*x[3]
-	MOV	r6, r6, ASR #16		; r6 = t[4]
-; TODO: This is wrong; t[4]-t[5] and t[7]-t[6] need to be truncated to 16-bit
-; before multiplying, not after (this is not equivalent)
-	SUB	r7, r6, r7, ASR #16	; r7 = t2[4]=t[4]+t[5] (as r7=-t[5])
-	RSB	r6, r7, r6, LSL #1	; r6 = t[4]-t[5]
-	MUL	r6, r10,r6		; r6 = t2[5]<<16 =OC_C4S4*(t[4]-t[5])
-	MOV	r3, r3, ASR #16		; r3 = t[7]
-	ADD	r11,r3, r11,ASR #16	; r11= t2[7]=t[7]+t[6]
-	RSB	r3, r11,r3, LSL #1	; r3 = t[7]-t[6]
-	MUL	r3, r10,r3		; r3 = t2[6]<<16 =OC_C4S4*(t[7]-t[6])
-	ADD	r4, r9, r4, ASR #16	; r4 = t[1] = t[0] + t[2]
-	RSB	r10,r4, r9, LSL #1	; r10= t[2] = t[0] - t[2]
-	ADD	r5, r9, r5, ASR #16	; r5 = t[0] = t[0] + t[3]
-	RSB	r9, r5, r9, LSL #1	; r9 = t[3] = t[0] - t[3]
-	MOV	r3, r3, ASR #16		; r3 = t2[6]
-	ADD	r6, r3, r6, ASR #16	; r6 = t3[6] = t2[6]+t2[5]
-	RSB	r3, r6, r3, LSL #1	; r3 = t3[5] = t2[6]-t2[5]
-	ADD	r11,r5, r11		; r11= t[0]+t2[7]
-	ADD	r6, r4, r6		; r6 = t[1]+t3[6]
-	ADD	r3, r10,r3		; r3 = t[2]+t3[5]
-	ADD	r7, r9, r7		; r7 = t[3]+t2[4]
-	; The first store post-increments r0 by 2, so later offsets are 16*k-2.
-	STRH	r11,[r0], #2		; y[0] = t[0]+t[7]
-	STRH	r6, [r0, #14]		; y[1] = t[1]+t2[6]
-	STRH	r3, [r0, #30]		; y[2] = t[2]+t2[5]
-	STRH	r7, [r0, #46]		; y[3] = t2[3]+t[4]
-	; Each difference is rebuilt as 2*a-(a+b) from the sum just stored.
-	RSB	r11,r11,r5, LSL #1	; r11= t[0]-t2[7]
-	RSB	r6, r6, r4, LSL #1	; r6 = t[1]-t3[6]
-	RSB	r3, r3, r10,LSL #1	; r3 = t[2]-t3[5]
-	RSB	r7, r7, r9, LSL #1	; r7 = t[3]-t2[4]
-	STRH	r7, [r0, #62]		; y[4] = t2[3]-t[4]
-	STRH	r3, [r0, #78]		; y[5] = t[2]-t2[5]
-	STRH	r6, [r0, #94]		; y[6] = t[1]-t2[6]
-	STRH	r11, [r0, #110]		; y[7] = t2[0]-t[7]
-	MOV	PC,r14
-	ENDP
-
-idct4core_down_arm PROC
-	; One 8-point inverse DCT pass that reads only x[0]..x[3]; every result
-	;  has 8 added and is shifted right by 4 before the store.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight halfwords are written 16 bytes apart; r0 is advanced by 2.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 16 bytes.
-	; Overwrites r3-r12; returns through r14.
-	LDRSH	r9, [r1], #16		; r9 = x[0]
-	LDR	r10,OC_C4S4		; r10= OC_C4S4
-	LDRSH	r12,[r1, #-12]		; r12= x[2]
-	LDR	r4, OC_C6S2		; r4 = OC_C6S2
-	MUL	r9, r10,r9		; r9 = t[0]<<16 = OC_C4S4*x[0]
-	LDR	r5, OC_C2S6		; r5 = OC_C2S6
-	MUL	r4, r12,r4		; r4 = t[2]<<16 = OC_C6S2*x[2]
-	LDRSH	r3, [r1, #-14]		; r3 = x[1]
-	MUL	r5, r12,r5		; r5 = t[3]<<16 = OC_C2S6*x[2]
-	LDR	r6, OC_C7S1		; r6 = OC_C7S1
-	LDR	r12,OC_C1S7		; r12= OC_C1S7
-	LDRSH	r11,[r1, #-10]		; r11= x[3]
-	MUL	r6, r3, r6		; r6 = t[4]<<16 = OC_C7S1*x[1]
-	LDR	r7, OC_C5S3		; r7 = OC_C5S3
-	MUL	r3, r12,r3		; r3 = t[7]<<16 = OC_C1S7*x[1]
-	LDR	r8, OC_C3S5		; r8 = OC_C3S5
-	MUL	r7, r11,r7		; r7 = -t[5]<<16 = OC_C5S3*x[3]
-	MOV	r9, r9, ASR #16		; r9 = t[0]
-	MUL	r11,r8, r11		; r11= t[6]<<16 = OC_C3S5*x[3]
-	MOV	r6, r6, ASR #16		; r6 = t[4]
-; TODO: This is wrong; t[4]-t[5] and t[7]-t[6] need to be truncated to 16-bit
-; before multiplying, not after (this is not equivalent)
-	SUB	r7, r6, r7, ASR #16	; r7 = t2[4]=t[4]+t[5] (as r7=-t[5])
-	RSB	r6, r7, r6, LSL #1	; r6 = t[4]-t[5]
-	MUL	r6, r10,r6		; r6 = t2[5]<<16 =OC_C4S4*(t[4]-t[5])
-	MOV	r3, r3, ASR #16		; r3 = t[7]
-	ADD	r11,r3, r11,ASR #16	; r11= t2[7]=t[7]+t[6]
-	RSB	r3, r11,r3, LSL #1	; r3 = t[7]-t[6]
-	ADD	r9, r9, #8		; r9 = t[0]+8
-	MUL	r3, r10,r3		; r3 = t2[6]<<16 =OC_C4S4*(t[7]-t[6])
-	ADD	r4, r9, r4, ASR #16	; r4 = t[1] = t[0] + t[2] + 8
-	RSB	r10,r4, r9, LSL #1	; r10= t[2] = t[0] - t[2] + 8
-	ADD	r5, r9, r5, ASR #16	; r5 = t[0] = t[0] + t[3] + 8
-	RSB	r9, r5, r9, LSL #1	; r9 = t[3] = t[0] - t[3] + 8
-	MOV	r3, r3, ASR #16		; r3 = t2[6]
-	ADD	r6, r3, r6, ASR #16	; r6 = t3[6] = t2[6]+t2[5]
-	RSB	r3, r6, r3, LSL #1	; r3 = t3[5] = t2[6]-t2[5]
-	ADD	r5, r5, r11		; r5 = t[0]+t2[7]+8
-	ADD	r4, r4, r6		; r4 = t[1]+t3[6]+8
-	ADD	r10,r10,r3		; r10= t[2]+t3[5]+8
-	ADD	r9, r9, r7		; r9 = t[3]+t2[4]+8
-	; Each difference is formed as (a+b)-2*b from the sum computed above.
-	SUB	r11,r5, r11,LSL #1	; r11= t[0]-t2[7]+8
-	SUB	r6, r4, r6, LSL #1	; r6 = t[1]-t3[6]+8
-	SUB	r3, r10,r3, LSL #1	; r3 = t[2]-t3[5]+8
-	SUB	r7, r9, r7, LSL #1	; r7 = t[3]-t2[4]+8
-	; TODO: This is wrong.
-	; The C code truncates to 16 bits by storing to RAM and doing the
-	;  shifts later; we've got an extra 4 bits here.
-	MOV	r11,r11,ASR #4
-	MOV	r6, r6, ASR #4
-	MOV	r3, r3, ASR #4
-	MOV	r7, r7, ASR #4
-	MOV	r9, r9, ASR #4
-	MOV	r10,r10,ASR #4
-	MOV	r4, r4, ASR #4
-	MOV	r5, r5, ASR #4
-	; The first store post-increments r0 by 2, so later offsets are 16*k-2.
-	STRH	r5,[r0], #2		; y[0] = t[0]+t[7]
-	STRH	r4, [r0, #14]		; y[1] = t[1]+t2[6]
-	STRH	r10,[r0, #30]		; y[2] = t[2]+t2[5]
-	STRH	r9, [r0, #46]		; y[3] = t2[3]+t[4]
-	STRH	r7, [r0, #62]		; y[4] = t2[3]-t[4]
-	STRH	r3, [r0, #78]		; y[5] = t[2]-t2[5]
-	STRH	r6, [r0, #94]		; y[6] = t[1]-t2[6]
-	STRH	r11,[r0, #110]		; y[7] = t2[0]-t[7]
-	MOV	PC,r14
-	ENDP
-
-idct8core_arm PROC
-	; Full 8-point inverse DCT pass over x[0]..x[7]; results are stored
-	;  without any rounding or shift.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight halfwords are written 16 bytes apart; r0 is advanced by 2.
-	; r1 = const ogg_int16_t *_x (source)
-	;      On return r1 is the entry value plus 16 bytes.
-	; Overwrites r2-r12.
-	LDRSH	r2, [r1],#16		; r2 = x[0]
-	; r1 and r14 are both reused as scratch below, so the already-advanced
-	;  r1 and the return address are saved here and popped at the end.
-	STMFD	r13!,{r1,r14}
-	LDRSH	r6, [r1, #-8]		; r6 = x[4]
-	LDR	r12,OC_C4S4		; r12= C4S4
-	LDRSH	r4, [r1, #-12]		; r4 = x[2]
-	ADD	r2, r2, r6		; r2 = x[0] + x[4]
-	SUB	r6, r2, r6, LSL #1	; r6 = x[0] - x[4]
-	; For spec compliance, these sums must be truncated to 16-bit precision
-	; _before_ the multiply (not after).
-	; Sadly, ARMv4 provides no simple way to do that.
-	; (LSL #16 followed by ASR #16 sign-extends the low 16 bits.)
-	MOV	r2, r2, LSL #16
-	MOV	r6, r6, LSL #16
-	MOV	r2, r2, ASR #16
-	MOV	r6, r6, ASR #16
-	MUL	r2, r12,r2		; r2 = t[0]<<16 = C4S4*(x[0]+x[4])
-	LDRSH	r8, [r1, #-4]		; r8 = x[6]
-	LDR	r7, OC_C6S2		; r7 = OC_C6S2
-	MUL	r6, r12,r6		; r6 = t[1]<<16 = C4S4*(x[0]-x[4])
-	LDR	r14,OC_C2S6		; r14= OC_C2S6
-	MUL	r3, r4, r7		; r3 = OC_C6S2*x[2]
-	LDR	r5, OC_C7S1		; r5 = OC_C7S1
-	MUL	r4, r14,r4		; r4 = OC_C2S6*x[2]
-	MOV	r3, r3, ASR #16		; r3 = OC_C6S2*x[2]>>16
-	MUL	r14,r8, r14		; r14= OC_C2S6*x[6]
-	MOV	r4, r4, ASR #16		; r4 = OC_C2S6*x[2]>>16
-	MUL	r8, r7, r8		; r8 = OC_C6S2*x[6]
-	LDR	r7, OC_C1S7		; r7 = OC_C1S7
-	SUB	r3, r3, r14,ASR #16	; r3=t[2]=C6S2*x[2]>>16-C2S6*x[6]>>16
-	LDRSH	r14,[r1, #-14]		; r14= x[1]
-	ADD	r4, r4, r8, ASR #16	; r4=t[3]=C2S6*x[2]>>16+C6S2*x[6]>>16
-	LDRSH	r8, [r1, #-2]		; r8 = x[7]
-	MUL	r9, r5, r14		; r9 = OC_C7S1*x[1]
-	LDRSH	r10,[r1, #-6]		; r10= x[5]
-	MUL	r14,r7, r14		; r14= OC_C1S7*x[1]
-	MOV	r9, r9, ASR #16		; r9 = OC_C7S1*x[1]>>16
-	MUL	r7, r8, r7		; r7 = OC_C1S7*x[7]
-	MOV	r14,r14,ASR #16		; r14= OC_C1S7*x[1]>>16
-	MUL	r8, r5, r8		; r8 = OC_C7S1*x[7]
-	LDRSH	r1, [r1, #-10]		; r1 = x[3] (the source pointer is lost here)
-	LDR	r5, OC_C3S5		; r5 = OC_C3S5
-	LDR	r11,OC_C5S3		; r11= OC_C5S3
-	ADD	r8, r14,r8, ASR #16	; r8=t[7]=C1S7*x[1]>>16+C7S1*x[7]>>16
-	MUL	r14,r5, r10		; r14= OC_C3S5*x[5]
-	SUB	r9, r9, r7, ASR #16	; r9=t[4]=C7S1*x[1]>>16-C1S7*x[7]>>16
-	MUL	r10,r11,r10		; r10= OC_C5S3*x[5]
-	MOV	r14,r14,ASR #16		; r14= OC_C3S5*x[5]>>16
-	MUL	r11,r1, r11		; r11= OC_C5S3*x[3]
-	MOV	r10,r10,ASR #16		; r10= OC_C5S3*x[5]>>16
-	MUL	r1, r5, r1		; r1 = OC_C3S5*x[3]
-	SUB	r14,r14,r11,ASR #16	;r14=t[5]=C3S5*x[5]>>16-C5S3*x[3]>>16
-	ADD	r10,r10,r1, ASR #16	;r10=t[6]=C5S3*x[5]>>16+C3S5*x[3]>>16
-	; r2=t[0]<<16 r3=t[2] r4=t[3] r6=t[1]<<16 r8=t[7] r9=t[4]
-	; r10=t[6] r12=C4S4 r14=t[5]
-; TODO: This is wrong; t[4]-t[5] and t[7]-t[6] need to be truncated to 16-bit
-; before multiplying, not after (this is not equivalent)
-	; Stage 2
-	; 4-5 butterfly
-	ADD	r9, r9, r14		; r9 = t2[4]     =       t[4]+t[5]
-	SUB	r14,r9, r14, LSL #1	; r14=                   t[4]-t[5]
-	MUL	r14,r12,r14		; r14= t2[5]<<16 = C4S4*(t[4]-t[5])
-	; 7-6 butterfly
-	ADD	r8, r8, r10		; r8 = t2[7]     =       t[7]+t[6]
-	SUB	r10,r8, r10, LSL #1	; r10=                   t[7]-t[6]
-	MUL	r10,r12,r10		; r10= t2[6]<<16 = C4S4*(t[7]-t[6])
-	; r2=t[0]<<16 r3=t[2] r4=t[3] r6=t[1]<<16 r8=t2[7] r9=t2[4]
-	; r10=t2[6]<<16 r12=C4S4 r14=t2[5]<<16
-	; Stage 3
-	; 0-3 butterfly
-	ADD	r2, r4, r2, ASR #16	; r2 = t2[0] = t[0] + t[3]
-	SUB	r4, r2, r4, LSL #1	; r4 = t2[3] = t[0] - t[3]
-	; 1-2 butterfly
-	ADD	r6, r3, r6, ASR #16	; r6 = t2[1] = t[1] + t[2]
-	SUB	r3, r6, r3, LSL #1	; r3 = t2[2] = t[1] - t[2]
-	; 6-5 butterfly
-	MOV	r14,r14,ASR #16		; r14= t2[5]
-	ADD	r10,r14,r10,ASR #16	; r10= t3[6] = t2[6] + t2[5]
-	SUB	r14,r10,r14,LSL #1	; r14= t3[5] = t2[6] - t2[5]
-	; r2=t2[0] r3=t2[2] r4=t2[3] r6=t2[1] r8=t2[7] r9=t2[4]
-	; r10=t3[6] r14=t3[5]
-	; Stage 4
-	ADD	r2, r2, r8		; r2 = t[0] + t[7]
-	ADD	r6, r6, r10		; r6 = t[1] + t[6]
-	ADD	r3, r3, r14		; r3 = t[2] + t[5]
-	ADD	r4, r4, r9		; r4 = t[3] + t[4]
-	SUB	r8, r2, r8, LSL #1	; r8 = t[0] - t[7]
-	SUB	r10,r6, r10,LSL #1	; r10= t[1] - t[6]
-	SUB	r14,r3, r14,LSL #1	; r14= t[2] - t[5]
-	SUB	r9, r4, r9, LSL #1	; r9 = t[3] - t[4]
-	; The first store post-increments r0 by 2, so later offsets are 16*k-2.
-	STRH	r2, [r0], #2		; y[0] = t[0]+t[7]
-	STRH	r6, [r0, #14]		; y[1] = t[1]+t[6]
-	STRH	r3, [r0, #30]		; y[2] = t[2]+t[5]
-	STRH	r4, [r0, #46]		; y[3] = t[3]+t[4]
-	STRH	r9, [r0, #62]		; y[4] = t[3]-t[4]
-	STRH	r14,[r0, #78]		; y[5] = t[2]-t[5]
-	STRH	r10,[r0, #94]		; y[6] = t[1]-t[6]
-	STRH	r8, [r0, #110]		; y[7] = t[0]-t[7]
-	LDMFD	r13!,{r1,PC}		; Restore the advanced r1 and return.
-	ENDP
-
-idct8core_down_arm PROC
-	; Full 8-point inverse DCT pass over x[0]..x[7]; every result has 8
-	;  added and is shifted right by 4 before the store.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight halfwords are written 16 bytes apart; r0 is advanced by 2.
-	; r1 = const ogg_int16_t *_x (source)
-	;      On return r1 is the entry value plus 16 bytes.
-	; Overwrites r2-r12.
-	LDRSH	r2, [r1],#16		; r2 = x[0]
-	; r1 and r14 are both reused as scratch below, so the already-advanced
-	;  r1 and the return address are saved here and popped at the end.
-	STMFD	r13!,{r1,r14}
-	LDRSH	r6, [r1, #-8]		; r6 = x[4]
-	LDR	r12,OC_C4S4		; r12= C4S4
-	LDRSH	r4, [r1, #-12]		; r4 = x[2]
-	ADD	r2, r2, r6		; r2 = x[0] + x[4]
-	SUB	r6, r2, r6, LSL #1	; r6 = x[0] - x[4]
-	; For spec compliance, these sums must be truncated to 16-bit precision
-	; _before_ the multiply (not after).
-	; Sadly, ARMv4 provides no simple way to do that.
-	; (LSL #16 followed by ASR #16 sign-extends the low 16 bits.)
-	MOV	r2, r2, LSL #16
-	MOV	r6, r6, LSL #16
-	MOV	r2, r2, ASR #16
-	MOV	r6, r6, ASR #16
-	MUL	r2, r12,r2		; r2 = t[0]<<16 = C4S4*(x[0]+x[4])
-	LDRSH	r8, [r1, #-4]		; r8 = x[6]
-	LDR	r7, OC_C6S2		; r7 = OC_C6S2
-	MUL	r6, r12,r6		; r6 = t[1]<<16 = C4S4*(x[0]-x[4])
-	LDR	r14,OC_C2S6		; r14= OC_C2S6
-	MUL	r3, r4, r7		; r3 = OC_C6S2*x[2]
-	LDR	r5, OC_C7S1		; r5 = OC_C7S1
-	MUL	r4, r14,r4		; r4 = OC_C2S6*x[2]
-	MOV	r3, r3, ASR #16		; r3 = OC_C6S2*x[2]>>16
-	MUL	r14,r8, r14		; r14= OC_C2S6*x[6]
-	MOV	r4, r4, ASR #16		; r4 = OC_C2S6*x[2]>>16
-	MUL	r8, r7, r8		; r8 = OC_C6S2*x[6]
-	LDR	r7, OC_C1S7		; r7 = OC_C1S7
-	SUB	r3, r3, r14,ASR #16	; r3=t[2]=C6S2*x[2]>>16-C2S6*x[6]>>16
-	LDRSH	r14,[r1, #-14]		; r14= x[1]
-	ADD	r4, r4, r8, ASR #16	; r4=t[3]=C2S6*x[2]>>16+C6S2*x[6]>>16
-	LDRSH	r8, [r1, #-2]		; r8 = x[7]
-	MUL	r9, r5, r14		; r9 = OC_C7S1*x[1]
-	LDRSH	r10,[r1, #-6]		; r10= x[5]
-	MUL	r14,r7, r14		; r14= OC_C1S7*x[1]
-	MOV	r9, r9, ASR #16		; r9 = OC_C7S1*x[1]>>16
-	MUL	r7, r8, r7		; r7 = OC_C1S7*x[7]
-	MOV	r14,r14,ASR #16		; r14= OC_C1S7*x[1]>>16
-	MUL	r8, r5, r8		; r8 = OC_C7S1*x[7]
-	LDRSH	r1, [r1, #-10]		; r1 = x[3] (the source pointer is lost here)
-	LDR	r5, OC_C3S5		; r5 = OC_C3S5
-	LDR	r11,OC_C5S3		; r11= OC_C5S3
-	ADD	r8, r14,r8, ASR #16	; r8=t[7]=C1S7*x[1]>>16+C7S1*x[7]>>16
-	MUL	r14,r5, r10		; r14= OC_C3S5*x[5]
-	SUB	r9, r9, r7, ASR #16	; r9=t[4]=C7S1*x[1]>>16-C1S7*x[7]>>16
-	MUL	r10,r11,r10		; r10= OC_C5S3*x[5]
-	MOV	r14,r14,ASR #16		; r14= OC_C3S5*x[5]>>16
-	MUL	r11,r1, r11		; r11= OC_C5S3*x[3]
-	MOV	r10,r10,ASR #16		; r10= OC_C5S3*x[5]>>16
-	MUL	r1, r5, r1		; r1 = OC_C3S5*x[3]
-	SUB	r14,r14,r11,ASR #16	;r14=t[5]=C3S5*x[5]>>16-C5S3*x[3]>>16
-	ADD	r10,r10,r1, ASR #16	;r10=t[6]=C5S3*x[5]>>16+C3S5*x[3]>>16
-	; r2=t[0]<<16 r3=t[2] r4=t[3] r6=t[1]<<16 r8=t[7] r9=t[4]
-	; r10=t[6] r12=C4S4 r14=t[5]
-	; Stage 2
-; TODO: This is wrong; t[4]-t[5] and t[7]-t[6] need to be truncated to 16-bit
-; before multiplying, not after (this is not equivalent)
-	; 4-5 butterfly
-	ADD	r9, r9, r14		; r9 = t2[4]     =       t[4]+t[5]
-	SUB	r14,r9, r14, LSL #1	; r14=                   t[4]-t[5]
-	MUL	r14,r12,r14		; r14= t2[5]<<16 = C4S4*(t[4]-t[5])
-	; 7-6 butterfly
-	ADD	r8, r8, r10		; r8 = t2[7]     =       t[7]+t[6]
-	SUB	r10,r8, r10, LSL #1	; r10=                   t[7]-t[6]
-	MUL	r10,r12,r10		; r10= t2[6]<<16 = C4S4*(t[7]-t[6])
-	; r2=t[0]<<16 r3=t[2] r4=t[3] r6=t[1]<<16 r8=t2[7] r9=t2[4]
-	; r10=t2[6]<<16 r12=C4S4 r14=t2[5]<<16
-	; Stage 3
-	; r2 and r6 still hold values scaled by 1<<16, so adding 8<<16 here
-	;  adds 8 once they are shifted down in the butterflies below.
-	ADD	r2, r2, #8<<16		; r2 = t[0]+8<<16
-	ADD	r6, r6, #8<<16		; r6 = t[1]+8<<16
-	; 0-3 butterfly
-	ADD	r2, r4, r2, ASR #16	; r2 = t2[0] = t[0] + t[3] + 8
-	SUB	r4, r2, r4, LSL #1	; r4 = t2[3] = t[0] - t[3] + 8
-	; 1-2 butterfly
-	ADD	r6, r3, r6, ASR #16	; r6 = t2[1] = t[1] + t[2] + 8
-	SUB	r3, r6, r3, LSL #1	; r3 = t2[2] = t[1] - t[2] + 8
-	; 6-5 butterfly
-	MOV	r14,r14,ASR #16		; r14= t2[5]
-	ADD	r10,r14,r10,ASR #16	; r10= t3[6] = t2[6] + t2[5]
-	SUB	r14,r10,r14,LSL #1	; r14= t3[5] = t2[6] - t2[5]
-	; r2=t2[0] r3=t2[2] r4=t2[3] r6=t2[1] r8=t2[7] r9=t2[4]
-	; r10=t3[6] r14=t3[5]
-	; Stage 4
-	ADD	r2, r2, r8		; r2 = t[0] + t[7] + 8
-	ADD	r6, r6, r10		; r6 = t[1] + t[6] + 8
-	ADD	r3, r3, r14		; r3 = t[2] + t[5] + 8
-	ADD	r4, r4, r9		; r4 = t[3] + t[4] + 8
-	SUB	r8, r2, r8, LSL #1	; r8 = t[0] - t[7] + 8
-	SUB	r10,r6, r10,LSL #1	; r10= t[1] - t[6] + 8
-	SUB	r14,r3, r14,LSL #1	; r14= t[2] - t[5] + 8
-	SUB	r9, r4, r9, LSL #1	; r9 = t[3] - t[4] + 8
-	; TODO: This is wrong.
-	; The C code truncates to 16 bits by storing to RAM and doing the
-	;  shifts later; we've got an extra 4 bits here.
-	MOV	r2, r2, ASR #4
-	MOV	r6, r6, ASR #4
-	MOV	r3, r3, ASR #4
-	MOV	r4, r4, ASR #4
-	MOV	r8, r8, ASR #4
-	MOV	r10,r10,ASR #4
-	MOV	r14,r14,ASR #4
-	MOV	r9, r9, ASR #4
-	; The first store post-increments r0 by 2, so later offsets are 16*k-2.
-	STRH	r2, [r0], #2		; y[0] = t[0]+t[7]
-	STRH	r6, [r0, #14]		; y[1] = t[1]+t[6]
-	STRH	r3, [r0, #30]		; y[2] = t[2]+t[5]
-	STRH	r4, [r0, #46]		; y[3] = t[3]+t[4]
-	STRH	r9, [r0, #62]		; y[4] = t[3]-t[4]
-	STRH	r14,[r0, #78]		; y[5] = t[2]-t[5]
-	STRH	r10,[r0, #94]		; y[6] = t[1]-t[6]
-	STRH	r8, [r0, #110]		; y[7] = t[0]-t[7]
-	LDMFD	r13!,{r1,PC}		; Restore the advanced r1 and return.
-	ENDP
-
- [ OC_ARM_ASM_MEDIA
-	EXPORT	oc_idct8x8_1_v6
-	EXPORT	oc_idct8x8_v6
-
-oc_idct8x8_1_v6 PROC
-	; r0 = ogg_int16_t  *_y
-	; r1 = ogg_uint16_t  _dc
-	ORR	r2, r1, r1, LSL #16
-	ORR	r3, r1, r1, LSL #16
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	STRD	r2, [r0], #8
-	MOV	PC, r14
-	ENDP
-
-oc_idct8x8_v6 PROC
-	; r0 = ogg_int16_t *_y
-	; r1 = ogg_int16_t *_x
-	; r2 = int          _last_zzi
-	CMP	r2, #3
-	BLE	oc_idct8x8_3_v6
-	;CMP	r2, #6
-	;BLE	oc_idct8x8_6_v6
-	CMP	r2, #10
-	BLE	oc_idct8x8_10_v6
-oc_idct8x8_slow_v6
-	STMFD	r13!,{r4-r11,r14}
-	SUB	r13,r13,#64*2
-; Row transforms
-	STR	r0, [r13,#-4]!
-	ADD	r0, r13, #4	; Write to temp storage.
-	BL	idct8_8core_v6
-	BL	idct8_8core_v6
-	BL	idct8_8core_v6
-	BL	idct8_8core_v6
-	LDR	r0, [r13], #4	; Write to the final destination.
-	; Clear input data for next block (decoder only).
-	SUB	r2, r1, #8*16
-	CMP	r0, r2
-	MOV	r1, r13		; And read from temp storage.
-	BEQ	oc_idct8x8_slow_v6_cols
-	MOV	r4, #0
-	MOV	r5, #0
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-	STRD	r4, [r2], #8
-oc_idct8x8_slow_v6_cols
-; Column transforms
-	BL	idct8_8core_down_v6
-	BL	idct8_8core_down_v6
-	BL	idct8_8core_down_v6
-	BL	idct8_8core_down_v6
-	ADD	r13,r13,#64*2
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-oc_idct8x8_10_v6 PROC
-	; 8x8 inverse DCT variant reached from oc_idct8x8_v6 when
-	;  _last_zzi <= 10. Rows go through idct4_3core_v6 and idct2_1core_v6
-	;  into a stack temporary; columns go through idct4_4core_down_v6.
-	; r0 = output block, r1 = input block (as passed to oc_idct8x8_v6).
-	STMFD	r13!,{r4-r11,r14}
-	; 4 spare bytes let the temporary block be moved up to an 8-byte
-	;  boundary (assumes r13 is at least 4-byte aligned).
-	SUB	r13,r13,#64*2+4
-; Row transforms
-	MOV	r2, r13
-	STR	r0, [r13,#-4]!		; Push the output pointer.
-	AND	r0, r2, #4	; Align the stack.
-	ADD	r0, r0, r2	; Write to temp storage.
-	BL	idct4_3core_v6
-	BL	idct2_1core_v6
-	LDR	r0, [r13], #4	; Write to the final destination.
-	; Clear input data for next block (decoder only).
-	; The two row calls advanced r1 by 32 bytes each, so r2 is the original
-	;  input pointer.
-	SUB	r2, r1, #4*16
-	CMP	r0, r2
-	AND	r1, r13,#4	; Align the stack.
-	; Skip the clearing when the input and output blocks are the same.
-	BEQ	oc_idct8x8_10_v6_cols
-	MOV	r4, #0
-	MOV	r5, #0
-	; Zero the leading coefficients of input rows 0-3 (rows are 16 bytes
-	;  apart): four halfwords in rows 0 and 1, two in rows 2 and 3.
-	STRD	r4, [r2]
-	STRD	r4, [r2,#16]
-	STR	r4, [r2,#32]
-	STR	r4, [r2,#48]
-oc_idct8x8_10_v6_cols
-; Column transforms
-	ADD	r1, r1, r13	; And read from temp storage.
-	BL	idct4_4core_down_v6
-	BL	idct4_4core_down_v6
-	BL	idct4_4core_down_v6
-	BL	idct4_4core_down_v6
-	ADD	r13,r13,#64*2+4
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-oc_idct8x8_3_v6 PROC
-	; 8x8 inverse DCT variant reached from oc_idct8x8_v6 when
-	;  _last_zzi <= 3. One idct2_1core_v6 call handles the rows into a stack
-	;  temporary; four idct2_2core_down_v6 calls handle the columns.
-	; r0 = output block, r1 = input block (as passed to oc_idct8x8_v6).
-	STMFD	r13!,{r4-r8,r14}
-	SUB	r13,r13,#64*2		; Reserve a 64-halfword temporary block.
-; Row transforms
-	MOV	r8, r0			; Keep the output pointer in r8.
-	MOV	r0, r13		; Write to temp storage.
-	BL	idct2_1core_v6
-	; Clear input data for next block (decoder only).
-	; The row call advanced r1 by 32 bytes, so r0 is the original input
-	;  pointer.
-	SUB	r0, r1, #2*16
-	CMP	r0, r8
-	MOV	r1, r13		; Read from temp storage.
-	; When input and output differ, zero the first word of input rows 0
-	;  and 1. When they are equal r0 already holds the output pointer.
-	MOVNE	r4, #0
-	STRNE	r4, [r0]
-	STRNE	r4, [r0,#16]
-	MOVNE	r0, r8		; Write to the final destination.
-; Column transforms
-	BL	idct2_2core_down_v6
-	BL	idct2_2core_down_v6
-	BL	idct2_2core_down_v6
-	BL	idct2_2core_down_v6
-	ADD	r13,r13,#64*2		; Release the temporary block.
-	LDMFD	r13!,{r4-r8,PC}
-	ENDP
-
-idct2_1core_v6 PROC
-	; Transforms two input rows at once, reading only x[0,0], x[0,1] and
-	;  x[1,0]. Each stored word packs row 0's result in the low halfword and
-	;  row 1's in the high halfword; no rounding or shift is applied.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight words are written 16 bytes apart; r0 is advanced by 4.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r7 and r12; returns through r14.
-; Stage 1:
-	LDR	r2, [r1], #16		; r2 = <x[0,1]|x[0,0]>
-	LDR	r3, OC_C4S4
-	LDRSH	r6, [r1], #16		; r6 = x[1,0]
-	SMULWB	r12,r3, r2		; r12= t[0,0]=OC_C4S4*x[0,0]>>16
-	LDRD	r4, OC_C7S1		; r4 = OC_C7S1; r5 = OC_C1S7
-	SMULWB	r6, r3, r6		; r6 = t[1,0]=OC_C4S4*x[1,0]>>16
-	SMULWT	r4, r4, r2		; r4 = t[0,4]=OC_C7S1*x[0,1]>>16
-	SMULWT	r7, r5, r2		; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
-; Stage 2:
-	SMULWB	r5, r3, r4		; r5 = t[0,5]=OC_C4S4*t[0,4]>>16
-	PKHBT	r12,r12,r6, LSL #16	; r12= <t[1,0]|t[0,0]>
-	SMULWB	r6, r3, r7		; r6 = t[0,6]=OC_C4S4*t[0,7]>>16
-	; r3 holds OC_C4S4, whose top halfword is zero, so packing against it
-	;  clears the top half of the destination.
-	PKHBT	r7, r7, r3		; r7 = <0|t[0,7]>
-; Stage 3:
-	PKHBT	r5, r6, r5, LSL #16	; r5 = <t[0,5]|t[0,6]>
-	PKHBT	r4, r4, r3		; r4 = <0|t[0,4]>
-	SASX	r5, r5, r5		; r5 = <t[0,6]+t[0,5]|t[0,6]-t[0,5]>
-; Stage 4:
-	PKHTB	r6, r3, r5, ASR #16	; r6 = <0|t[0,6]>
-	PKHBT	r5, r5, r3		; r5 = <0|t[0,5]>
-	; The first store post-increments r0 by 4, so later offsets are 16*k-4.
-	SADD16	r3, r12,r7		; r3 = t[0]+t[7]
-	STR	r3, [r0], #4		; y[0<<3] = t[0]+t[7]
-	SADD16	r3, r12,r6		; r3 = t[0]+t[6]
-	STR	r3, [r0, #12]		; y[1<<3] = t[0]+t[6]
-	SADD16	r3, r12,r5		; r3 = t[0]+t[5]
-	STR	r3, [r0, #28]		; y[2<<3] = t[0]+t[5]
-	SADD16	r3, r12,r4		; r3 = t[0]+t[4]
-	STR	r3, [r0, #44]		; y[3<<3] = t[0]+t[4]
-	SSUB16	r4, r12,r4		; r4 = t[0]-t[4]
-	STR	r4, [r0, #60]		; y[4<<3] = t[0]-t[4]
-	SSUB16	r5, r12,r5		; r5 = t[0]-t[5]
-	STR	r5, [r0, #76]		; y[5<<3] = t[0]-t[5]
-	SSUB16	r6, r12,r6		; r6 = t[0]-t[6]
-	STR	r6, [r0, #92]		; y[6<<3] = t[0]-t[6]
-	SSUB16	r7, r12,r7		; r7 = t[0]-t[7]
-	STR	r7, [r0, #108]		; y[7<<3] = t[0]-t[7]
-	MOV	PC,r14
-	ENDP
- ]
-
-; Literal pool of iDCT multipliers. Each value is cos(k*pi/16) scaled by
-;  65536 and rounded (e.g. OC_C4S4 = 46341 ~= 65536*cos(4*pi/16)).
-; The 8-byte alignment and the ordering matter: code in this file loads
-;  adjacent entries in pairs with LDRD (e.g. OC_C7S1 with OC_C1S7).
-	ALIGN 8
-OC_C7S1
-	DCD	12785 ; 31F1
-OC_C1S7
-	DCD	64277 ; FB15
-OC_C6S2
-	DCD	25080 ; 61F8
-OC_C2S6
-	DCD	60547 ; EC83
-OC_C5S3
-	DCD	36410 ; 8E3A
-OC_C3S5
-	DCD	54491 ; D4DB
-OC_C4S4
-	DCD	46341 ; B505
-
- [ OC_ARM_ASM_MEDIA
-idct2_2core_down_v6 PROC
-	; Transforms two input rows at once, reading x[0,0], x[0,1] and x[1,0].
-	;  Results are packed two per word (row 0 low, row 1 high); each has 8
-	;  added and is shifted right by 4 before the store.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight words are written 16 bytes apart; r0 is advanced by 4.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r7 and r12; returns through r14.
-; Stage 1:
-	LDR	r2, [r1], #16		; r2 = <x[0,1]|x[0,0]>
-	LDR	r3, OC_C4S4
-	MOV	r7 ,#8			; r7  = 8
-	LDR	r6, [r1], #16		; r6 = <x[1,1]|x[1,0]>
-	SMLAWB	r12,r3, r2, r7		; r12= (t[0,0]=OC_C4S4*x[0,0]>>16)+8
-	LDRD	r4, OC_C7S1		; r4 = OC_C7S1; r5 = OC_C1S7
-	SMLAWB	r7, r3, r6, r7		; r7 = (t[1,0]=OC_C4S4*x[1,0]>>16)+8
-	SMULWT  r5, r5, r2		; r5 = t[0,7]=OC_C1S7*x[0,1]>>16
-	PKHBT	r12,r12,r7, LSL #16	; r12= <t[1,0]+8|t[0,0]+8>
-	SMULWT	r4, r4, r2		; r4 = t[0,4]=OC_C7S1*x[0,1]>>16
-; Here we cheat: row 1 had just a DC, so x[0,1]==x[1,1] by definition.
-	PKHBT	r7, r5, r5, LSL #16	; r7 = <t[0,7]|t[0,7]>
-; Stage 2:
-	SMULWB	r6, r3, r7		; r6 = t[0,6]=OC_C4S4*t[0,7]>>16
-	PKHBT	r4, r4, r4, LSL #16	; r4 = <t[0,4]|t[0,4]>
-	SMULWT	r2, r3, r7		; r2 = t[1,6]=OC_C4S4*t[1,7]>>16
-	SMULWB	r5, r3, r4		; r5 = t[0,5]=OC_C4S4*t[0,4]>>16
-	PKHBT	r6, r6, r2, LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWT	r2, r3, r4		; r2 = t[1,5]=OC_C4S4*t[1,4]>>16
-	PKHBT	r2, r5, r2, LSL #16	; r2 = <t[1,5]|t[0,5]>
-; Stage 3:
-	SSUB16	r5, r6, r2		; r5 = <t[1,6]-t[1,5]|t[0,6]-t[0,5]>
-	SADD16	r6, r6, r2		; r6 = <t[1,6]+t[1,5]|t[0,6]+t[0,5]>
-; Stage 4:
-	; Packed >>4 idiom used for every output below: ASR #4 leaves the high
-	;  halfword correctly shifted in the top of r3; LSL #16 moves the low
-	;  halfword to the top so that ASR #20 sign-extends and shifts it; PKHTB
-	;  then joins the two halves.
-	SADD16	r2, r12,r7		; r2 = t[0]+t[7]+8
-	MOV	r3, r2, ASR #4
-	MOV	r2, r2, LSL #16
-	PKHTB	r3, r3, r2, ASR #20	; r3 = t[0]+t[7]+8>>4
-	STR	r3, [r0], #4		; y[0<<3] = t[0]+t[7]+8>>4
-	SADD16	r2, r12,r6		; r2 = t[0]+t[6]+8
-	MOV	r3, r2, ASR #4
-	MOV	r2, r2, LSL #16
-	PKHTB	r3, r3, r2, ASR #20	; r3 = t[0]+t[6]+8>>4
-	STR	r3, [r0, #12]		; y[1<<3] = t[0]+t[6]+8>>4
-	SADD16	r2, r12,r5		; r2 = t[0]+t[5]+8
-	MOV	r3, r2, ASR #4
-	MOV	r2, r2, LSL #16
-	PKHTB	r3, r3, r2, ASR #20	; r3 = t[0]+t[5]+8>>4
-	STR	r3, [r0, #28]		; y[2<<3] = t[0]+t[5]+8>>4
-	SADD16	r2, r12,r4		; r2 = t[0]+t[4]+8
-	MOV	r3, r2, ASR #4
-	MOV	r2, r2, LSL #16
-	PKHTB	r3, r3, r2, ASR #20	; r3 = t[0]+t[4]+8>>4
-	STR	r3, [r0, #44]		; y[3<<3] = t[0]+t[4]+8>>4
-	SSUB16	r4, r12,r4		; r4 = t[0]-t[4]+8
-	MOV	r3, r4, ASR #4
-	MOV	r4, r4, LSL #16
-	PKHTB	r3, r3, r4, ASR #20	; r3 = t[0]-t[4]+8>>4
-	STR	r3, [r0, #60]		; y[4<<3] = t[0]-t[4]+8>>4
-	SSUB16	r5, r12,r5		; r5 = t[0]-t[5]+8
-	MOV	r3, r5, ASR #4
-	MOV	r5, r5, LSL #16
-	PKHTB	r3, r3, r5, ASR #20	; r3 = t[0]-t[5]+8>>4
-	STR	r3, [r0, #76]		; y[5<<3] = t[0]-t[5]+8>>4
-	SSUB16	r6, r12,r6		; r6 = t[0]-t[6]+8
-	MOV	r3, r6, ASR #4
-	MOV	r6, r6, LSL #16
-	PKHTB	r3, r3, r6, ASR #20	; r3 = t[0]-t[6]+8>>4
-	STR	r3, [r0, #92]		; y[6<<3] = t[0]-t[6]+8>>4
-	SSUB16	r7, r12,r7		; r7 = t[0]-t[7]+8
-	MOV	r3, r7, ASR #4
-	MOV	r7, r7, LSL #16
-	PKHTB	r3, r3, r7, ASR #20	; r3 = t[0]-t[7]+8>>4
-	STR	r3, [r0, #108]		; y[7<<3] = t[0]-t[7]+8>>4
-	MOV	PC,r14
-	ENDP
-
-; In theory this should save ~75 cycles over oc_idct8x8_10, more than enough to
-;  pay for increased branch mis-prediction to get here, but in practice it
-;  doesn't seem to slow anything down to take it out, and it's less code this
-;  way.
- [ 0
-oc_idct8x8_6_v6 PROC
-	; 8x8 inverse DCT variant whose dispatch (for _last_zzi <= 6) is
-	;  commented out in oc_idct8x8_v6. Rows go through idct3_2core_v6 and
-	;  idct1core_v6 into a stack temporary; columns go through
-	;  idct3_3core_down_v6.
-	; r0 = output block, r1 = input block.
-	STMFD	r13!,{r4-r8,r10,r11,r14}
-	; 4 spare bytes let the temporary block be moved up to an 8-byte
-	;  boundary (assumes r13 is at least 4-byte aligned).
-	SUB	r13,r13,#64*2+4
-; Row transforms
-	MOV	r8, r0			; Keep the output pointer in r8.
-	AND	r0, r13,#4	; Align the stack.
-	ADD	r0, r0, r13	; Write to temp storage.
-	BL	idct3_2core_v6
-	BL	idct1core_v6
-	; Clear input data for next block (decoder only).
-	; The row calls advanced r1 by 32 and 16 bytes, so r0 is the original
-	;  input pointer.
-	SUB	r0, r1, #3*16
-	CMP	r0, r8
-	AND	r1, r13,#4	; Align the stack.
-	; Skip the clearing when the input and output blocks are the same; in
-	;  that case r0 already holds the output pointer.
-	BEQ	oc_idct8x8_6_v6_cols
-	MOV	r4, #0
-	MOV	r5, #0
-	; Zero four halfwords of input row 0 and two each of rows 1 and 2.
-	STRD	r4, [r0]
-	STR	r4, [r0,#16]
-	STR	r4, [r0,#32]
-	MOV	r0, r8		; Write to the final destination.
-oc_idct8x8_6_v6_cols
-; Column transforms
-	ADD	r1, r1, r13	; And read from temp storage.
-	BL	idct3_3core_down_v6
-	BL	idct3_3core_down_v6
-	BL	idct3_3core_down_v6
-	BL	idct3_3core_down_v6
-	ADD	r13,r13,#64*2+4
-	LDMFD	r13!,{r4-r8,r10,r11,PC}
-	ENDP
-
-idct1core_v6 PROC
-	; Transform of a row whose only input is x[0]: computes
-	;  (0xB505*x[0])>>16 and stores it to every other output slot.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Halfwords are written at byte offsets 0, 32, 64 and 96 from the
-	;      entry value; r0 is advanced by 2.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 16 bytes.
-	; Overwrites r3 and r12; returns through r14.
-	LDRSH	r3, [r1], #16
-	; Build 0xB505 (46341, the value stored at OC_C4S4) from two immediates.
-	MOV	r12,#0x05
-	ORR	r12,r12,#0xB500
-	MUL	r3, r12, r3
-	; Stall ?
-	MOV	r3, r3, ASR #16
-	; Don't need to actually store the odd lines; they won't be read.
-	STRH	r3, [r0], #2
-	STRH	r3, [r0, #30]
-	STRH	r3, [r0, #62]
-	STRH	r3, [r0, #94]
-	MOV	PC,R14
-	ENDP
-
-idct3_2core_v6 PROC
-	; Stages 1 and 2 for two input rows at once, reading x[0,0..2] and
-	;  x[1,0..1]; it then branches to idct4_3core_stage3_v6, which finishes
-	;  the transform, stores the results and returns to the caller.
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r7 and r10-r12 before the branch.
-; Stage 1:
-	LDRD	r4, [r1], #16		; r4 = <x[0,1]|x[0,0]>; r5 = <*|x[0,2]>
-	LDRD	r10,OC_C6S2_3_v6	; r10= OC_C6S2; r11= OC_C2S6
-	; Stall
-	SMULWB	r3, r11,r5		; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
-	LDR	r11,OC_C4S4
-	SMULWB	r2, r10,r5		; r2 = t[0,2]=OC_C6S2*x[0,2]>>16
-	LDR	r5, [r1], #16		; r5 = <x[1,1]|x[1,0]>
-	SMULWB	r12,r11,r4		; r12= (t[0,0]=OC_C4S4*x[0,0]>>16)
-	LDRD	r6, OC_C7S1_3_v6	; r6 = OC_C7S1; r7 = OC_C1S7
-	SMULWB	r10,r11,r5		; r10= (t[1,0]=OC_C4S4*x[1,0]>>16)
-	PKHBT	r12,r12,r10,LSL #16	; r12= <t[1,0]|t[0,0]>
-	SMULWT  r10,r7, r5		; r10= t[1,7]=OC_C1S7*x[1,1]>>16
-	; r11 holds OC_C4S4, whose top halfword is zero, so packing against it
-	;  clears the top half (row 1 has no x[1,2] contribution).
-	PKHBT	r2, r2, r11		; r2 = <0|t[0,2]>
-	SMULWT  r7, r7, r4		; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
-	PKHBT	r3, r3, r11		; r3 = <0|t[0,3]>
-	SMULWT	r5, r6, r5		; r5 = t[1,4]=OC_C7S1*x[1,1]>>16
-	PKHBT	r7, r7, r10,LSL #16	; r7 = <t[1,7]|t[0,7]>
-	SMULWT	r4, r6, r4		; r4 = t[0,4]=OC_C7S1*x[0,1]>>16
-; Stage 2:
-	SMULWB	r6, r11,r7		; r6 = t[0,6]=OC_C4S4*t[0,7]>>16
-	PKHBT	r4, r4, r5, LSL #16	; r4 = <t[1,4]|t[0,4]>
-	SMULWT	r10,r11,r7		; r10= t[1,6]=OC_C4S4*t[1,7]>>16
-	SMULWB	r5, r11,r4		; r5 = t[0,5]=OC_C4S4*t[0,4]>>16
-	PKHBT	r6, r6, r10,LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWT	r10,r11,r4		; r10= t[1,5]=OC_C4S4*t[1,4]>>16
-; Stage 3:
-	; r5 and r10 are left unpacked; the branch target packs them.
-	B	idct4_3core_stage3_v6
-	ENDP
-
-; Another copy so the LDRD offsets are less than +/- 255.
-; Same values as OC_C7S1/OC_C1S7/OC_C6S2/OC_C2S6; kept 8-byte aligned and
-;  in this order because they are loaded in adjacent pairs with LDRD.
-	ALIGN 8
-OC_C7S1_3_v6
-	DCD	12785 ; 31F1
-OC_C1S7_3_v6
-	DCD	64277 ; FB15
-OC_C6S2_3_v6
-	DCD	25080 ; 61F8
-OC_C2S6_3_v6
-	DCD	60547 ; EC83
-
-idct3_3core_down_v6 PROC
-	; Stages 1 and 2 for two input rows at once, reading x[0,0..2] and
-	;  x[1,0..1], with 8 pre-added to the t[.,0] terms; it then branches to
-	;  idct4_4core_down_stage3_v6 to finish the transform.
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r7 and r10-r12 before the branch.
-; Stage 1:
-	LDRD	r10,[r1], #16		; r10= <x[0,1]|x[0,0]>; r11= <??|x[0,2]>
-	LDRD	r6, OC_C6S2_3_v6	; r6 = OC_C6S2; r7 = OC_C2S6
-	LDR	r4, [r1], #16		; r4 = <x[1,1]|x[1,0]>
-	SMULWB	r3, r7, r11		; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
-	MOV	r7,#8
-	SMULWB	r2, r6, r11		; r2 = t[0,2]=OC_C6S2*x[0,2]>>16
-	LDR	r11,OC_C4S4
-	SMLAWB	r12,r11,r10,r7		; r12= t[0,0]+8=(OC_C4S4*x[0,0]>>16)+8
-; Here we cheat: row 2 had just a DC, so x[0,2]==x[1,2] by definition.
-	PKHBT	r3, r3, r3, LSL #16	; r3 = <t[0,3]|t[0,3]>
-	SMLAWB	r5, r11,r4, r7		; r5 = t[1,0]+8=(OC_C4S4*x[1,0]>>16)+8
-	PKHBT	r2, r2, r2, LSL #16	; r2 = <t[0,2]|t[0,2]>
-	LDRD	r6, OC_C7S1_3_v6	; r6 = OC_C7S1; r7 = OC_C1S7
-	PKHBT	r12,r12,r5, LSL #16	; r12= <t[1,0]+8|t[0,0]+8>
-	SMULWT  r5, r7, r4		; r5 = t[1,7]=OC_C1S7*x[1,1]>>16
-	SMULWT  r7, r7, r10		; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
-	SMULWT	r10,r6, r10		; r10= t[0,4]=OC_C7S1*x[0,1]>>16
-	PKHBT	r7, r7, r5, LSL #16	; r7 = <t[1,7]|t[0,7]>
-	SMULWT	r4, r6, r4		; r4 = t[1,4]=OC_C7S1*x[1,1]>>16
-; Stage 2:
-	SMULWB	r6, r11,r7		; r6 = t[0,6]=OC_C4S4*t[0,7]>>16
-	PKHBT	r4, r10,r4, LSL #16	; r4 = <t[1,4]|t[0,4]>
-	SMULWT	r10,r11,r7		; r10= t[1,6]=OC_C4S4*t[1,7]>>16
-	SMULWB	r5, r11,r4		; r5 = t[0,5]=OC_C4S4*t[0,4]>>16
-	PKHBT	r6, r6, r10,LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWT	r10,r11,r4		; r10= t[1,5]=OC_C4S4*t[1,4]>>16
-; Stage 3:
-	; r5 and r10 are left unpacked; the branch target packs them.
-	B	idct4_4core_down_stage3_v6
-	ENDP
- ]
-
-idct4_3core_v6 PROC
-	; Transforms two input rows at once, reading x[0,0..3] and x[1,0..2].
-	;  Each stored word packs row 0's result in the low halfword and row 1's
-	;  in the high halfword; no rounding or shift is applied.
-	; The labels idct4_3core_stage3_v6 and idct4_3core_stage3_5_v6 are entry
-	;  points into the shared tail of this routine.
-	; r0 =       ogg_int16_t *_y (destination)
-	;      Eight words are written 16 bytes apart; r0 is advanced by 4.
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r12; returns through r14.
-; Stage 1:
-	LDRD	r10,[r1], #16	; r10= <x[0,1]|x[0,0]>; r11= <x[0,3]|x[0,2]>
-	LDRD	r2, OC_C5S3_4_v6	; r2 = OC_C5S3; r3 = OC_C3S5
-	LDRD	r4, [r1], #16		; r4 = <x[1,1]|x[1,0]>; r5 = <??|x[1,2]>
-	SMULWT	r9, r3, r11		; r9 = t[0,6]=OC_C3S5*x[0,3]>>16
-	SMULWT	r8, r2, r11		; r8 = -t[0,5]=OC_C5S3*x[0,3]>>16
-	; r2 holds OC_C5S3, whose top halfword is zero, so packing against it
-	;  clears the top half (row 1 has no x[1,3] contribution).
-	PKHBT	r9, r9, r2		; r9 = <0|t[0,6]>
-	LDRD	r6, OC_C6S2_4_v6	; r6 = OC_C6S2; r7 = OC_C2S6
-	PKHBT	r8, r8, r2		; r8 = <0|-t[0,5]>
-	SMULWB	r3, r7, r11		; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
-	SMULWB	r2, r6, r11		; r2 = t[0,2]=OC_C6S2*x[0,2]>>16
-	LDR	r11,OC_C4S4
-	SMULWB	r12,r7, r5		; r12= t[1,3]=OC_C2S6*x[1,2]>>16
-	SMULWB	r5, r6, r5		; r5 = t[1,2]=OC_C6S2*x[1,2]>>16
-	PKHBT	r3, r3, r12,LSL #16	; r3 = <t[1,3]|t[0,3]>
-	SMULWB	r12,r11,r10		; r12= t[0,0]=OC_C4S4*x[0,0]>>16
-	PKHBT	r2, r2, r5, LSL #16	; r2 = <t[1,2]|t[0,2]>
-	SMULWB	r5, r11,r4		; r5 = t[1,0]=OC_C4S4*x[1,0]>>16
-	LDRD	r6, OC_C7S1_4_v6	; r6 = OC_C7S1; r7 = OC_C1S7
-	PKHBT	r12,r12,r5, LSL #16	; r12= <t[1,0]|t[0,0]>
-	SMULWT  r5, r7, r4		; r5 = t[1,7]=OC_C1S7*x[1,1]>>16
-	SMULWT  r7, r7, r10		; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
-	SMULWT	r10,r6, r10		; r10= t[0,4]=OC_C7S1*x[0,1]>>16
-	PKHBT	r7, r7, r5, LSL #16	; r7 = <t[1,7]|t[0,7]>
-	SMULWT	r4, r6, r4		; r4 = t[1,4]=OC_C7S1*x[1,1]>>16
-; Stage 2:
-	SSUB16	r6, r7, r9		; r6 = t[7]-t[6]
-	PKHBT	r4, r10,r4, LSL #16	; r4 = <t[1,4]|t[0,4]>
-	SADD16	r7, r7, r9		; r7 = t[7]=t[7]+t[6]
-	SMULWT	r9, r11,r6		; r9 = t[1,6]=OC_C4S4*r6T>>16
-	; r8 holds -t[5], so the add below yields the difference and the
-	;  subtract yields the sum.
-	SADD16	r5, r4, r8		; r5 = t[4]-t[5]
-	SMULWB	r6, r11,r6		; r6 = t[0,6]=OC_C4S4*r6B>>16
-	SSUB16	r4, r4, r8		; r4 = t[4]=t[4]+t[5]
-	SMULWT	r10,r11,r5		; r10= t[1,5]=OC_C4S4*r5T>>16
-	PKHBT	r6, r6, r9, LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWB	r5, r11,r5		; r5 = t[0,5]=OC_C4S4*r5B>>16
-; Stage 3:
-	; Entry here expects r12=t[0], r2=t[2], r3=t[3], r4=t[4], r6=t[6] and
-	;  r7=t[7] packed per row pair, with t[0,5] in r5 and t[1,5] in r10.
-idct4_3core_stage3_v6
-	SADD16	r11,r12,r2		; r11= t[1]=t[0]+t[2]
-	PKHBT	r10,r5, r10,LSL #16	; r10= <t[1,5]|t[0,5]>
-	SSUB16	r2, r12,r2		; r2 = t[2]=t[0]-t[2]
-idct4_3core_stage3_5_v6
-	SSUB16	r5, r6, r10		; r5 = t[5]'=t[6]-t[5]
-	SADD16	r6, r6, r10		; r6 = t[6]=t[6]+t[5]
-	SADD16	r10,r12,r3		; r10= t[0]'=t[0]+t[3]
-	SSUB16	r3, r12,r3		; r3 = t[3]=t[0]-t[3]
-; Stage 4:
-	; The first store post-increments r0 by 4, so later offsets are 16*k-4.
-	SADD16	r12,r10,r7		; r12= t[0]+t[7]
-	STR	r12,[r0], #4		; y[0<<3] = t[0]+t[7]
-	SADD16	r12,r11,r6		; r12= t[1]+t[6]
-	STR	r12,[r0, #12]		; y[1<<3] = t[1]+t[6]
-	SADD16	r12,r2, r5		; r12= t[2]+t[5]
-	STR	r12,[r0, #28]		; y[2<<3] = t[2]+t[5]
-	SADD16	r12,r3, r4		; r12= t[3]+t[4]
-	STR	r12,[r0, #44]		; y[3<<3] = t[3]+t[4]
-	SSUB16	r4, r3, r4		; r4 = t[3]-t[4]
-	STR	r4, [r0, #60]		; y[4<<3] = t[3]-t[4]
-	SSUB16	r5, r2, r5		; r5 = t[2]-t[5]
-	STR	r5, [r0, #76]		; y[5<<3] = t[2]-t[5]
-	SSUB16	r6, r11,r6		; r6 = t[1]-t[6]
-	STR	r6, [r0, #92]		; y[6<<3] = t[1]-t[6]
-	SSUB16	r7, r10,r7		; r7 = t[0]-t[7]
-	STR	r7, [r0, #108]		; y[7<<3] = t[0]-t[7]
-	MOV	PC,r14
-	ENDP
-
-; Another copy so the LDRD offsets are less than +/- 255.
-; Same values as OC_C7S1..OC_C3S5; kept 8-byte aligned and in this order
-;  because they are loaded in adjacent pairs with LDRD (C7S1/C1S7,
-;  C6S2/C2S6, C5S3/C3S5).
-	ALIGN 8
-OC_C7S1_4_v6
-	DCD	12785 ; 31F1
-OC_C1S7_4_v6
-	DCD	64277 ; FB15
-OC_C6S2_4_v6
-	DCD	25080 ; 61F8
-OC_C2S6_4_v6
-	DCD	60547 ; EC83
-OC_C5S3_4_v6
-	DCD	36410 ; 8E3A
-OC_C3S5_4_v6
-	DCD	54491 ; D4DB
-
-idct4_4core_down_v6 PROC
-	; Stages 1-3 for two input rows at once, reading x[0,0..3] and
-	;  x[1,0..2], with 8 pre-added to the t[.,0] terms. It finishes by
-	;  branching to idct8_8core_down_stage3_5_v6 (defined elsewhere in the
-	;  file), which is expected to complete the transform and return.
-	; The label idct4_4core_down_stage3_v6 is an entry point used by
-	;  idct3_3core_down_v6.
-	; r0 =       ogg_int16_t *_y (destination)
-	; r1 = const ogg_int16_t *_x (source)
-	;      r1 is advanced by 32 bytes.
-	; Overwrites r2-r12 before the branch.
-; Stage 1:
-	LDRD	r10,[r1], #16	; r10= <x[0,1]|x[0,0]>; r11= <x[0,3]|x[0,2]>
-	LDRD	r2, OC_C5S3_4_v6	; r2 = OC_C5S3; r3 = OC_C3S5
-	LDRD	r4, [r1], #16	; r4 = <x[1,1]|x[1,0]>; r5 = <x[1,3]|x[1,2]>
-	SMULWT	r9, r3, r11		; r9 = t[0,6]=OC_C3S5*x[0,3]>>16
-	LDRD	r6, OC_C6S2_4_v6	; r6 = OC_C6S2; r7 = OC_C2S6
-	SMULWT	r8, r2, r11		; r8 = -t[0,5]=OC_C5S3*x[0,3]>>16
-; Here we cheat: row 3 had just a DC, so x[0,3]==x[1,3] by definition.
-	PKHBT	r9, r9, r9, LSL #16	; r9 = <t[0,6]|t[0,6]>
-	SMULWB	r3, r7, r11		; r3 = t[0,3]=OC_C2S6*x[0,2]>>16
-	PKHBT	r8, r8, r8, LSL #16	; r8 = <-t[0,5]|-t[0,5]>
-	SMULWB	r2, r6, r11		; r2 = t[0,2]=OC_C6S2*x[0,2]>>16
-	LDR	r11,OC_C4S4
-	SMULWB	r12,r7, r5		; r12= t[1,3]=OC_C2S6*x[1,2]>>16
-	MOV	r7,#8
-	SMULWB	r5, r6, r5		; r5 = t[1,2]=OC_C6S2*x[1,2]>>16
-	PKHBT	r3, r3, r12,LSL #16	; r3 = <t[1,3]|t[0,3]>
-	SMLAWB	r12,r11,r10,r7		; r12= t[0,0]+8=(OC_C4S4*x[0,0]>>16)+8
-	PKHBT	r2, r2, r5, LSL #16	; r2 = <t[1,2]|t[0,2]>
-	SMLAWB	r5, r11,r4 ,r7		; r5 = t[1,0]+8=(OC_C4S4*x[1,0]>>16)+8
-	LDRD	r6, OC_C7S1_4_v6	; r6 = OC_C7S1; r7 = OC_C1S7
-	PKHBT	r12,r12,r5, LSL #16	; r12= <t[1,0]+8|t[0,0]+8>
-	SMULWT  r5, r7, r4		; r5 = t[1,7]=OC_C1S7*x[1,1]>>16
-	SMULWT  r7, r7, r10		; r7 = t[0,7]=OC_C1S7*x[0,1]>>16
-	SMULWT	r10,r6, r10		; r10= t[0,4]=OC_C7S1*x[0,1]>>16
-	PKHBT	r7, r7, r5, LSL #16	; r7 = <t[1,7]|t[0,7]>
-	SMULWT	r4, r6, r4		; r4 = t[1,4]=OC_C7S1*x[1,1]>>16
-; Stage 2:
-	SSUB16	r6, r7, r9		; r6 = t[7]-t[6]
-	PKHBT	r4, r10,r4, LSL #16	; r4 = <t[1,4]|t[0,4]>
-	SADD16	r7, r7, r9		; r7 = t[7]=t[7]+t[6]
-	SMULWT	r9, r11,r6		; r9 = t[1,6]=OC_C4S4*r6T>>16
-	; r8 holds -t[5], so the add below yields the difference and the
-	;  subtract yields the sum.
-	SADD16	r5, r4, r8		; r5 = t[4]-t[5]
-	SMULWB	r6, r11,r6		; r6 = t[0,6]=OC_C4S4*r6B>>16
-	SSUB16	r4, r4, r8		; r4 = t[4]=t[4]+t[5]
-	SMULWT	r10,r11,r5		; r10= t[1,5]=OC_C4S4*r5T>>16
-	PKHBT	r6, r6, r9, LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWB	r5, r11,r5		; r5 = t[0,5]=OC_C4S4*r5B>>16
-; Stage 3:
-idct4_4core_down_stage3_v6
-	SADD16	r11,r12,r2		; r11= t[1]+8=t[0]+t[2]+8
-	PKHBT	r10,r5, r10,LSL #16	; r10= <t[1,5]|t[0,5]>
-	SSUB16	r2, r12,r2		; r2 = t[2]+8=t[0]-t[2]+8
-	B	idct8_8core_down_stage3_5_v6
-	ENDP
-
-idct8_8core_v6 PROC	; Stages 1-3 of the 8-input IDCT for two rows, x[0,*] and x[1,*], packed as halfword pairs
-	STMFD	r13!,{r0,r14}		; r0 and r14 are used as scratch below
-; Stage 1:
-	;5-6 rotation by 3pi/16
-	LDRD	r10,OC_C5S3_4_v6	; r10= OC_C5S3, r11= OC_C3S5
-	LDR	r4, [r1,#8]		; r4 = <x[0,5]|x[0,4]>
-	LDR	r7, [r1,#24]		; r7 = <x[1,5]|x[1,4]>
-	SMULWT	r5, r11,r4		; r5 = OC_C3S5*x[0,5]>>16
-	LDR	r0, [r1,#4]		; r0 = <x[0,3]|x[0,2]>
-	SMULWT	r3, r11,r7		; r3 = OC_C3S5*x[1,5]>>16
-	LDR	r12,[r1,#20]		; r12= <x[1,3]|x[1,2]>
-	SMULWT	r6, r11,r0		; r6 = OC_C3S5*x[0,3]>>16
-	SMULWT	r11,r11,r12		; r11= OC_C3S5*x[1,3]>>16
-	SMLAWT	r6, r10,r4, r6		; r6 = t[0,6]=r6+(OC_C5S3*x[0,5]>>16)
-	PKHBT	r5, r5, r3, LSL #16	; r5 = <r3|r5>
-	SMLAWT	r11,r10,r7, r11		; r11= t[1,6]=r11+(OC_C5S3*x[1,5]>>16)
-	PKHBT	r4, r4, r7, LSL #16	; r4 = <x[1,4]|x[0,4]>
-	SMULWT	r3, r10,r0		; r3 = OC_C5S3*x[0,3]>>16
-	PKHBT	r6, r6, r11,LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWT	r8, r10,r12		; r8 = OC_C5S3*x[1,3]>>16
-	;2-3 rotation by 6pi/16
-	LDRD	r10,OC_C6S2_4_v6	; r10= OC_C6S2, r11= OC_C2S6
-	PKHBT	r3, r3, r8, LSL #16	; r3 = <r8|r3>
-	LDR	r8, [r1,#12]		; r8 = <x[0,7]|x[0,6]>
-	SMULWB	r2, r10,r0		; r2 = OC_C6S2*x[0,2]>>16
-	SSUB16	r5, r5, r3		; r5 = <t[1,5]|t[0,5]>
-	SMULWB	r9, r10,r12		; r9 = OC_C6S2*x[1,2]>>16
-	LDR	r7, [r1,#28]		; r7 = <x[1,7]|x[1,6]>
-	SMULWB	r3, r10,r8		; r3 = OC_C6S2*x[0,6]>>16
-	SMULWB	r10,r10,r7		; r10= OC_C6S2*x[1,6]>>16
-	PKHBT	r2, r2, r9, LSL #16	; r2 = <r9|r2>
-	SMLAWB	r3, r11,r0, r3		; r3 = t[0,3]=r3+(OC_C2S6*x[0,2]>>16)
-	SMLAWB	r10,r11,r12,r10		; r10= t[1,3]=r10+(OC_C2S6*x[1,2]>>16)
-	SMULWB	r9, r11,r8		; r9 = OC_C2S6*x[0,6]>>16
-	PKHBT	r3, r3, r10,LSL #16	; r3 = <t[1,3]|t[0,3]>
-	SMULWB	r12,r11,r7		; r12= OC_C2S6*x[1,6]>>16
-	;4-7 rotation by 7pi/16
-	LDRD	r10,OC_C7S1_8_v6	; r10= OC_C7S1, r11= OC_C1S7
-	PKHBT	r9, r9, r12,LSL #16	; r9 = <r12|r9>
-	LDR	r0, [r1],#16		; r0 = <x[0,1]|x[0,0]>, r1 += 16
-	PKHTB	r7, r7, r8, ASR #16	; r7 = <x[1,7]|x[0,7]>
-	SSUB16	r2, r2, r9		; r2 = <t[1,2]|t[0,2]>
-	SMULWB	r9, r10,r7		; r9 = OC_C7S1*x[0,7]>>16
-	LDR	r14,[r1],#16		; r14= <x[1,1]|x[1,0]>, r1 += 16
-	SMULWT	r12,r10,r7		; r12= OC_C7S1*x[1,7]>>16
-	SMULWT	r8, r10,r0		; r8 = OC_C7S1*x[0,1]>>16
-	SMULWT	r10,r10,r14		; r10= OC_C7S1*x[1,1]>>16
-	SMLAWT	r9, r11,r0, r9		; r9 = t[0,7]=r9+(OC_C1S7*x[0,1]>>16)
-	PKHBT	r8, r8, r10,LSL #16	; r8 = <r10|r8>
-	SMLAWT	r12,r11,r14,r12		; r12= t[1,7]=r12+(OC_C1S7*x[1,1]>>16)
-	PKHBT	r0, r0, r14,LSL #16	; r0 = <x[1,0]|x[0,0]>
-	SMULWB	r10,r11,r7		; r10= OC_C1S7*x[0,7]>>16
-	PKHBT	r9, r9, r12,LSL #16	; r9 = <t[1,7]|t[0,7]>
-	SMULWT	r12,r11,r7		; r12= OC_C1S7*x[1,7]>>16
-	;0-1 butterfly
-	LDR	r11,OC_C4S4
-	PKHBT	r10,r10,r12,LSL #16	; r10= <r12|r10>
-	SADD16	r7, r0, r4		; r7 = x[0]+x[4]
-	SSUB16	r10,r8, r10		; r10= <t[1,4]|t[0,4]>
-	SSUB16	r4, r0, r4		; r4 = x[0]-x[4]
-	SMULWB	r8, r11,r7		; r8 = t[0,0]=OC_C4S4*r7B>>16
-	SMULWT	r12,r11,r7		; r12= t[1,0]=OC_C4S4*r7T>>16
-	SMULWB	r7, r11,r4		; r7 = t[0,1]=OC_C4S4*r4B>>16
-	PKHBT	r12,r8, r12,LSL #16	; r12= <t[1,0]|t[0,0]>
-	SMULWT	r8, r11,r4		; r8 = t[1,1]=OC_C4S4*r4T>>16
-; Stage 2:
-	SADD16	r4, r10,r5		; r4 = t[4]'=t[4]+t[5]
-	PKHBT	r8, r7, r8, LSL #16	; r8 = <t[1,1]|t[0,1]>
-	SSUB16	r5, r10,r5		; r5 = t[4]-t[5]
-	SMULWB	r10,r11,r5		; r10= t[0,5]=OC_C4S4*r5B>>16
-	SADD16	r7, r9, r6		; r7 = t[7]'=t[7]+t[6]
-	SMULWT	r5, r11,r5		; r5 = t[1,5]=OC_C4S4*r5T>>16
-	SSUB16	r6, r9, r6		; r6 = t[7]-t[6]
-	SMULWB	r9, r11,r6		; r9 = t[0,6]=OC_C4S4*r6B>>16
-	PKHBT	r10,r10,r5, LSL #16	; r10= <t[1,5]|t[0,5]>
-	SMULWT	r6, r11,r6		; r6 = t[1,6]=OC_C4S4*r6T>>16
-; Stage 3:
-	SADD16	r11,r8, r2		; r11= t[1]'=t[1]+t[2]
-	PKHBT	r6, r9, r6, LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SSUB16	r2, r8, r2		; r2 = t[2]=t[1]-t[2]
-	LDMFD	r13!,{r0,r14}		; restore the r0 and r14 saved on entry
-	B	idct4_3core_stage3_5_v6	; tail branch; that label is not defined in this PROC
-	ENDP
-
-; Another copy of the constants so the literal-relative LDRD offsets are less than +/- 255.
-	ALIGN 8
-OC_C7S1_8_v6	; LDRD from this label loads the pair OC_C7S1, OC_C1S7
-	DCD	12785 ; 31F1
-OC_C1S7_8_v6
-	DCD	64277 ; FB15
-OC_C6S2_8_v6	; LDRD from this label loads the pair OC_C6S2, OC_C2S6
-	DCD	25080 ; 61F8
-OC_C2S6_8_v6
-	DCD	60547 ; EC83
-OC_C5S3_8_v6	; LDRD from this label loads the pair OC_C5S3, OC_C3S5
-	DCD	36410 ; 8E3A
-OC_C3S5_8_v6
-	DCD	54491 ; D4DB
-
-idct8_8core_down_v6 PROC	; Full 8-input IDCT for two packed rows, x[0,*] and x[1,*]; results are (t+8)>>4 and stored through r0
-	STMFD	r13!,{r0,r14}		; r0 and r14 are used as scratch below
-; Stage 1:
-	;5-6 rotation by 3pi/16
-	LDRD	r10,OC_C5S3_8_v6	; r10= OC_C5S3, r11= OC_C3S5
-	LDR	r4, [r1,#8]		; r4 = <x[0,5]|x[0,4]>
-	LDR	r7, [r1,#24]		; r7 = <x[1,5]|x[1,4]>
-	SMULWT	r5, r11,r4		; r5 = OC_C3S5*x[0,5]>>16
-	LDR	r0, [r1,#4]		; r0 = <x[0,3]|x[0,2]>
-	SMULWT	r3, r11,r7		; r3 = OC_C3S5*x[1,5]>>16
-	LDR	r12,[r1,#20]		; r12= <x[1,3]|x[1,2]>
-	SMULWT	r6, r11,r0		; r6 = OC_C3S5*x[0,3]>>16
-	SMULWT	r11,r11,r12		; r11= OC_C3S5*x[1,3]>>16
-	SMLAWT	r6, r10,r4, r6		; r6 = t[0,6]=r6+(OC_C5S3*x[0,5]>>16)
-	PKHBT	r5, r5, r3, LSL #16	; r5 = <r3|r5>
-	SMLAWT	r11,r10,r7, r11		; r11= t[1,6]=r11+(OC_C5S3*x[1,5]>>16)
-	PKHBT	r4, r4, r7, LSL #16	; r4 = <x[1,4]|x[0,4]>
-	SMULWT	r3, r10,r0		; r3 = OC_C5S3*x[0,3]>>16
-	PKHBT	r6, r6, r11,LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SMULWT	r8, r10,r12		; r8 = OC_C5S3*x[1,3]>>16
-	;2-3 rotation by 6pi/16
-	LDRD	r10,OC_C6S2_8_v6	; r10= OC_C6S2, r11= OC_C2S6
-	PKHBT	r3, r3, r8, LSL #16	; r3 = <r8|r3>
-	LDR	r8, [r1,#12]		; r8 = <x[0,7]|x[0,6]>
-	SMULWB	r2, r10,r0		; r2 = OC_C6S2*x[0,2]>>16
-	SSUB16	r5, r5, r3		; r5 = <t[1,5]|t[0,5]>
-	SMULWB	r9, r10,r12		; r9 = OC_C6S2*x[1,2]>>16
-	LDR	r7, [r1,#28]		; r7 = <x[1,7]|x[1,6]>
-	SMULWB	r3, r10,r8		; r3 = OC_C6S2*x[0,6]>>16
-	SMULWB	r10,r10,r7		; r10= OC_C6S2*x[1,6]>>16
-	PKHBT	r2, r2, r9, LSL #16	; r2 = <r9|r2>
-	SMLAWB	r3, r11,r0, r3		; r3 = t[0,3]=r3+(OC_C2S6*x[0,2]>>16)
-	SMLAWB	r10,r11,r12,r10		; r10= t[1,3]=r10+(OC_C2S6*x[1,2]>>16)
-	SMULWB	r9, r11,r8		; r9 = OC_C2S6*x[0,6]>>16
-	PKHBT	r3, r3, r10,LSL #16	; r3 = <t[1,3]|t[0,3]>
-	SMULWB	r12,r11,r7		; r12= OC_C2S6*x[1,6]>>16
-	;4-7 rotation by 7pi/16
-	LDRD	r10,OC_C7S1_8_v6	; r10= OC_C7S1, r11= OC_C1S7
-	PKHBT	r9, r9, r12,LSL #16	; r9 = <r12|r9>
-	LDR	r0, [r1],#16		; r0 = <x[0,1]|x[0,0]>, r1 += 16
-	PKHTB	r7, r7, r8, ASR #16	; r7 = <x[1,7]|x[0,7]>
-	SSUB16	r2, r2, r9		; r2 = <t[1,2]|t[0,2]>
-	SMULWB	r9, r10,r7		; r9 = OC_C7S1*x[0,7]>>16
-	LDR	r14,[r1],#16		; r14= <x[1,1]|x[1,0]>, r1 += 16
-	SMULWT	r12,r10,r7		; r12= OC_C7S1*x[1,7]>>16
-	SMULWT	r8, r10,r0		; r8 = OC_C7S1*x[0,1]>>16
-	SMULWT	r10,r10,r14		; r10= OC_C7S1*x[1,1]>>16
-	SMLAWT	r9, r11,r0, r9		; r9 = t[0,7]=r9+(OC_C1S7*x[0,1]>>16)
-	PKHBT	r8, r8, r10,LSL #16	; r8 = <r10|r8>
-	SMLAWT	r12,r11,r14,r12		; r12= t[1,7]=r12+(OC_C1S7*x[1,1]>>16)
-	PKHBT	r0, r0, r14,LSL #16	; r0 = <x[1,0]|x[0,0]>
-	SMULWB	r10,r11,r7		; r10= OC_C1S7*x[0,7]>>16
-	PKHBT	r9, r9, r12,LSL #16	; r9 = <t[1,7]|t[0,7]>
-	SMULWT	r12,r11,r7		; r12= OC_C1S7*x[1,7]>>16
-	;0-1 butterfly
-	LDR	r11,OC_C4S4
-	MOV	r14,#8			; rounding bias folded into t[0] and t[1] for the final >>4
-	PKHBT	r10,r10,r12,LSL #16	; r10= <r12|r10>
-	SADD16	r7, r0, r4		; r7 = x[0]+x[4]
-	SSUB16	r10,r8, r10		; r10= <t[1,4]|t[0,4]>
-	SMLAWB	r8, r11,r7, r14		; r8 = t[0,0]+8=(OC_C4S4*r7B>>16)+8
-	SSUB16	r4, r0, r4		; r4 = x[0]-x[4]
-	SMLAWT	r12,r11,r7, r14		; r12= t[1,0]+8=(OC_C4S4*r7T>>16)+8
-	SMLAWB	r7, r11,r4, r14		; r7 = t[0,1]+8=(OC_C4S4*r4B>>16)+8
-	PKHBT	r12,r8, r12,LSL #16	; r12= <t[1,0]+8|t[0,0]+8>
-	SMLAWT	r8, r11,r4, r14		; r8 = t[1,1]+8=(OC_C4S4*r4T>>16)+8
-; Stage 2:
-	SADD16	r4, r10,r5		; r4 = t[4]'=t[4]+t[5]
-	PKHBT	r8, r7, r8, LSL #16	; r8 = <t[1,1]+8|t[0,1]+8>
-	SSUB16	r5, r10,r5		; r5 = t[4]-t[5]
-	SMULWB	r10,r11,r5		; r10= t[0,5]=OC_C4S4*r5B>>16
-	SADD16	r7, r9, r6		; r7 = t[7]'=t[7]+t[6]
-	SMULWT	r5, r11,r5		; r5 = t[1,5]=OC_C4S4*r5T>>16
-	SSUB16	r6, r9, r6		; r6 = t[7]-t[6]
-	SMULWB	r9, r11,r6		; r9 = t[0,6]=OC_C4S4*r6B>>16
-	PKHBT	r10,r10,r5, LSL #16	; r10= <t[1,5]|t[0,5]>
-	SMULWT	r6, r11,r6		; r6 = t[1,6]=OC_C4S4*r6T>>16
-; Stage 3:
-	SADD16	r11,r8, r2		; r11= t[1]'+8=t[1]+t[2]+8
-	PKHBT	r6, r9, r6, LSL #16	; r6 = <t[1,6]|t[0,6]>
-	SSUB16	r2, r8, r2		; r2 = t[2]+8=t[1]-t[2]+8
-	LDMFD	r13!,{r0,r14}		; restore the r0 and r14 saved on entry
-idct8_8core_down_stage3_5_v6	; also reached by the branch at the end of idct4_4core_down_stage3_v6
-	SSUB16	r5, r6, r10		; r5 = t[5]'=t[6]-t[5]
-	SADD16	r6, r6, r10		; r6 = t[6]=t[6]+t[5]
-	SADD16	r10,r12,r3		; r10= t[0]'+8=t[0]+t[3]+8
-	SSUB16	r3, r12,r3		; r3 = t[3]+8=t[0]-t[3]+8
-; Stage 4:
-	SADD16	r12,r10,r7		; r12= t[0]+t[7]+8
-	SSUB16	r7, r10,r7		; r7 = t[0]-t[7]+8
-	MOV	r10,r12,ASR #4		; top halfword of r10 = top halfword of r12 >>4
-	MOV	r12,r12,LSL #16		; move the bottom halfword to the top so ASR #20 sign-extends it
-	PKHTB	r10,r10,r12,ASR #20	; r10= t[0]+t[7]+8>>4
-	STR	r10,[r0], #4		; y[0<<3] = t[0]+t[7]+8>>4, r0 += 4
-	SADD16	r12,r11,r6		; r12= t[1]+t[6]+8
-	SSUB16	r6, r11,r6		; r6 = t[1]-t[6]+8
-	MOV	r10,r12,ASR #4
-	MOV	r12,r12,LSL #16
-	PKHTB	r10,r10,r12,ASR #20	; r10= t[1]+t[6]+8>>4
-	STR	r10,[r0, #12]		; y[1<<3] = t[1]+t[6]+8>>4
-	SADD16	r12,r2, r5		; r12= t[2]+t[5]+8
-	SSUB16	r5, r2, r5		; r5 = t[2]-t[5]+8
-	MOV	r10,r12,ASR #4
-	MOV	r12,r12,LSL #16
-	PKHTB	r10,r10,r12,ASR #20	; r10= t[2]+t[5]+8>>4
-	STR	r10,[r0, #28]		; y[2<<3] = t[2]+t[5]+8>>4
-	SADD16	r12,r3, r4		; r12= t[3]+t[4]+8
-	SSUB16	r4, r3, r4		; r4 = t[3]-t[4]+8
-	MOV	r10,r12,ASR #4
-	MOV	r12,r12,LSL #16
-	PKHTB	r10,r10,r12,ASR #20	; r10= t[3]+t[4]+8>>4
-	STR	r10,[r0, #44]		; y[3<<3] = t[3]+t[4]+8>>4
-	MOV	r10,r4, ASR #4
-	MOV	r4, r4, LSL #16
-	PKHTB	r10,r10,r4, ASR #20	; r10= t[3]-t[4]+8>>4
-	STR	r10,[r0, #60]		; y[4<<3] = t[3]-t[4]+8>>4
-	MOV	r10,r5, ASR #4
-	MOV	r5, r5, LSL #16
-	PKHTB	r10,r10,r5, ASR #20	; r10= t[2]-t[5]+8>>4
-	STR	r10,[r0, #76]		; y[5<<3] = t[2]-t[5]+8>>4
-	MOV	r10,r6, ASR #4
-	MOV	r6, r6, LSL #16
-	PKHTB	r10,r10,r6, ASR #20	; r10= t[1]-t[6]+8>>4
-	STR	r10,[r0, #92]		; y[6<<3] = t[1]-t[6]+8>>4
-	MOV	r10,r7, ASR #4
-	MOV	r7, r7, LSL #16
-	PKHTB	r10,r10,r7, ASR #20	; r10= t[0]-t[7]+8>>4
-	STR	r10,[r0, #108]		; y[7<<3] = t[0]-t[7]+8>>4
-	MOV	PC,r14			; return
-	ENDP
- ]
-
- [ OC_ARM_ASM_NEON
-	EXPORT	oc_idct8x8_1_neon
-	EXPORT	oc_idct8x8_neon
-
-	ALIGN 16
-OC_IDCT_CONSTS_NEON	; eight 16-bit constants, loaded as {D0,D1} by the NEON IDCT routines
-	DCW	    8		; D0[0]
-	DCW	64277 ; FB15 (C1S7), D0[1]
-	DCW	60547 ; EC83 (C2S6), D0[2]
-	DCW	54491 ; D4DB (C3S5), D0[3]
-	DCW	46341 ; B505 (C4S4), D1[0]
-	DCW	36410 ; 8E3A (C5S3), D1[1]
-	DCW	25080 ; 61F8 (C6S2), D1[2]
-	DCW	12785 ; 31F1 (C7S1), D1[3]
-
-oc_idct8x8_1_neon PROC	; Fill all 64 entries of _y with the 16-bit value _dc
-	; r0 = ogg_int16_t  *_y
-	; r1 = ogg_uint16_t  _dc
-	VDUP.S16	Q0, r1		; Q0 = _dc copied into all eight 16-bit lanes
-	VMOV		Q1, Q0		; Q1 = Q0, so D0-D3 hold 16 copies
-	VST1.64		{D0, D1, D2, D3}, [r0@128]!	; each store writes 32 bytes (16 values)
-	VST1.64		{D0, D1, D2, D3}, [r0@128]!
-	VST1.64		{D0, D1, D2, D3}, [r0@128]!
-	VST1.64		{D0, D1, D2, D3}, [r0@128]	; last 32 bytes; no writeback needed
-	MOV	PC, r14			; return
-	ENDP
-
-oc_idct8x8_neon PROC	; 8x8 IDCT of _x into _y; _x is zeroed afterwards unless _y==_x
-	; r0 = ogg_int16_t *_y
-	; r1 = ogg_int16_t *_x
-	; r2 = int          _last_zzi
-	CMP	r2, #10
-	BLE	oc_idct8x8_10_neon	; _last_zzi<=10: use the reduced variant instead
-oc_idct8x8_slow_neon
-	VPUSH		{D8-D15}	; Q4-Q7 are overwritten by oc_idct8x8_stage123_neon
-	MOV	r2, r1			; r2 = read cursor; r1 is kept for the clear below
-	ADR	r3, OC_IDCT_CONSTS_NEON
-	; Row transforms (input is pre-transposed)
-	VLD1.64		{D16,D17,D18,D19}, [r2@128]!
-	VLD1.64		{D20,D21,D22,D23}, [r2@128]!
-	VLD1.64		{D24,D25,D26,D27}, [r2@128]!
-	VSUB.S16	Q1, Q8, Q12	; Q1 = x[0]-x[4]
-	VLD1.64		{D28,D29,D30,D31}, [r2@128]
-	VADD.S16	Q8, Q8, Q12	; Q8 = x[0]+x[4]
-	VLD1.64		{D0,D1},           [r3@128]	; Q0 = IDCT constants
-	MOV	r12, r14		; save the return address; BL overwrites r14
-	BL	oc_idct8x8_stage123_neon
-; Stage 4
-	VSUB.S16	Q15,Q8, Q7	; Q15 = y[7]=t[0]'-t[7]'
-	VADD.S16	Q8, Q8, Q7	; Q8  = y[0]=t[0]'+t[7]'
-	VSUB.S16	Q14,Q9, Q3	; Q14 = y[6]=t[1]'-t[6]''
-	VADD.S16	Q9, Q9, Q3	; Q9  = y[1]=t[1]'+t[6]''
-	VSUB.S16	Q13,Q10,Q5	; Q13 = y[5]=t[2]'-t[5]''
-	VADD.S16	Q10,Q10,Q5	; Q10 = y[2]=t[2]'+t[5]''
-	VTRN.16		Q14,Q15		; first step of the transpose, issued early
-	VSUB.S16	Q12,Q11,Q4	; Q12 = y[4]=t[3]'-t[4]'
-	VADD.S16	Q11,Q11,Q4	; Q11 = y[3]=t[3]'+t[4]'
-	; 8x8 Transpose
-	VTRN.16		Q8, Q9
-	VTRN.16		Q10,Q11
-	VTRN.16		Q12,Q13
-	VTRN.32		Q8, Q10
-	VTRN.32		Q9, Q11
-	VTRN.32		Q12,Q14
-	VTRN.32		Q13,Q15
-	VSWP		D17,D24
-	VSUB.S16	Q1, Q8, Q12	; Q1 = x[0]-x[4]
-	VSWP		D19,D26
-	VADD.S16	Q8, Q8, Q12	; Q8 = x[0]+x[4]
-	VSWP		D21,D28
-	VSWP		D23,D30
-	; Column transforms
-	BL	oc_idct8x8_stage123_neon
-	CMP	r0,r1			; _y==_x? tested by the BEQ after stage 4
-	; We have to put the return address back in the LR, or the branch
-	;  predictor will not recognize the function return and mis-predict the
-	;  entire call stack.
-	MOV	r14, r12
-; Stage 4
-	VSUB.S16	Q15,Q8, Q7	; Q15 = y[7]=t[0]'-t[7]'
-	VADD.S16	Q8, Q8, Q7	; Q8  = y[0]=t[0]'+t[7]'
-	VSUB.S16	Q14,Q9, Q3	; Q14 = y[6]=t[1]'-t[6]''
-	VADD.S16	Q9, Q9, Q3	; Q9  = y[1]=t[1]'+t[6]''
-	VSUB.S16	Q13,Q10,Q5	; Q13 = y[5]=t[2]'-t[5]''
-	VADD.S16	Q10,Q10,Q5	; Q10 = y[2]=t[2]'+t[5]''
-	VSUB.S16	Q12,Q11,Q4	; Q12 = y[4]=t[3]'-t[4]'
-	VADD.S16	Q11,Q11,Q4	; Q11 = y[3]=t[3]'+t[4]'
-	BEQ		oc_idct8x8_slow_neon_noclear	; same buffer: skip zeroing _x
-	VMOV.I8		Q2,#0		; Q2, Q3 = zeros used to clear _x
-	VPOP		{D8-D15}
-	VMOV.I8		Q3,#0
-	VRSHR.S16	Q8, Q8, #4	; Q8  = y[0]+8>>4
-	VST1.64		{D4, D5, D6, D7}, [r1@128]!	; zero 32 bytes of _x
-	VRSHR.S16	Q9, Q9, #4	; Q9  = y[1]+8>>4
-	VRSHR.S16	Q10,Q10,#4	; Q10 = y[2]+8>>4
-	VST1.64		{D4, D5, D6, D7}, [r1@128]!
-	VRSHR.S16	Q11,Q11,#4	; Q11 = y[3]+8>>4
-	VRSHR.S16	Q12,Q12,#4	; Q12 = y[4]+8>>4
-	VST1.64		{D4, D5, D6, D7}, [r1@128]!
-	VRSHR.S16	Q13,Q13,#4	; Q13 = y[5]+8>>4
-	VRSHR.S16	Q14,Q14,#4	; Q14 = y[6]+8>>4
-	VST1.64		{D4, D5, D6, D7}, [r1@128]
-	VRSHR.S16	Q15,Q15,#4	; Q15 = y[7]+8>>4
-	VSTMIA		r0, {D16-D31}	; store y[0..7] (128 bytes) to _y
-	MOV	PC, r14
-
-oc_idct8x8_slow_neon_noclear
-	VPOP		{D8-D15}
-	VRSHR.S16	Q8, Q8, #4	; Q8  = y[0]+8>>4
-	VRSHR.S16	Q9, Q9, #4	; Q9  = y[1]+8>>4
-	VRSHR.S16	Q10,Q10,#4	; Q10 = y[2]+8>>4
-	VRSHR.S16	Q11,Q11,#4	; Q11 = y[3]+8>>4
-	VRSHR.S16	Q12,Q12,#4	; Q12 = y[4]+8>>4
-	VRSHR.S16	Q13,Q13,#4	; Q13 = y[5]+8>>4
-	VRSHR.S16	Q14,Q14,#4	; Q14 = y[6]+8>>4
-	VRSHR.S16	Q15,Q15,#4	; Q15 = y[7]+8>>4
-	VSTMIA		r0, {D16-D31}	; store y[0..7] (128 bytes) to _y
-	MOV	PC, r14
-	ENDP
-
-oc_idct8x8_stage123_neon PROC	; in: Q8=x[0]+x[4], Q1=x[0]-x[4], Q9=x[1], Q10=x[2], Q11=x[3], Q13=x[5], Q14=x[6], Q15=x[7], Q0=constants
-; Stages 1 & 2 (out: Q8-Q11 = t[0]''..t[3]'', Q4 = t[4]', Q5 = t[5]'', Q3 = t[6]'', Q7 = t[7]')
-	VMULL.S16	Q4, D18,D1[3]
-	VMULL.S16	Q5, D19,D1[3]
-	VMULL.S16	Q7, D30,D1[3]
-	VMULL.S16	Q6, D31,D1[3]
-	VMULL.S16	Q2, D30,D0[1]	; constants >= 0x8000 act as C-65536 here, so >>16 yields (C*x>>16)-x
-	VMULL.S16	Q3, D31,D0[1]
-	VSHRN.S32	D8, Q4, #16
-	VSHRN.S32	D9, Q5, #16	; Q4 = (OC_C7S1*x[1]>>16)
-	VSHRN.S32	D14,Q7, #16
-	VSHRN.S32	D15,Q6, #16	; Q7 = (OC_C7S1*x[7]>>16)
-	VSHRN.S32	D4, Q2, #16
-	VSHRN.S32	D5, Q3, #16	; Q2 = (OC_C1S7*x[7]>>16)-x[7]
-	VSUB.S16	Q4, Q4, Q15
-	VADD.S16	Q7, Q7, Q9
-	VSUB.S16	Q4, Q4, Q2	; Q4 = t[4]
-	VMULL.S16	Q2, D18,D0[1]
-	VMULL.S16	Q9, D19,D0[1]
-	VMULL.S16	Q5, D26,D0[3]
-	VMULL.S16	Q3, D27,D0[3]
-	VMULL.S16	Q6, D22,D0[3]
-	VMULL.S16	Q12,D23,D0[3]
-	VSHRN.S32	D4, Q2, #16
-	VSHRN.S32	D5, Q9, #16	; Q2 = (OC_C1S7*x[1]>>16)-x[1]
-	VSHRN.S32	D10,Q5, #16
-	VSHRN.S32	D11,Q3, #16	; Q5 = (OC_C3S5*x[5]>>16)-x[5]
-	VSHRN.S32	D12,Q6, #16
-	VSHRN.S32	D13,Q12,#16	; Q6 = (OC_C3S5*x[3]>>16)-x[3]
-	VADD.S16	Q7, Q7, Q2	; Q7 = t[7]
-	VSUB.S16	Q5, Q5, Q11
-	VADD.S16	Q6, Q6, Q11
-	VADD.S16	Q5, Q5, Q13
-	VADD.S16	Q6, Q6, Q13
-	VMULL.S16	Q9, D22,D1[1]
-	VMULL.S16	Q11,D23,D1[1]
-	VMULL.S16	Q15,D26,D1[1]
-	VMULL.S16	Q13,D27,D1[1]
-	VMULL.S16	Q2, D20,D1[2]
-	VMULL.S16	Q12,D21,D1[2]
-	VSHRN.S32	D18,Q9, #16
-	VSHRN.S32	D19,Q11,#16	; Q9 = (OC_C5S3*x[3]>>16)-x[3]
-	VSHRN.S32	D30,Q15,#16
-	VSHRN.S32	D31,Q13,#16	; Q15= (OC_C5S3*x[5]>>16)-x[5]
-	VSHRN.S32	D4, Q2, #16
-	VSHRN.S32	D5, Q12,#16	; Q2 = (OC_C6S2*x[2]>>16)
-	VSUB.S16	Q5, Q5, Q9	; Q5 = t[5]
-	VADD.S16	Q6, Q6, Q15	; Q6 = t[6]
-	VSUB.S16	Q2, Q2, Q14
-	VMULL.S16	Q3, D28,D1[2]
-	VMULL.S16	Q11,D29,D1[2]
-	VMULL.S16	Q12,D28,D0[2]
-	VMULL.S16	Q9, D29,D0[2]
-	VMULL.S16	Q13,D20,D0[2]
-	VMULL.S16	Q15,D21,D0[2]
-	VSHRN.S32	D6, Q3, #16
-	VSHRN.S32	D7, Q11,#16	; Q3 = (OC_C6S2*x[6]>>16)
-	VSHRN.S32	D24,Q12,#16
-	VSHRN.S32	D25,Q9, #16	; Q12= (OC_C2S6*x[6]>>16)-x[6]
-	VSHRN.S32	D26,Q13,#16
-	VSHRN.S32	D27,Q15,#16	; Q13= (OC_C2S6*x[2]>>16)-x[2]
-	VSUB.S16	Q9, Q4, Q5	; Q9 = t[4]-t[5]
-	VSUB.S16	Q11,Q7, Q6	; Q11= t[7]-t[6]
-	VADD.S16	Q3, Q3, Q10
-	VADD.S16	Q4, Q4, Q5	; Q4 = t[4]'=t[4]+t[5]
-	VADD.S16	Q7, Q7, Q6	; Q7 = t[7]'=t[7]+t[6]
-	VSUB.S16	Q2, Q2, Q12	; Q2 = t[2]
-	VADD.S16	Q3, Q3, Q13	; Q3 = t[3]
-	VMULL.S16	Q12,D16,D1[0]
-	VMULL.S16	Q13,D17,D1[0]
-	VMULL.S16	Q14,D2, D1[0]
-	VMULL.S16	Q15,D3, D1[0]
-	VMULL.S16	Q5, D18,D1[0]
-	VMULL.S16	Q6, D22,D1[0]
-	VSHRN.S32	D24,Q12,#16
-	VSHRN.S32	D25,Q13,#16
-	VSHRN.S32	D28,Q14,#16
-	VSHRN.S32	D29,Q15,#16
-	VMULL.S16	Q13,D19,D1[0]
-	VMULL.S16	Q15,D23,D1[0]
-	VADD.S16	Q8, Q8, Q12	; Q8 = t[0]
-	VADD.S16	Q1, Q1, Q14	; Q1 = t[1]
-	VSHRN.S32	D10,Q5, #16
-	VSHRN.S32	D12,Q6, #16
-	VSHRN.S32	D11,Q13,#16
-	VSHRN.S32	D13,Q15,#16
-	VADD.S16	Q5, Q5, Q9	; Q5 = t[5]'=OC_C4S4*(t[4]-t[5])>>16
-	VADD.S16	Q6, Q6, Q11	; Q6 = t[6]'=OC_C4S4*(t[7]-t[6])>>16
-; Stage 3
-	VSUB.S16	Q11,Q8, Q3	; Q11 = t[3]''=t[0]-t[3]
-	VADD.S16	Q8, Q8, Q3	; Q8  = t[0]''=t[0]+t[3]
-	VADD.S16	Q9, Q1, Q2	; Q9  = t[1]''=t[1]+t[2]
-	VADD.S16	Q3, Q6, Q5	; Q3  = t[6]''=t[6]'+t[5]'
-	VSUB.S16	Q10,Q1, Q2	; Q10 = t[2]''=t[1]-t[2]
-	VSUB.S16	Q5, Q6, Q5	; Q5  = t[5]''=t[6]'-t[5]'
-	MOV	PC, r14			; return; the caller performs stage 4
-	ENDP
-
-oc_idct8x8_10_neon PROC	; Reduced 8x8 IDCT; uses only the first 4 coefficients of rows 0-3 of _x (r0=_y, r1=_x)
-	ADR	r3, OC_IDCT_CONSTS_NEON
-	VLD1.64		{D0,D1},          [r3@128]	; Q0 = IDCT constants
-	MOV	r2, r1			; r2 = read cursor; r1 is kept for the clear below
-	; Row transforms (input is pre-transposed)
-; Stage 1
-	VLD1.64		{D16,D17,D18,D19},[r2@128]!	; D16 = x[0], D18 = x[1]; D17 and D19 are reloaded below
-	MOV	r12, #16		; post-increment step in bytes (one row of 8 int16)
-	VMULL.S16	Q15,D16,D1[0]	; Q15= OC_C4S4*x[0]-(x[0]<<16)
-	VLD1.64		{D17},            [r2@64], r12	; D17 = x[2] (first 4 values at _x+32), r2 += 16
-	VMULL.S16	Q2, D18,D0[1]	; Q2 = OC_C1S7*x[1]-(x[1]<<16)
-	VLD1.64		{D19},            [r2@64]	; D19 = x[3] (first 4 values at _x+48)
-	VMULL.S16	Q14,D17,D0[2]	; Q14= OC_C2S6*x[2]-(x[2]<<16)
-	VMULL.S16	Q3, D19,D0[3]	; Q3 = OC_C3S5*x[3]-(x[3]<<16)
-	VMULL.S16	Q13,D19,D1[1]	; Q13= OC_C5S3*x[3]-(x[3]<<16)
-	VMULL.S16	Q12,D18,D1[3]	; Q12= OC_C7S1*x[1]
-	VMULL.S16	Q1, D17,D1[2]	; Q1 = OC_C6S2*x[2]
-	VSHRN.S32	D30,Q15,#16	; D30= t[0]-x[0]
-	VSHRN.S32	D4, Q2, #16	; D4 = t[7]-x[1]
-	VSHRN.S32	D31,Q14,#16	; D31= t[3]-x[2]
-	VSHRN.S32	D6, Q3, #16	; D6 = t[6]-x[3]
-	VSHRN.S32	D7, Q13,#16	; D7 = -t[5]-x[3]
-	VSHRN.S32	D5, Q12,#16	; D5 = t[4]
-	VSHRN.S32	D2, Q1, #16	; D2 = t[2]
-	VADD.S16	D4, D4, D18	; D4 = t[7]
-	VADD.S16	D6, D6, D19	; D6 = t[6]
-	VADD.S16	D7, D7, D19	; D7 = -t[5]
-	VADD.S16	Q15,Q15,Q8	; D30= t[0]
-					; D31= t[3]
-; Stages 2 & 3
-	VSUB.S16	Q12,Q2, Q3	; D24= t[7]-t[6]
-					; D25= t[4]'=t[4]+t[5]
-	VADD.S16	Q13,Q2, Q3	; D26= t[7]'=t[7]+t[6]
-					; D27= t[4]-t[5]
-	VMULL.S16	Q11,D24,D1[0]	; Q11= OC_C4S4*(t[7]-t[6])
-					;       -(t[7]-t[6]<<16)
-	VMULL.S16	Q14,D27,D1[0]	; Q14= OC_C4S4*(t[4]-t[5])
-					;       -(t[4]-t[5]<<16)
-	VADD.S16	D16,D30,D31	; D16= t[0]'=t[0]+t[3]
-	VSUB.S16	D17,D30,D2	; D17= t[2]'=t[0]-t[2]
-	VADD.S16	D18,D30,D2	; D18= t[1]'=t[0]+t[2]
-	VSHRN.S32	D22,Q11,#16	; D22= (OC_C4S4*(t[7]-t[6])>>16)
-					;       -(t[7]-t[6])
-	VSHRN.S32	D23,Q14,#16	; D23= (OC_C4S4*(t[4]-t[5])>>16)
-					;       -(t[4]-t[5])
-	VSUB.S16	D19,D30,D31	; D19= t[3]'=t[0]-t[3]
-	VADD.S16	D22,D22,D24	; D22= t[6]'=OC_C4S4*(t[7]-t[6])>>16
-	VADD.S16	D23,D23,D27	; D23= t[5]'=OC_C4S4*(t[4]-t[5])>>16
-	VSUB.S16	D27,D22,D23	; D27= t[5]''=t[6]'-t[5]'
-	VADD.S16	D24,D22,D23	; D24= t[6]''=t[6]'+t[5]'
-; Stage 4
-	VSUB.S16	Q11,Q8, Q13	; D22= y[7]=t[0]'-t[7]'
-					; D23= y[5]=t[2]'-t[5]''
-	VSUB.S16	Q10,Q9, Q12	; D20= y[6]=t[1]'-t[6]''
-					; D21= y[4]=t[3]'-t[4]'
-	VADD.S16	Q8, Q8, Q13	; D16= y[0]=t[0]'+t[7]'
-					; D17= y[2]=t[2]'+t[5]''
-	VADD.S16	Q9, Q9, Q12	; D18= y[1]=t[1]'+t[6]''
-					; D19= y[3]=t[3]'+t[4]'
-	; 8x4 transpose
-	VTRN.16		Q10,Q11		; Q10= c5c4a5a4 c7c6a7a6
-					; Q11= d5d4b5b4 d7d6b7b6
-	VTRN.16		Q8, Q9		; Q8 = c3c2a3a2 c1c0a1a0
-					; Q9 = d3d2b3b2 d1d0b1b0
-	VSWP		D20,D21		; Q10= c7c6a7a6 c5c4a5a4
-	VSWP		D22,D23		; Q11= d7d6b7b6 d5d4b5b4
-	VUZP.32		Q9, Q11		; Q9 = b7b6b5b4 b3b2b1b0
-					; Q11= d7d6d5d4 d3d2d1d0
-	VMULL.S16	Q15,D18,D0[1]
-	VMULL.S16	Q13,D22,D1[1]
-	VUZP.32		Q8, Q10		; Q8 = a7a6a5a4 a3a2a1a0
-					; Q10= c7c6c5c4 c3c2c1c0
-	; Column transforms
-; Stages 1, 2, & 3
-	VMULL.S16	Q14,D19,D0[1]	; Q14:Q15= OC_C1S7*x[1]-(x[1]<<16)
-	VMULL.S16	Q12,D23,D1[1]	; Q12:Q13= OC_C5S3*x[3]-(x[3]<<16)
-	VMULL.S16	Q3, D22,D0[3]
-	VMULL.S16	Q2, D23,D0[3]	;  Q2:Q3 = OC_C3S5*x[3]-(x[3]<<16)
-	VSHRN.S32	D30,Q15,#16
-	VSHRN.S32	D31,Q14,#16	; Q15= (OC_C1S7*x[1]>>16)-x[1]
-	VSHRN.S32	D26,Q13,#16
-	VSHRN.S32	D27,Q12,#16	; Q13= (OC_C5S3*x[3]>>16)-x[3]
-	VSHRN.S32	D28,Q3, #16
-	VSHRN.S32	D29,Q2, #16	; Q14= (OC_C3S5*x[3]>>16)-x[3]
-	VADD.S16	Q15,Q15,Q9	; Q15= t[7]
-	VADD.S16	Q13,Q13,Q11	; Q13= -t[5]
-	VADD.S16	Q14,Q14,Q11	; Q14= t[6]
-	VMULL.S16	Q12,D18,D1[3]
-	VMULL.S16	Q2, D19,D1[3]	;  Q2:Q12= OC_C7S1*x[1]
-	VMULL.S16	Q1, D16,D1[0]
-	VMULL.S16	Q11,D17,D1[0]	; Q11:Q1 = OC_C4S4*x[0]-(x[0]<<16)
-	VMULL.S16	Q3, D20,D0[2]
-	VMULL.S16	Q9, D21,D0[2]	;  Q9:Q3 = OC_C2S6*x[2]-(x[2]<<16)
-	VSHRN.S32	D24,Q12,#16
-	VSHRN.S32	D25,Q2, #16	; Q12= t[4]
-	VMULL.S16	Q2, D20,D1[2]
-	VSHRN.S32	D2, Q1, #16
-	VSHRN.S32	D3, Q11,#16	; Q1 = (OC_C4S4*x[0]>>16)-x[0]
-	VMULL.S16	Q11,D21,D1[2]	;  Q2:Q11= OC_C6S2*x[2]
-	VSHRN.S32	D6, Q3, #16
-	VSHRN.S32	D7, Q9, #16	; Q3 = (OC_C2S6*x[2]>>16)-x[2]
-	VSUB.S16	Q9, Q15,Q14	; Q9 = t[7]-t[6]
-	VADD.S16	Q15,Q15,Q14	; Q15= t[7]'=t[7]+t[6]
-	VSHRN.S32	D4, Q2, #16
-	VSHRN.S32	D5, Q11,#16	; Q2 = t[2]
-	VADD.S16	Q1, Q1, Q8	; Q1 = t[0]
-	VADD.S16	Q8, Q12,Q13	; Q8 = t[4]-t[5]
-	VADD.S16	Q3, Q3, Q10	; Q3 = t[3]
-	VMULL.S16	Q10,D16,D1[0]
-	VMULL.S16	Q11,D17,D1[0]	; Q11:Q10= OC_C4S4*(t[4]-t[5])
-					;           -(t[4]-t[5]<<16)
-	VSUB.S16	Q12,Q12,Q13	; Q12= t[4]'=t[4]+t[5]
-	VMULL.S16	Q14,D18,D1[0]
-	VMULL.S16	Q13,D19,D1[0]	; Q13:Q14= OC_C4S4*(t[7]-t[6])
-					;           -(t[7]-t[6]<<16)
-	VSHRN.S32	D20,Q10,#16
-	VSHRN.S32	D21,Q11,#16	; Q10= (OC_C4S4*(t[4]-t[5])>>16)
-					;       -(t[4]-t[5])
-	VADD.S16	Q11,Q1, Q3	; Q11= t[0]'=t[0]+t[3]
-	VSUB.S16	Q3, Q1, Q3	; Q3 = t[3]'=t[0]-t[3]
-	VSHRN.S32	D28,Q14,#16
-	VSHRN.S32	D29,Q13,#16	; Q14= (OC_C4S4*(t[7]-t[6])>>16)
-					;       -(t[7]-t[6])
-	VADD.S16	Q10,Q10,Q8	; Q10=t[5]'
-	VADD.S16	Q14,Q14,Q9	; Q14=t[6]'
-	VSUB.S16	Q13,Q14,Q10	; Q13=t[5]''=t[6]'-t[5]'
-	VADD.S16	Q14,Q14,Q10	; Q14=t[6]''=t[6]'+t[5]'
-	VADD.S16	Q10,Q1, Q2	; Q10= t[1]'=t[0]+t[2]
-	VSUB.S16	Q2, Q1, Q2	; Q2 = t[2]'=t[0]-t[2]
-; Stage 4
-	CMP	r0, r1			; _y==_x? tested by the BEQ below
-	VADD.S16	Q8, Q11,Q15	; Q8  = y[0]=t[0]'+t[7]'
-	VADD.S16	Q9, Q10,Q14	; Q9  = y[1]=t[1]'+t[6]''
-	VSUB.S16	Q15,Q11,Q15	; Q15 = y[7]=t[0]'-t[7]'
-	VSUB.S16	Q14,Q10,Q14	; Q14 = y[6]=t[1]'-t[6]''
-	VADD.S16	Q10,Q2, Q13	; Q10 = y[2]=t[2]'+t[5]''
-	VADD.S16	Q11,Q3, Q12	; Q11 = y[3]=t[3]'+t[4]'
-	VSUB.S16	Q12,Q3, Q12	; Q12 = y[4]=t[3]'-t[4]'
-	VSUB.S16	Q13,Q2, Q13	; Q13 = y[5]=t[2]'-t[5]''
-	BEQ	oc_idct8x8_10_neon_noclear	; same buffer: skip zeroing _x
-	VMOV.I8		D2, #0		; D2 = zeros used to clear _x
-	VRSHR.S16	Q8, Q8, #4	; Q8  = y[0]+8>>4
-	VST1.64		{D2}, [r1@64], r12	; zero the 8 bytes at r1, r1 += 16
-	VRSHR.S16	Q9, Q9, #4	; Q9  = y[1]+8>>4
-	VRSHR.S16	Q10,Q10,#4	; Q10 = y[2]+8>>4
-	VST1.64		{D2}, [r1@64], r12
-	VRSHR.S16	Q11,Q11,#4	; Q11 = y[3]+8>>4
-	VRSHR.S16	Q12,Q12,#4	; Q12 = y[4]+8>>4
-	VST1.64		{D2}, [r1@64], r12
-	VRSHR.S16	Q13,Q13,#4	; Q13 = y[5]+8>>4
-	VRSHR.S16	Q14,Q14,#4	; Q14 = y[6]+8>>4
-	VST1.64		{D2}, [r1@64]
-	VRSHR.S16	Q15,Q15,#4	; Q15 = y[7]+8>>4
-	VSTMIA		r0, {D16-D31}	; store y[0..7] (128 bytes) to _y
-	MOV	PC, r14
-
-oc_idct8x8_10_neon_noclear
-	VRSHR.S16	Q8, Q8, #4	; Q8  = y[0]+8>>4
-	VRSHR.S16	Q9, Q9, #4	; Q9  = y[1]+8>>4
-	VRSHR.S16	Q10,Q10,#4	; Q10 = y[2]+8>>4
-	VRSHR.S16	Q11,Q11,#4	; Q11 = y[3]+8>>4
-	VRSHR.S16	Q12,Q12,#4	; Q12 = y[4]+8>>4
-	VRSHR.S16	Q13,Q13,#4	; Q13 = y[5]+8>>4
-	VRSHR.S16	Q14,Q14,#4	; Q14 = y[6]+8>>4
-	VRSHR.S16	Q15,Q15,#4	; Q15 = y[7]+8>>4
-	VSTMIA		r0, {D16-D31}	; store y[0..7] (128 bytes) to _y
-	MOV	PC, r14
-	ENDP
- ]
-
-	END
diff --git a/media/libtheora/lib/arm/armint.h b/media/libtheora/lib/arm/armint.h
deleted file mode 100644
index cc62d2438..000000000
--- a/media/libtheora/lib/arm/armint.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
-
- ********************************************************************/
-#if !defined(_arm_armint_H)
-# define _arm_armint_H (1)
-# include "../internal.h"
-
-# if defined(OC_ARM_ASM)
-
-#  if defined(__ARMEB__)
-#   error "Big-endian configurations are not supported by the ARM asm. " \
- "Reconfigure with --disable-asm or undefine OC_ARM_ASM."
-#  endif /*__ARMEB__*/
-
-#  define oc_state_accel_init oc_state_accel_init_arm
-/*The loop filter is implemented entirely in asm, so it's helpful to pull out
-   all of the things that depend on structure offsets and pass them as arguments.
-  We reuse the vtable function pointer, cast to oc_loop_filter_frag_rows_arm_func.*/
-#  define oc_state_loop_filter_frag_rows(_state,_bv,_refi,_pli, \
- _fragy0,_fragy_end) \
-  ((oc_loop_filter_frag_rows_arm_func) \
-   (_state)->opt_vtable.state_loop_filter_frag_rows)( \
-   (_state)->ref_frame_data[(_refi)],(_state)->ref_ystride[(_pli)], \
-   (_bv), \
-   (_state)->frags, \
-   (_state)->fplanes[(_pli)].froffset \
-   +(_fragy0)*(ptrdiff_t)(_state)->fplanes[(_pli)].nhfrags, \
-   (_state)->fplanes[(_pli)].froffset \
-   +(_fragy_end)*(ptrdiff_t)(_state)->fplanes[(_pli)].nhfrags, \
-   (_state)->fplanes[(_pli)].froffset, \
-   (_state)->fplanes[(_pli)].froffset+(_state)->fplanes[(_pli)].nfrags, \
-   (_state)->frag_buf_offs, \
-   (_state)->fplanes[(_pli)].nhfrags)
-/*For everything else the default vtable macros are fine.*/
-#  define OC_STATE_USE_VTABLE (1)
-# endif /*OC_ARM_ASM*/
-
-# include "../state.h"
-# include "armcpu.h"
-
-# if defined(OC_ARM_ASM)
-/*Signature the vtable pointer is cast to by oc_state_loop_filter_frag_rows().*/
-typedef void (*oc_loop_filter_frag_rows_arm_func)(
- unsigned char *_ref_frame_data,int _ystride,signed char _bv[256],
- const oc_fragment *_frags,ptrdiff_t _fragi0,ptrdiff_t _fragi0_end,
- ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
- const ptrdiff_t *_frag_buf_offs,int _nhfrags);
-
-/*Functions with the _arm suffix; declared whenever OC_ARM_ASM is defined.*/
-void oc_state_accel_init_arm(oc_theora_state *_state);
-void oc_frag_copy_list_arm(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-void oc_frag_recon_intra_arm(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_arm(unsigned char *_dst,const unsigned char *_src,
- int _ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_arm(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
-void oc_idct8x8_1_arm(ogg_int16_t _y[64],ogg_uint16_t _dc);
-void oc_idct8x8_arm(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_loop_filter_frag_rows_arm(unsigned char *_ref_frame_data,
- int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
- ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
- const ptrdiff_t *_frag_buf_offs,int _nhfrags);
-
-/*Functions with the _edsp suffix; additionally require OC_ARM_ASM_EDSP.*/
-#  if defined(OC_ARM_ASM_EDSP)
-void oc_frag_copy_list_edsp(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-
-/*Functions with the _v6 suffix; additionally require OC_ARM_ASM_MEDIA.*/
-#   if defined(OC_ARM_ASM_MEDIA)
-void oc_frag_recon_intra_v6(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_v6(unsigned char *_dst,const unsigned char *_src,
- int _ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_v6(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
-void oc_idct8x8_1_v6(ogg_int16_t _y[64],ogg_uint16_t _dc);
-void oc_idct8x8_v6(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_loop_filter_init_v6(signed char *_bv,int _flimit);
-void oc_loop_filter_frag_rows_v6(unsigned char *_ref_frame_data,
- int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
- ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
- const ptrdiff_t *_frag_buf_offs,int _nhfrags);
-
-#    if defined(OC_ARM_ASM_NEON)
-void oc_frag_copy_list_neon(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-void oc_frag_recon_intra_neon(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_neon(unsigned char *_dst,const unsigned char *_src,
- int _ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_neon(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
-void oc_idct8x8_1_neon(ogg_int16_t _y[64],ogg_uint16_t _dc);
-void oc_idct8x8_neon(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_loop_filter_init_neon(signed char *_bv,int _flimit);
-void oc_loop_filter_frag_rows_neon(unsigned char *_ref_frame_data,
- int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
- ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
- const ptrdiff_t *_frag_buf_offs,int _nhfrags);
-#    endif /*OC_ARM_ASM_NEON*/
-#   endif /*OC_ARM_ASM_MEDIA*/
-#  endif /*OC_ARM_ASM_EDSP*/
-# endif /*OC_ARM_ASM*/
-
-#endif /*_arm_armint_H*/
diff --git a/media/libtheora/lib/arm/armloop.s b/media/libtheora/lib/arm/armloop.s
deleted file mode 100644
index 0a1d4705e..000000000
--- a/media/libtheora/lib/arm/armloop.s
+++ /dev/null
@@ -1,682 +0,0 @@
-;********************************************************************
-;*                                                                  *
-;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
-;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
-;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
-;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
-;*                                                                  *
-;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
-;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
-;*                                                                  *
-;********************************************************************
-; Original implementation:
-;  Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
-; last mod: $Id: armloop.s 17481 2010-10-03 22:49:42Z tterribe $
-;********************************************************************
-
-	AREA	|.text|, CODE, READONLY
-
-	; Explicitly specifying alignment here because some versions of
-	; gas don't align code correctly. See
-	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
-	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
-	ALIGN
-
-	GET	armopts.s
-
-	EXPORT	oc_loop_filter_frag_rows_arm
-
-; Which bit this is depends on the order of packing within a bitfield.
-; Hopefully that doesn't change among any of the relevant compilers.
-OC_FRAG_CODED_FLAG	*	1
-
-	; Vanilla ARM v4 version
-loop_filter_h_arm PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int           *_bv
-	; preserves r0-r3
-	STMFD	r13!,{r3-r6,r14}
-	MOV	r14,#8
-	MOV	r6, #255
-lfh_arm_lp
-	LDRB	r3, [r0, #-2]		; r3 = _pix[0]
-	LDRB	r12,[r0, #1]		; r12= _pix[3]
-	LDRB	r4, [r0, #-1]		; r4 = _pix[1]
-	LDRB	r5, [r0]		; r5 = _pix[2]
-	SUB	r3, r3, r12		; r3 = _pix[0]-_pix[3]+4
-	ADD	r3, r3, #4
-	SUB	r12,r5, r4		; r12= _pix[2]-_pix[1]
-	ADD	r12,r12,r12,LSL #1	; r12= 3*(_pix[2]-_pix[1])
-	ADD	r12,r12,r3	; r12= _pix[0]-_pix[3]+3*(_pix[2]-_pix[1])+4
-	MOV	r12,r12,ASR #3
-	LDRSB	r12,[r2, r12]
-	; Stall (2 on Xscale)
-	ADDS	r4, r4, r12
-	CMPGT	r6, r4
-	EORLT	r4, r6, r4, ASR #32
-	SUBS	r5, r5, r12
-	CMPGT	r6, r5
-	EORLT	r5, r6, r5, ASR #32
-	STRB	r4, [r0, #-1]
-	STRB	r5, [r0], r1
-	SUBS	r14,r14,#1
-	BGT	lfh_arm_lp
-	SUB	r0, r0, r1, LSL #3
-	LDMFD	r13!,{r3-r6,PC}
-	ENDP
-
-loop_filter_v_arm PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int           *_bv
-	; preserves r0-r3
-	STMFD	r13!,{r3-r6,r14}
-	MOV	r14,#8
-	MOV	r6, #255
-lfv_arm_lp
-	LDRB	r3, [r0, -r1, LSL #1]	; r3 = _pix[0]
-	LDRB	r12,[r0, r1]		; r12= _pix[3]
-	LDRB	r4, [r0, -r1]		; r4 = _pix[1]
-	LDRB	r5, [r0]		; r5 = _pix[2]
-	SUB	r3, r3, r12		; r3 = _pix[0]-_pix[3]+4
-	ADD	r3, r3, #4
-	SUB	r12,r5, r4		; r12= _pix[2]-_pix[1]
-	ADD	r12,r12,r12,LSL #1	; r12= 3*(_pix[2]-_pix[1])
-	ADD	r12,r12,r3	; r12= _pix[0]-_pix[3]+3*(_pix[2]-_pix[1])+4
-	MOV	r12,r12,ASR #3
-	LDRSB	r12,[r2, r12]
-	; Stall (2 on Xscale)
-	ADDS	r4, r4, r12
-	CMPGT	r6, r4
-	EORLT	r4, r6, r4, ASR #32
-	SUBS	r5, r5, r12
-	CMPGT	r6, r5
-	EORLT	r5, r6, r5, ASR #32
-	STRB	r4, [r0, -r1]
-	STRB	r5, [r0], #1
-	SUBS	r14,r14,#1
-	BGT	lfv_arm_lp
-	SUB	r0, r0, #8
-	LDMFD	r13!,{r3-r6,PC}
-	ENDP
-
-oc_loop_filter_frag_rows_arm PROC
-	; r0 = _ref_frame_data
-	; r1 = _ystride
-	; r2 = _bv
-	; r3 = _frags
-	; r4 = _fragi0
-	; r5 = _fragi0_end
-	; r6 = _fragi_top
-	; r7 = _fragi_bot
-	; r8 = _frag_buf_offs
-	; r9 = _nhfrags
-	MOV	r12,r13
-	STMFD	r13!,{r0,r4-r11,r14}
-	LDMFD	r12,{r4-r9}
-	ADD	r2, r2, #127	; _bv += 127
-	CMP	r4, r5		; if(_fragi0>=_fragi0_end)
-	BGE	oslffri_arm_end	;   bail
-	SUBS	r9, r9, #1	; r9 = _nhfrags-1	if (r9<=0)
-	BLE	oslffri_arm_end	;			  bail
-	ADD	r3, r3, r4, LSL #2	; r3 = &_frags[fragi]
-	ADD	r8, r8, r4, LSL #2	; r8 = &_frag_buf_offs[fragi]
-	SUB	r7, r7, r9	; _fragi_bot -= _nhfrags;
-oslffri_arm_lp1
-	MOV	r10,r4		; r10= fragi = _fragi0
-	ADD	r11,r4, r9	; r11= fragi_end-1=fragi+_nhfrags-1
-oslffri_arm_lp2
-	LDR	r14,[r3], #4	; r14= _frags[fragi]	_frags++
-	LDR	r0, [r13]	; r0 = _ref_frame_data
-	LDR	r12,[r8], #4	; r12= _frag_buf_offs[fragi]   _frag_buf_offs++
-	TST	r14,#OC_FRAG_CODED_FLAG
-	BEQ	oslffri_arm_uncoded
-	CMP	r10,r4		; if (fragi>_fragi0)
-	ADD	r0, r0, r12	; r0 = _ref_frame_data + _frag_buf_offs[fragi]
-	BLGT	loop_filter_h_arm
-	CMP	r4, r6		; if (_fragi0>_fragi_top)
-	BLGT	loop_filter_v_arm
-	CMP	r10,r11		; if(fragi+1<fragi_end)===(fragi<fragi_end-1)
-	LDRLT	r12,[r3]	; r12 = _frags[fragi+1]
-	ADD	r0, r0, #8
-	ADD	r10,r10,#1	; r10 = fragi+1;
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG	; && _frags[fragi+1].coded==0
-	BLLT	loop_filter_h_arm
-	CMP	r10,r7		; if (fragi<_fragi_bot)
-	LDRLT	r12,[r3, r9, LSL #2]	; r12 = _frags[fragi+1+_nhfrags-1]
-	SUB	r0, r0, #8
-	ADD	r0, r0, r1, LSL #3
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG
-	BLLT	loop_filter_v_arm
-	CMP	r10,r11		; while(fragi<=fragi_end-1)
-	BLE	oslffri_arm_lp2
-	MOV	r4, r10		; r4 = fragi0 += _nhfrags
-	CMP	r4, r5
-	BLT	oslffri_arm_lp1
-oslffri_arm_end
-	LDMFD	r13!,{r0,r4-r11,PC}
-oslffri_arm_uncoded
-	ADD	r10,r10,#1
-	CMP	r10,r11
-	BLE	oslffri_arm_lp2
-	MOV	r4, r10		; r4 = _fragi0 += _nhfrags
-	CMP	r4, r5
-	BLT	oslffri_arm_lp1
-	LDMFD	r13!,{r0,r4-r11,PC}
-	ENDP
-
- [ OC_ARM_ASM_MEDIA
-	EXPORT	oc_loop_filter_init_v6
-	EXPORT	oc_loop_filter_frag_rows_v6
-
-oc_loop_filter_init_v6 PROC
-	; r0 = _bv
-	; r1 = _flimit (=L from the spec)
-	MVN	r1, r1, LSL #1		; r1 = <0xFFFFFF|255-2*L>
-	AND	r1, r1, #255		; r1 = ll=r1&0xFF
-	ORR	r1, r1, r1, LSL #8	; r1 = <ll|ll>
-	PKHBT	r1, r1, r1, LSL #16	; r1 = <ll|ll|ll|ll>
-	STR	r1, [r0]
-	MOV	PC,r14
-	ENDP
-
-; We could use the same strategy as the v filter below, but that would require
-;  40 instructions to load the data and transpose it into columns and another
-;  32 to write out the results at the end, plus the 52 instructions to do the
-;  filtering itself.
-; This is slightly less, and less code, even assuming we could have shared the
-;  52 instructions in the middle with the other function.
-; It executes slightly fewer instructions than the ARMv6 approach David Conrad
-;  proposed for FFmpeg, but not by much:
-;  http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2010-February/083141.html
-; His is a lot less code, though, because it only does two rows at once instead
-;  of four.
-loop_filter_h_v6 PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int            _ll
-	; preserves r0-r3
-	STMFD	r13!,{r4-r11,r14}
-	LDR	r12,=0x10003
-	BL loop_filter_h_core_v6
-	ADD	r0, r0, r1, LSL #2
-	BL loop_filter_h_core_v6
-	SUB	r0, r0, r1, LSL #2
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-loop_filter_h_core_v6 PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int            _ll
-	; r12= 0x10003
-	; Preserves r0-r3, r12; Clobbers r4-r11.
-	LDR	r4,[r0, #-2]!		; r4 = <p3|p2|p1|p0>
-	; Single issue
-	LDR	r5,[r0, r1]!		; r5 = <q3|q2|q1|q0>
-	UXTB16	r6, r4, ROR #16		; r6 = <p0|p2>
-	UXTB16	r4, r4, ROR #8		; r4 = <p3|p1>
-	UXTB16	r7, r5, ROR #16		; r7 = <q0|q2>
-	UXTB16	r5, r5, ROR #8		; r5 = <q3|q1>
-	PKHBT	r8, r4, r5, LSL #16	; r8 = <__|q1|__|p1>
-	PKHBT	r9, r6, r7, LSL #16	; r9 = <__|q2|__|p2>
-	SSUB16	r6, r4, r6		; r6 = <p3-p0|p1-p2>
-	SMLAD	r6, r6, r12,r12		; r6 = <????|(p3-p0)+3*(p1-p2)+3>
-	SSUB16	r7, r5, r7		; r7 = <q3-q0|q1-q2>
-	SMLAD	r7, r7, r12,r12		; r7 = <????|(q0-q3)+3*(q2-q1)+4>
-	LDR	r4,[r0, r1]!		; r4 = <r3|r2|r1|r0>
-	MOV	r6, r6, ASR #3		; r6 = <??????|(p3-p0)+3*(p1-p2)+3>>3>
-	LDR	r5,[r0, r1]!		; r5 = <s3|s2|s1|s0>
-	PKHBT	r11,r6, r7, LSL #13	; r11= <??|-R_q|??|-R_p>
-	UXTB16	r6, r4, ROR #16		; r6 = <r0|r2>
-	UXTB16	r11,r11			; r11= <__|-R_q|__|-R_p>
-	UXTB16	r4, r4, ROR #8		; r4 = <r3|r1>
-	UXTB16	r7, r5, ROR #16		; r7 = <s0|s2>
-	PKHBT	r10,r6, r7, LSL #16	; r10= <__|s2|__|r2>
-	SSUB16	r6, r4, r6		; r6 = <r3-r0|r1-r2>
-	UXTB16	r5, r5, ROR #8		; r5 = <s3|s1>
-	SMLAD	r6, r6, r12,r12		; r6 = <????|(r3-r0)+3*(r2-r1)+3>
-	SSUB16	r7, r5, r7		; r7 = <r3-r0|r1-r2>
-	SMLAD	r7, r7, r12,r12		; r7 = <????|(s0-s3)+3*(s2-s1)+4>
-	ORR	r9, r9, r10, LSL #8	; r9 = <s2|q2|r2|p2>
-	MOV	r6, r6, ASR #3		; r6 = <??????|(r0-r3)+3*(r2-r1)+4>>3>
-	PKHBT	r10,r4, r5, LSL #16	; r10= <__|s1|__|r1>
-	PKHBT	r6, r6, r7, LSL #13	; r6 = <??|-R_s|??|-R_r>
-	ORR	r8, r8, r10, LSL #8	; r8 = <s1|q1|r1|p1>
-	UXTB16	r6, r6			; r6 = <__|-R_s|__|-R_r>
-	MOV	r10,#0
-	ORR	r6, r11,r6, LSL #8	; r6 = <-R_s|-R_q|-R_r|-R_p>
-	; Single issue
-	; There's no min, max or abs instruction.
-	; SSUB8 and SEL will work for abs, and we can do all the rest with
-	;  unsigned saturated adds, which means the GE flags are still all
-	;  set when we're done computing lflim(abs(R_i),L).
-	; This allows us to both add and subtract, and split the results by
-	;  the original sign of R_i.
-	SSUB8	r7, r10,r6
-	; Single issue
-	SEL	r7, r7, r6		; r7 = abs(R_i)
-	; Single issue
-	UQADD8	r4, r7, r2		; r4 = 255-max(2*L-abs(R_i),0)
-	; Single issue
-	UQADD8	r7, r7, r4
-	; Single issue
-	UQSUB8	r7, r7, r4		; r7 = min(abs(R_i),max(2*L-abs(R_i),0))
-	; Single issue
-	UQSUB8	r4, r8, r7
-	UQADD8	r5, r9, r7
-	UQADD8	r8, r8, r7
-	UQSUB8	r9, r9, r7
-	SEL	r8, r8, r4		; r8 = p1+lflim(R_i,L)
-	SEL	r9, r9, r5		; r9 = p2-lflim(R_i,L)
-	MOV	r5, r9, LSR #24		; r5 = s2
-	STRB	r5, [r0,#2]!
-	MOV	r4, r8, LSR #24		; r4 = s1
-	STRB	r4, [r0,#-1]
-	MOV	r5, r9, LSR #8		; r5 = r2
-	STRB	r5, [r0,-r1]!
-	MOV	r4, r8, LSR #8		; r4 = r1
-	STRB	r4, [r0,#-1]
-	MOV	r5, r9, LSR #16		; r5 = q2
-	STRB	r5, [r0,-r1]!
-	MOV	r4, r8, LSR #16		; r4 = q1
-	STRB	r4, [r0,#-1]
-	; Single issue
-	STRB	r9, [r0,-r1]!
-	; Single issue
-	STRB	r8, [r0,#-1]
-	MOV	PC,r14
-	ENDP
-
-; This uses the same strategy as the MMXEXT version for x86, except that UHADD8
-;  computes (a+b>>1) instead of (a+b+1>>1) like PAVGB.
-; This works just as well, with the following procedure for computing the
-;  filter value, f:
-;   u = ~UHADD8(p1,~p2);
-;   v = UHADD8(~p1,p2);
-;   m = v-u;
-;   a = m^UHADD8(m^p0,m^~p3);
-;   f = UHADD8(UHADD8(a,u1),v1);
-;  where f = 127+R, with R in [-127,128] defined as in the spec.
-; This is exactly the same amount of arithmetic as the version that uses PAVGB
-;  as the basic operator.
-; It executes about 2/3 the number of instructions of David Conrad's approach,
-;  but requires more code, because it does all eight columns at once, instead
-;  of four at a time.
-loop_filter_v_v6 PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int            _ll
-	; preserves r0-r11
-	STMFD	r13!,{r4-r11,r14}
-	LDRD	r6, [r0, -r1]!		; r7, r6 = <p5|p1>
-	LDRD	r4, [r0, -r1]		; r5, r4 = <p4|p0>
-	LDRD	r8, [r0, r1]!		; r9, r8 = <p6|p2>
-	MVN	r14,r6			; r14= ~p1
-	LDRD	r10,[r0, r1]		; r11,r10= <p7|p3>
-	; Filter the first four columns.
-	MVN	r12,r8			; r12= ~p2
-	UHADD8	r14,r14,r8		; r14= v1=~p1+p2>>1
-	UHADD8	r12,r12,r6		; r12= p1+~p2>>1
-	MVN	r10, r10		; r10=~p3
-	MVN	r12,r12			; r12= u1=~p1+p2+1>>1
-	SSUB8	r14,r14,r12		; r14= m1=v1-u1
-	; Single issue
-	EOR	r4, r4, r14		; r4 = m1^p0
-	EOR	r10,r10,r14		; r10= m1^~p3
-	UHADD8	r4, r4, r10		; r4 = (m1^p0)+(m1^~p3)>>1
-	; Single issue
-	EOR	r4, r4, r14		; r4 = a1=m1^((m1^p0)+(m1^~p3)>>1)
-	SADD8	r14,r14,r12		; r14= v1=m1+u1
-	UHADD8	r4, r4, r12		; r4 = a1+u1>>1
-	MVN	r12,r9			; r12= ~p6
-	UHADD8	r4, r4, r14		; r4 = f1=(a1+u1>>1)+v1>>1
-	; Filter the second four columns.
-	MVN	r14,r7			; r14= ~p5
-	UHADD8	r12,r12,r7		; r12= p5+~p6>>1
-	UHADD8	r14,r14,r9		; r14= v2=~p5+p6>>1
-	MVN	r12,r12			; r12= u2=~p5+p6+1>>1
-	MVN	r11,r11			; r11=~p7
-	SSUB8	r10,r14,r12		; r10= m2=v2-u2
-	; Single issue
-	EOR	r5, r5, r10		; r5 = m2^p4
-	EOR	r11,r11,r10		; r11= m2^~p7
-	UHADD8	r5, r5, r11		; r5 = (m2^p4)+(m2^~p7)>>1
-	; Single issue
-	EOR	r5, r5, r10		; r5 = a2=m2^((m2^p4)+(m2^~p7)>>1)
-	; Single issue
-	UHADD8	r5, r5, r12		; r5 = a2+u2>>1
-	LDR	r12,=0x7F7F7F7F		; r12 = {127}x4
-	UHADD8	r5, r5, r14		; r5 = f2=(a2+u2>>1)+v2>>1
-	; Now split f[i] by sign.
-	; There's no min or max instruction.
-	; We could use SSUB8 and SEL, but this is just as many instructions and
-	;  dual issues more (for v7 without NEON).
-	UQSUB8	r10,r4, r12		; r10= R_i>0?R_i:0
-	UQSUB8	r4, r12,r4		; r4 = R_i<0?-R_i:0
-	UQADD8	r11,r10,r2		; r11= 255-max(2*L-abs(R_i<0),0)
-	UQADD8	r14,r4, r2		; r14= 255-max(2*L-abs(R_i>0),0)
-	UQADD8	r10,r10,r11
-	UQADD8	r4, r4, r14
-	UQSUB8	r10,r10,r11		; r10= min(abs(R_i<0),max(2*L-abs(R_i<0),0))
-	UQSUB8	r4, r4, r14		; r4 = min(abs(R_i>0),max(2*L-abs(R_i>0),0))
-	UQSUB8	r11,r5, r12		; r11= R_i>0?R_i:0
-	UQADD8	r6, r6, r10
-	UQSUB8	r8, r8, r10
-	UQSUB8	r5, r12,r5		; r5 = R_i<0?-R_i:0
-	UQSUB8	r6, r6, r4		; r6 = p1+lflim(R_i,L)
-	UQADD8	r8, r8, r4		; r8 = p2-lflim(R_i,L)
-	UQADD8	r10,r11,r2		; r10= 255-max(2*L-abs(R_i<0),0)
-	UQADD8	r14,r5, r2		; r14= 255-max(2*L-abs(R_i>0),0)
-	UQADD8	r11,r11,r10
-	UQADD8	r5, r5, r14
-	UQSUB8	r11,r11,r10		; r11= min(abs(R_i<0),max(2*L-abs(R_i<0),0))
-	UQSUB8	r5, r5, r14		; r5 = min(abs(R_i>0),max(2*L-abs(R_i>0),0))
-	UQADD8	r7, r7, r11
-	UQSUB8	r9, r9, r11
-	UQSUB8	r7, r7, r5		; r7 = p5+lflim(R_i,L)
-	STRD	r6, [r0, -r1]		; [p5:p1] = [r7: r6]
-	UQADD8	r9, r9, r5		; r9 = p6-lflim(R_i,L)
-	STRD	r8, [r0]		; [p6:p2] = [r9: r8]
-	LDMFD	r13!,{r4-r11,PC}
-	ENDP
-
-oc_loop_filter_frag_rows_v6 PROC
-	; r0 = _ref_frame_data
-	; r1 = _ystride
-	; r2 = _bv
-	; r3 = _frags
-	; r4 = _fragi0
-	; r5 = _fragi0_end
-	; r6 = _fragi_top
-	; r7 = _fragi_bot
-	; r8 = _frag_buf_offs
-	; r9 = _nhfrags
-	MOV	r12,r13
-	STMFD	r13!,{r0,r4-r11,r14}
-	LDMFD	r12,{r4-r9}
-	LDR	r2, [r2]	; ll = *(int *)_bv
-	CMP	r4, r5		; if(_fragi0>=_fragi0_end)
-	BGE	oslffri_v6_end	;   bail
-	SUBS	r9, r9, #1	; r9 = _nhfrags-1	if (r9<=0)
-	BLE	oslffri_v6_end	;			  bail
-	ADD	r3, r3, r4, LSL #2	; r3 = &_frags[fragi]
-	ADD	r8, r8, r4, LSL #2	; r8 = &_frag_buf_offs[fragi]
-	SUB	r7, r7, r9	; _fragi_bot -= _nhfrags;
-oslffri_v6_lp1
-	MOV	r10,r4		; r10= fragi = _fragi0
-	ADD	r11,r4, r9	; r11= fragi_end-1=fragi+_nhfrags-1
-oslffri_v6_lp2
-	LDR	r14,[r3], #4	; r14= _frags[fragi]	_frags++
-	LDR	r0, [r13]	; r0 = _ref_frame_data
-	LDR	r12,[r8], #4	; r12= _frag_buf_offs[fragi]   _frag_buf_offs++
-	TST	r14,#OC_FRAG_CODED_FLAG
-	BEQ	oslffri_v6_uncoded
-	CMP	r10,r4		; if (fragi>_fragi0)
-	ADD	r0, r0, r12	; r0 = _ref_frame_data + _frag_buf_offs[fragi]
-	BLGT	loop_filter_h_v6
-	CMP	r4, r6		; if (fragi0>_fragi_top)
-	BLGT	loop_filter_v_v6
-	CMP	r10,r11		; if(fragi+1<fragi_end)===(fragi<fragi_end-1)
-	LDRLT	r12,[r3]	; r12 = _frags[fragi+1]
-	ADD	r0, r0, #8
-	ADD	r10,r10,#1	; r10 = fragi+1;
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG	; && _frags[fragi+1].coded==0
-	BLLT	loop_filter_h_v6
-	CMP	r10,r7		; if (fragi<_fragi_bot)
-	LDRLT	r12,[r3, r9, LSL #2]	; r12 = _frags[fragi+1+_nhfrags-1]
-	SUB	r0, r0, #8
-	ADD	r0, r0, r1, LSL #3
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG
-	BLLT	loop_filter_v_v6
-	CMP	r10,r11		; while(fragi<=fragi_end-1)
-	BLE	oslffri_v6_lp2
-	MOV	r4, r10		; r4 = fragi0 += nhfrags
-	CMP	r4, r5
-	BLT	oslffri_v6_lp1
-oslffri_v6_end
-	LDMFD	r13!,{r0,r4-r11,PC}
-oslffri_v6_uncoded
-	ADD	r10,r10,#1
-	CMP	r10,r11
-	BLE	oslffri_v6_lp2
-	MOV	r4, r10		; r4 = fragi0 += nhfrags
-	CMP	r4, r5
-	BLT	oslffri_v6_lp1
-	LDMFD	r13!,{r0,r4-r11,PC}
-	ENDP
- ]
-
- [ OC_ARM_ASM_NEON
-	EXPORT	oc_loop_filter_init_neon
-	EXPORT	oc_loop_filter_frag_rows_neon
-
-oc_loop_filter_init_neon PROC
-	; r0 = _bv
-	; r1 = _flimit (=L from the spec)
-	MOV		r1, r1, LSL #1  ; r1 = 2*L
-	VDUP.S16	Q15, r1		; Q15= 2L in U16s
-	VST1.64		{D30,D31}, [r0@128]
-	MOV	PC,r14
-	ENDP
-
-loop_filter_h_neon PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int           *_bv
-	; preserves r0-r3
-	; We assume Q15= 2*L in U16s
-	;                    My best guesses at cycle counts (and latency)--vvv
-	SUB	r12,r0, #2
-	; Doing a 2-element structure load saves doing two VTRN's below, at the
-	;  cost of using two more slower single-lane loads vs. the faster
-	;  all-lane loads.
-	; It's less code this way, though, and benches a hair faster, but it
-	;  leaves D2 and D4 swapped.
-	VLD2.16	{D0[],D2[]},  [r12], r1		; D0 = ____________1100     2,1
-						; D2 = ____________3322
-	VLD2.16	{D4[],D6[]},  [r12], r1		; D4 = ____________5544     2,1
-						; D6 = ____________7766
-	VLD2.16	{D0[1],D2[1]},[r12], r1		; D0 = ________99881100     3,1
-						; D2 = ________BBAA3322
-	VLD2.16	{D4[1],D6[1]},[r12], r1		; D4 = ________DDCC5544     3,1
-						; D6 = ________FFEE7766
-	VLD2.16	{D0[2],D2[2]},[r12], r1		; D0 = ____GGHH99881100     3,1
-						; D2 = ____JJIIBBAA3322
-	VLD2.16	{D4[2],D6[2]},[r12], r1		; D4 = ____KKLLDDCC5544     3,1
-						; D6 = ____NNMMFFEE7766
-	VLD2.16	{D0[3],D2[3]},[r12], r1		; D0 = PPOOGGHH99881100     3,1
-						; D2 = RRQQJJIIBBAA3322
-	VLD2.16	{D4[3],D6[3]},[r12], r1		; D4 = TTSSKKLLDDCC5544     3,1
-						; D6 = VVUUNNMMFFEE7766
-	VTRN.8	D0, D4	; D0 = SSOOKKGGCC884400 D4 = TTPPLLHHDD995511       1,1
-	VTRN.8	D2, D6	; D2 = UUQQMMIIEEAA6622 D6 = VVRRNNJJFFBB7733       1,1
-	VSUBL.U8	Q0, D0, D6	; Q0 = 00 - 33 in S16s              1,3
-	VSUBL.U8	Q8, D2, D4	; Q8 = 22 - 11 in S16s              1,3
-	ADD	r12,r0, #8
-	VADD.S16	Q0, Q0, Q8	;                                   1,3
-	PLD	[r12]
-	VADD.S16	Q0, Q0, Q8	;                                   1,3
-	PLD	[r12,r1]
-	VADD.S16	Q0, Q0, Q8	; Q0 = [0-3]+3*[2-1]                1,3
-	PLD	[r12,r1, LSL #1]
-	VRSHR.S16	Q0, Q0, #3	; Q0 = f = ([0-3]+3*[2-1]+4)>>3     1,4
-	ADD	r12,r12,r1, LSL #2
-	;  We want to do
-	; f =             CLAMP(MIN(-2L-f,0), f, MAX(2L-f,0))
-	;   = ((f >= 0) ? MIN( f ,MAX(2L- f ,0)) : MAX(  f , MIN(-2L- f ,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) : MAX(-|f|, MIN(-2L+|f|,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) :-MIN( |f|,-MIN(-2L+|f|,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) :-MIN( |f|, MAX( 2L-|f|,0)))
-	; So we've reduced the left and right hand terms to be the same, except
-	; for a negation.
-	; Stall x3
-	VABS.S16	Q9, Q0		; Q9 = |f| in U16s                  1,4
-	PLD	[r12,-r1]
-	VSHR.S16	Q0, Q0, #15	; Q0 = -1 or 0 according to sign    1,3
-	PLD	[r12]
-	VQSUB.U16	Q10,Q15,Q9	; Q10= MAX(2L-|f|,0) in U16s        1,4
-	PLD	[r12,r1]
-	VMOVL.U8	Q1, D2	   ; Q2 = __UU__QQ__MM__II__EE__AA__66__22  2,3
-	PLD	[r12,r1,LSL #1]
-	VMIN.U16	Q9, Q10,Q9	; Q9 = MIN(|f|,MAX(2L-|f|))         1,4
-	ADD	r12,r12,r1, LSL #2
-	; Now we need to correct for the sign of f.
-	; For negative elements of Q0, we want to subtract the appropriate
-	; element of Q9. For positive elements we want to add them. No NEON
-	; instruction exists to do this, so we need to negate the negative
-	; elements, and we can then just add them. a-b = a-(1+!b) = a-1+!b
-	VADD.S16	Q9, Q9, Q0	;				    1,3
-	PLD	[r12,-r1]
-	VEOR.S16	Q9, Q9, Q0	; Q9 = real value of f              1,3
-	; Bah. No VRSBW.U8
-	; Stall (just 1 as Q9 not needed to second pipeline stage. I think.)
-	VADDW.U8	Q2, Q9, D4 ; Q1 = xxTTxxPPxxLLxxHHxxDDxx99xx55xx11  1,3
-	VSUB.S16	Q1, Q1, Q9 ; Q2 = xxUUxxQQxxMMxxIIxxEExxAAxx66xx22  1,3
-	VQMOVUN.S16	D4, Q2		; D4 = TTPPLLHHDD995511		    1,1
-	VQMOVUN.S16	D2, Q1		; D2 = UUQQMMIIEEAA6622		    1,1
-	SUB	r12,r0, #1
-	VTRN.8	D4, D2		; D4 = QQPPIIHHAA992211	D2 = MMLLEEDD6655   1,1
-	VST1.16	{D4[0]}, [r12], r1
-	VST1.16	{D2[0]}, [r12], r1
-	VST1.16	{D4[1]}, [r12], r1
-	VST1.16	{D2[1]}, [r12], r1
-	VST1.16	{D4[2]}, [r12], r1
-	VST1.16	{D2[2]}, [r12], r1
-	VST1.16	{D4[3]}, [r12], r1
-	VST1.16	{D2[3]}, [r12], r1
-	MOV	PC,r14
-	ENDP
-
-loop_filter_v_neon PROC
-	; r0 = unsigned char *_pix
-	; r1 = int            _ystride
-	; r2 = int           *_bv
-	; preserves r0-r3
-	; We assume Q15= 2*L in U16s
-	;                    My best guesses at cycle counts (and latency)--vvv
-	SUB	r12,r0, r1, LSL #1
-	VLD1.64	{D0}, [r12@64], r1		; D0 = SSOOKKGGCC884400     2,1
-	VLD1.64	{D2}, [r12@64], r1		; D2 = TTPPLLHHDD995511     2,1
-	VLD1.64	{D4}, [r12@64], r1		; D4 = UUQQMMIIEEAA6622     2,1
-	VLD1.64	{D6}, [r12@64]			; D6 = VVRRNNJJFFBB7733     2,1
-	VSUBL.U8	Q8, D4, D2	; Q8 = 22 - 11 in S16s              1,3
-	VSUBL.U8	Q0, D0, D6	; Q0 = 00 - 33 in S16s              1,3
-	ADD	r12, #8
-	VADD.S16	Q0, Q0, Q8	;                                   1,3
-	PLD	[r12]
-	VADD.S16	Q0, Q0, Q8	;                                   1,3
-	PLD	[r12,r1]
-	VADD.S16	Q0, Q0, Q8	; Q0 = [0-3]+3*[2-1]                1,3
-	SUB	r12, r0, r1
-	VRSHR.S16	Q0, Q0, #3	; Q0 = f = ([0-3]+3*[2-1]+4)>>3     1,4
-	;  We want to do
-	; f =             CLAMP(MIN(-2L-f,0), f, MAX(2L-f,0))
-	;   = ((f >= 0) ? MIN( f ,MAX(2L- f ,0)) : MAX(  f , MIN(-2L- f ,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) : MAX(-|f|, MIN(-2L+|f|,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) :-MIN( |f|,-MIN(-2L+|f|,0)))
-	;   = ((f >= 0) ? MIN(|f|,MAX(2L-|f|,0)) :-MIN( |f|, MAX( 2L-|f|,0)))
-	; So we've reduced the left and right hand terms to be the same, except
-	; for a negation.
-	; Stall x3
-	VABS.S16	Q9, Q0		; Q9 = |f| in U16s                  1,4
-	VSHR.S16	Q0, Q0, #15	; Q0 = -1 or 0 according to sign    1,3
-	; Stall x2
-	VQSUB.U16	Q10,Q15,Q9	; Q10= MAX(2L-|f|,0) in U16s        1,4
-	VMOVL.U8	Q2, D4	   ; Q2 = __UU__QQ__MM__II__EE__AA__66__22  2,3
-	; Stall x2
-	VMIN.U16	Q9, Q10,Q9	; Q9 = MIN(|f|,MAX(2L-|f|))         1,4
-	; Now we need to correct for the sign of f.
-	; For negative elements of Q0, we want to subtract the appropriate
-	; element of Q9. For positive elements we want to add them. No NEON
-	; instruction exists to do this, so we need to negate the negative
-	; elements, and we can then just add them. a-b = a-(1+!b) = a-1+!b
-	; Stall x3
-	VADD.S16	Q9, Q9, Q0	;				    1,3
-	; Stall x2
-	VEOR.S16	Q9, Q9, Q0	; Q9 = real value of f              1,3
-	; Bah. No VRSBW.U8
-	; Stall (just 1 as Q9 not needed to second pipeline stage. I think.)
-	VADDW.U8	Q1, Q9, D2 ; Q1 = xxTTxxPPxxLLxxHHxxDDxx99xx55xx11  1,3
-	VSUB.S16	Q2, Q2, Q9 ; Q2 = xxUUxxQQxxMMxxIIxxEExxAAxx66xx22  1,3
-	VQMOVUN.S16	D2, Q1		; D2 = TTPPLLHHDD995511		    1,1
-	VQMOVUN.S16	D4, Q2		; D4 = UUQQMMIIEEAA6622		    1,1
-	VST1.64	{D2}, [r12@64], r1
-	VST1.64	{D4}, [r12@64], r1
-	MOV	PC,r14
-	ENDP
-
-oc_loop_filter_frag_rows_neon PROC
-	; r0 = _ref_frame_data
-	; r1 = _ystride
-	; r2 = _bv
-	; r3 = _frags
-	; r4 = _fragi0
-	; r5 = _fragi0_end
-	; r6 = _fragi_top
-	; r7 = _fragi_bot
-	; r8 = _frag_buf_offs
-	; r9 = _nhfrags
-	MOV	r12,r13
-	STMFD	r13!,{r0,r4-r11,r14}
-	LDMFD	r12,{r4-r9}
-	CMP	r4, r5		; if(_fragi0>=_fragi0_end)
-	BGE	oslffri_neon_end;   bail
-	SUBS	r9, r9, #1	; r9 = _nhfrags-1	if (r9<=0)
-	BLE	oslffri_neon_end	;		  bail
-	VLD1.64	{D30,D31}, [r2@128]	; Q15= 2L in U16s
-	ADD	r3, r3, r4, LSL #2	; r3 = &_frags[fragi]
-	ADD	r8, r8, r4, LSL #2	; r8 = &_frag_buf_offs[fragi]
-	SUB	r7, r7, r9	; _fragi_bot -= _nhfrags;
-oslffri_neon_lp1
-	MOV	r10,r4		; r10= fragi = _fragi0
-	ADD	r11,r4, r9	; r11= fragi_end-1=fragi+_nhfrags-1
-oslffri_neon_lp2
-	LDR	r14,[r3], #4	; r14= _frags[fragi]	_frags++
-	LDR	r0, [r13]	; r0 = _ref_frame_data
-	LDR	r12,[r8], #4	; r12= _frag_buf_offs[fragi]   _frag_buf_offs++
-	TST	r14,#OC_FRAG_CODED_FLAG
-	BEQ	oslffri_neon_uncoded
-	CMP	r10,r4		; if (fragi>_fragi0)
-	ADD	r0, r0, r12	; r0 = _ref_frame_data + _frag_buf_offs[fragi]
-	BLGT	loop_filter_h_neon
-	CMP	r4, r6		; if (_fragi0>_fragi_top)
-	BLGT	loop_filter_v_neon
-	CMP	r10,r11		; if(fragi+1<fragi_end)===(fragi<fragi_end-1)
-	LDRLT	r12,[r3]	; r12 = _frags[fragi+1]
-	ADD	r0, r0, #8
-	ADD	r10,r10,#1	; r10 = fragi+1;
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG	; && _frags[fragi+1].coded==0
-	BLLT	loop_filter_h_neon
-	CMP	r10,r7		; if (fragi<_fragi_bot)
-	LDRLT	r12,[r3, r9, LSL #2]	; r12 = _frags[fragi+1+_nhfrags-1]
-	SUB	r0, r0, #8
-	ADD	r0, r0, r1, LSL #3
-	ANDLT	r12,r12,#OC_FRAG_CODED_FLAG
-	CMPLT	r12,#OC_FRAG_CODED_FLAG
-	BLLT	loop_filter_v_neon
-	CMP	r10,r11		; while(fragi<=fragi_end-1)
-	BLE	oslffri_neon_lp2
-	MOV	r4, r10		; r4 = _fragi0 += _nhfrags
-	CMP	r4, r5
-	BLT	oslffri_neon_lp1
-oslffri_neon_end
-	LDMFD	r13!,{r0,r4-r11,PC}
-oslffri_neon_uncoded
-	ADD	r10,r10,#1
-	CMP	r10,r11
-	BLE	oslffri_neon_lp2
-	MOV	r4, r10		; r4 = _fragi0 += _nhfrags
-	CMP	r4, r5
-	BLT	oslffri_neon_lp1
-	LDMFD	r13!,{r0,r4-r11,PC}
-	ENDP
- ]
-
-	END
diff --git a/media/libtheora/lib/arm/armopts.s b/media/libtheora/lib/arm/armopts.s
deleted file mode 100644
index e4da429e4..000000000
--- a/media/libtheora/lib/arm/armopts.s
+++ /dev/null
@@ -1,39 +0,0 @@
-;********************************************************************
-;*                                                                  *
-;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
-;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
-;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
-;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
-;*                                                                  *
-;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
-;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
-;*                                                                  *
-;********************************************************************
-; Original implementation:
-;  Copyright (C) 2009 Robin Watts for Pinknoise Productions Ltd
-; last mod: $Id: armopts.s.in 17430 2010-09-22 21:54:09Z tterribe $
-;********************************************************************
-
-; Set the following to 1 if we have EDSP instructions
-;  (LDRD/STRD, etc., ARMv5E and later).
-OC_ARM_ASM_EDSP		*	1
-
-; Set the following to 1 if we have ARMv6 media instructions.
-OC_ARM_ASM_MEDIA	*	1
-
-; Set the following to 1 if we have NEON (some ARMv7)
-OC_ARM_ASM_NEON		*	1
-
-; Set the following to 1 if LDR/STR can work on unaligned addresses
-; This is assumed to be true for ARMv6 and later code
-OC_ARM_CAN_UNALIGN	*	0
-
-; Large unaligned loads and stores are often configured to cause an exception.
-; They cause an 8 cycle stall when they cross a 128-bit (load) or 64-bit (store)
-;  boundary, so it's usually a bad idea to use them anyway if they can be
-;  avoided.
-
-; Set the following to 1 if LDRD/STRD can work on unaligned addresses
-OC_ARM_CAN_UNALIGN_LDRD	*	0
-
-	END
diff --git a/media/libtheora/lib/arm/armstate.c b/media/libtheora/lib/arm/armstate.c
deleted file mode 100644
index a56060838..000000000
--- a/media/libtheora/lib/arm/armstate.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
-
- ********************************************************************/
-#include "armint.h"
-
-#if defined(OC_ARM_ASM)
-
-# if defined(OC_ARM_ASM_NEON)
-/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
-   the destination.*/
-static const unsigned char OC_FZIG_ZAG_NEON[128]={
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3, 4,11,18,25,32,40,
-  33,26,19,12, 5, 6,13,20,
-  27,34,41,48,56,49,42,35,
-  28,21,14, 7,15,22,29,36,
-  43,50,57,58,51,44,37,30,
-  23,31,38,45,52,59,60,53,
-  46,39,47,54,61,62,55,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64
-};
-# endif
-
-void oc_state_accel_init_arm(oc_theora_state *_state){
-  oc_state_accel_init_c(_state);
-  _state->cpu_flags=oc_cpu_flags_get();
-# if defined(OC_STATE_USE_VTABLE)
-  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm;
-  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm;
-  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm;
-  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm;
-  _state->opt_vtable.idct8x8=oc_idct8x8_arm;
-  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm;
-  /*Note: We _must_ set this function pointer, because the macro in armint.h
-     calls it with different arguments, so the C version will segfault.*/
-  _state->opt_vtable.state_loop_filter_frag_rows=
-   (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm;
-# endif
-# if defined(OC_ARM_ASM_EDSP)
-  if(_state->cpu_flags&OC_CPU_ARM_EDSP){
-#  if defined(OC_STATE_USE_VTABLE)
-    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp;
-#  endif
-  }
-#  if defined(OC_ARM_ASM_MEDIA)
-  if(_state->cpu_flags&OC_CPU_ARM_MEDIA){
-#   if defined(OC_STATE_USE_VTABLE)
-    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6;
-    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6;
-    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6;
-    _state->opt_vtable.idct8x8=oc_idct8x8_v6;
-    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6;
-    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6;
-    _state->opt_vtable.state_loop_filter_frag_rows=
-     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6;
-#   endif
-  }
-#   if defined(OC_ARM_ASM_NEON)
-  if(_state->cpu_flags&OC_CPU_ARM_NEON){
-#    if defined(OC_STATE_USE_VTABLE)
-    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon;
-    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon;
-    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon;
-    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon;
-    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon;
-    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon;
-    _state->opt_vtable.state_loop_filter_frag_rows=
-     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon;
-    _state->opt_vtable.idct8x8=oc_idct8x8_neon;
-#    endif
-    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON;
-  }
-#   endif
-#  endif
-# endif
-}
-
-void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    ogg_uint16_t p;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    oc_idct8x8_1_arm(_dct_coeffs+64,p);
-  }
-  else{
-    /*First, dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
-       _dct_coeffs+64);
-    }
-    else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-  }
-}
-
-# if defined(OC_ARM_ASM_MEDIA)
-void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    ogg_uint16_t p;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    oc_idct8x8_1_v6(_dct_coeffs+64,p);
-  }
-  else{
-    /*First, dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
-       _dct_coeffs+64);
-    }
-    else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-  }
-}
-
-# if defined(OC_ARM_ASM_NEON)
-void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    ogg_uint16_t p;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    oc_idct8x8_1_neon(_dct_coeffs+64,p);
-  }
-  else{
-    /*First, dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
-       _dct_coeffs+64);
-    }
-    else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-  }
-}
-#  endif
-# endif
-
-#endif
diff --git a/media/libtheora/lib/bitpack.c b/media/libtheora/lib/bitpack.c
deleted file mode 100644
index 8bfce4c3d..000000000
--- a/media/libtheora/lib/bitpack.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function: packing variable sized words into an octet stream
-  last mod: $Id: bitpack.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-#include <string.h>
-#include <stdlib.h>
-#include "bitpack.h"
-
-/*We're 'MSb' endian; if we write a word but read individual bits,
-   then we'll read the MSb first.*/
-
-void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
-  memset(_b,0,sizeof(*_b));
-  _b->ptr=_buf;
-  _b->stop=_buf+_bytes;
-}
-
-static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
-  const unsigned char *ptr;
-  const unsigned char *stop;
-  oc_pb_window         window;
-  int                  available;
-  unsigned             shift;
-  stop=_b->stop;
-  ptr=_b->ptr;
-  window=_b->window;
-  available=_b->bits;
-  shift=OC_PB_WINDOW_SIZE-available;
-  while(7<shift&&ptr<stop){
-    shift-=8;
-    window|=(oc_pb_window)*ptr++<<shift;
-  }
-  _b->ptr=ptr;
-  available=OC_PB_WINDOW_SIZE-shift;
-  if(_bits>available){
-    if(ptr>=stop){
-      _b->eof=1;
-      available=OC_LOTS_OF_BITS;
-    }
-    else window|=*ptr>>(available&7);
-  }
-  _b->bits=available;
-  return window;
-}
-
-int oc_pack_look1(oc_pack_buf *_b){
-  oc_pb_window window;
-  int          available;
-  window=_b->window;
-  available=_b->bits;
-  if(available<1)_b->window=window=oc_pack_refill(_b,1);
-  return window>>OC_PB_WINDOW_SIZE-1;
-}
-
-void oc_pack_adv1(oc_pack_buf *_b){
-  _b->window<<=1;
-  _b->bits--;
-}
-
-/*Here we assume that 0<=_bits&&_bits<=32.*/
-long oc_pack_read_c(oc_pack_buf *_b,int _bits){
-  oc_pb_window window;
-  int          available;
-  long         result;
-  window=_b->window;
-  available=_b->bits;
-  if(_bits==0)return 0;
-  if(available<_bits){
-    window=oc_pack_refill(_b,_bits);
-    available=_b->bits;
-  }
-  result=window>>OC_PB_WINDOW_SIZE-_bits;
-  available-=_bits;
-  window<<=1;
-  window<<=_bits-1;
-  _b->window=window;
-  _b->bits=available;
-  return result;
-}
-
-int oc_pack_read1_c(oc_pack_buf *_b){
-  oc_pb_window window;
-  int          available;
-  int          result;
-  window=_b->window;
-  available=_b->bits;
-  if(available<1){
-    window=oc_pack_refill(_b,1);
-    available=_b->bits;
-  }
-  result=window>>OC_PB_WINDOW_SIZE-1;
-  available--;
-  window<<=1;
-  _b->window=window;
-  _b->bits=available;
-  return result;
-}
-
-long oc_pack_bytes_left(oc_pack_buf *_b){
-  if(_b->eof)return -1;
-  return _b->stop-_b->ptr+(_b->bits>>3);
-}
diff --git a/media/libtheora/lib/bitpack.h b/media/libtheora/lib/bitpack.h
deleted file mode 100644
index 237b58405..000000000
--- a/media/libtheora/lib/bitpack.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function: packing variable sized words into an octet stream
-  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
-
- ********************************************************************/
-#if !defined(_bitpack_H)
-# define _bitpack_H (1)
-# include <stddef.h>
-# include <limits.h>
-# include "internal.h"
-
-
-
-typedef size_t             oc_pb_window;
-typedef struct oc_pack_buf oc_pack_buf;
-
-
-
-/*Custom bitpacker implementations.*/
-# if defined(OC_ARM_ASM)
-#  include "arm/armbits.h"
-# endif
-
-# if !defined(oc_pack_read)
-#  define oc_pack_read oc_pack_read_c
-# endif
-# if !defined(oc_pack_read1)
-#  define oc_pack_read1 oc_pack_read1_c
-# endif
-# if !defined(oc_huff_token_decode)
-#  define oc_huff_token_decode oc_huff_token_decode_c
-# endif
-
-# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
-/*This is meant to be a large, positive constant that can still be efficiently
-   loaded as an immediate (on platforms like ARM, for example).
-  Even relatively modest values like 100 would work fine.*/
-# define OC_LOTS_OF_BITS (0x40000000)
-
-
-
-struct oc_pack_buf{
-  const unsigned char *stop;
-  const unsigned char *ptr;
-  oc_pb_window         window;
-  int                  bits;
-  int                  eof;
-};
-
-void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
-int oc_pack_look1(oc_pack_buf *_b);
-void oc_pack_adv1(oc_pack_buf *_b);
-/*Here we assume 0<=_bits&&_bits<=32.*/
-long oc_pack_read_c(oc_pack_buf *_b,int _bits);
-int oc_pack_read1_c(oc_pack_buf *_b);
-/* returns -1 for read beyond EOF, or the number of whole bytes available */
-long oc_pack_bytes_left(oc_pack_buf *_b);
-
-/*These two functions are implemented locally in huffdec.c*/
-/*Read in bits without advancing the bitptr.
-  Here we assume 0<=_bits&&_bits<=32.*/
-/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
-/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
-
-#endif
diff --git a/media/libtheora/lib/config.h b/media/libtheora/lib/config.h
deleted file mode 100644
index 49772ac7f..000000000
--- a/media/libtheora/lib/config.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/* config.h.  Generated from config.h.in by configure.  */
-/* config.h.in.  Generated from configure.ac by autoheader.  */
-
-/* libcairo is available for visual debugging output */
-/* #undef HAVE_CAIRO */
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#define HAVE_DLFCN_H 1
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the <machine/soundcard.h> header file. */
-/* #undef HAVE_MACHINE_SOUNDCARD_H */
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* Define to 1 if you have the <soundcard.h> header file. */
-/* #undef HAVE_SOUNDCARD_H */
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the <sys/soundcard.h> header file. */
-#define HAVE_SYS_SOUNDCARD_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Define to 1 if your C compiler doesn't accept -c and -o together. */
-/* #undef NO_MINUS_C_MINUS_O */
-
-/* make use of arm asm optimization */
- 
-
-/* Define if assembler supports EDSP instructions */
-
-
-/* Define if assembler supports ARMv6 media instructions */
-
-
-/* Define if compiler supports NEON instructions */
-
-
-/* make use of c64x+ asm optimization */
-/* #undef OC_C64X_ASM */
-
-/* make use of x86_64 asm optimization */
-/* #undef OC_X86_64_ASM */
-
-/* make use of x86 asm optimization */
-/* #undef OC_X86_ASM */
-
-/* Name of package */
-#define PACKAGE "libtheora"
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT ""
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "libtheora"
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "libtheora 1.2.0alpha1+svn"
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "libtheora"
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION "1.2.0alpha1+svn"
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
-/* Define to exclude encode support from the build */
-/* #undef THEORA_DISABLE_ENCODE */
-
-/* Define to exclude floating point code from the build */
-/* #undef THEORA_DISABLE_FLOAT */
-
-/* Version number of package */
-#define VERSION "1.2.0alpha1+svn"
diff --git a/media/libtheora/lib/dct.h b/media/libtheora/lib/dct.h
deleted file mode 100644
index 24ba6f111..000000000
--- a/media/libtheora/lib/dct.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-/*Definitions shared by the forward and inverse DCT transforms.*/
-#if !defined(_dct_H)
-# define _dct_H (1)
-
-/*cos(n*pi/16) (resp. sin(m*pi/16)) scaled by 65536.*/
-#define OC_C1S7 ((ogg_int32_t)64277)
-#define OC_C2S6 ((ogg_int32_t)60547)
-#define OC_C3S5 ((ogg_int32_t)54491)
-#define OC_C4S4 ((ogg_int32_t)46341)
-#define OC_C5S3 ((ogg_int32_t)36410)
-#define OC_C6S2 ((ogg_int32_t)25080)
-#define OC_C7S1 ((ogg_int32_t)12785)
-
-#endif
diff --git a/media/libtheora/lib/decapiwrapper.c b/media/libtheora/lib/decapiwrapper.c
deleted file mode 100644
index 12ea475d1..000000000
--- a/media/libtheora/lib/decapiwrapper.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "apiwrapper.h"
-#include "decint.h"
-#include "theora/theoradec.h"
-
-static void th_dec_api_clear(th_api_wrapper *_api){
-  if(_api->setup)th_setup_free(_api->setup);
-  if(_api->decode)th_decode_free(_api->decode);
-  memset(_api,0,sizeof(*_api));
-}
-
-static void theora_decode_clear(theora_state *_td){
-  if(_td->i!=NULL)theora_info_clear(_td->i);
-  memset(_td,0,sizeof(*_td));
-}
-
-static int theora_decode_control(theora_state *_td,int _req,
- void *_buf,size_t _buf_sz){
-  return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode,
-   _req,_buf,_buf_sz);
-}
-
-static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
- ogg_int64_t _gp){
-  return th_granule_frame(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
-}
-
-static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
-  return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
-}
-
-static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
-  (oc_state_clear_func)theora_decode_clear,
-  (oc_state_control_func)theora_decode_control,
-  (oc_state_granule_frame_func)theora_decode_granule_frame,
-  (oc_state_granule_time_func)theora_decode_granule_time,
-};
-
-static void th_info2theora_info(theora_info *_ci,const th_info *_info){
-  _ci->version_major=_info->version_major;
-  _ci->version_minor=_info->version_minor;
-  _ci->version_subminor=_info->version_subminor;
-  _ci->width=_info->frame_width;
-  _ci->height=_info->frame_height;
-  _ci->frame_width=_info->pic_width;
-  _ci->frame_height=_info->pic_height;
-  _ci->offset_x=_info->pic_x;
-  _ci->offset_y=_info->pic_y;
-  _ci->fps_numerator=_info->fps_numerator;
-  _ci->fps_denominator=_info->fps_denominator;
-  _ci->aspect_numerator=_info->aspect_numerator;
-  _ci->aspect_denominator=_info->aspect_denominator;
-  switch(_info->colorspace){
-    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
-    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
-    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
-  }
-  switch(_info->pixel_fmt){
-    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
-    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
-    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
-    default:_ci->pixelformat=OC_PF_RSVD;
-  }
-  _ci->target_bitrate=_info->target_bitrate;
-  _ci->quality=_info->quality;
-  _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift;
-}
-
-int theora_decode_init(theora_state *_td,theora_info *_ci){
-  th_api_info    *apiinfo;
-  th_api_wrapper *api;
-  th_info         info;
-  api=(th_api_wrapper *)_ci->codec_setup;
-  /*Allocate our own combined API wrapper/theora_info struct.
-    We put them both in one malloc'd block so that when the API wrapper is
-     freed, the info struct goes with it.
-    This avoids having to figure out whether or not we need to free the info
-     struct in either theora_info_clear() or theora_clear().*/
-  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
-  if(apiinfo==NULL)return OC_FAULT;
-  /*Make our own copy of the info struct, since its lifetime should be
-     independent of the one we were passed in.*/
-  *&apiinfo->info=*_ci;
-  /*Convert the info struct now instead of saving the the one we decoded with
-     theora_decode_header(), since the user might have modified values (i.e.,
-     color space, aspect ratio, etc. can be specified from a higher level).
-    The user also might be doing something "clever" with the header packets if
-     they are not using an Ogg encapsulation.*/
-  oc_theora_info2th_info(&info,_ci);
-  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
-     of the stuff it needs.*/
-  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
-  if(apiinfo->api.decode==NULL){
-    _ogg_free(apiinfo);
-    return OC_EINVAL;
-  }
-  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
-  _td->internal_encode=NULL;
-  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
-  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
-  _td->granulepos=0;
-  _td->i=&apiinfo->info;
-  _td->i->codec_setup=&apiinfo->api;
-  return 0;
-}
-
-int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
-  th_api_wrapper *api;
-  th_info         info;
-  int             ret;
-  api=(th_api_wrapper *)_ci->codec_setup;
-  /*Allocate an API wrapper struct on demand, since it will not also include a
-     theora_info struct like the ones that are used in a theora_state struct.*/
-  if(api==NULL){
-    _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
-    if(_ci->codec_setup==NULL)return OC_FAULT;
-    api=(th_api_wrapper *)_ci->codec_setup;
-    api->clear=(oc_setup_clear_func)th_dec_api_clear;
-  }
-  /*Convert from the theora_info struct instead of saving our own th_info
-     struct between calls.
-    The user might be doing something "clever" with the header packets if they
-     are not using an Ogg encapsulation, and we don't want to break this.*/
-  oc_theora_info2th_info(&info,_ci);
-  /*We rely on the fact that theora_comment and th_comment structures are
-     actually identical.
-    Take care not to change this fact unless you change the code here as
-     well!*/
-  ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op);
-  /*We also rely on the fact that the error return code values are the same,
-    and that the implementations of these two functions return the same set of
-    them.
-   Note that theora_decode_header() really can return OC_NOTFORMAT, even
-    though it is not currently documented to do so.*/
-  if(ret<0)return ret;
-  th_info2theora_info(_ci,&info);
-  return 0;
-}
-
-int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
-  th_api_wrapper *api;
-  ogg_int64_t     gp;
-  int             ret;
-  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
-  api=(th_api_wrapper *)_td->i->codec_setup;
-  ret=th_decode_packetin(api->decode,_op,&gp);
-  if(ret<0)return OC_BADPACKET;
-  _td->granulepos=gp;
-  return 0;
-}
-
-int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
-  th_api_wrapper  *api;
-  th_dec_ctx      *decode;
-  th_ycbcr_buffer  buf;
-  int              ret;
-  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
-  api=(th_api_wrapper *)_td->i->codec_setup;
-  decode=(th_dec_ctx *)api->decode;
-  if(!decode)return OC_FAULT;
-  ret=th_decode_ycbcr_out(decode,buf);
-  if(ret>=0){
-    _yuv->y_width=buf[0].width;
-    _yuv->y_height=buf[0].height;
-    _yuv->y_stride=buf[0].stride;
-    _yuv->uv_width=buf[1].width;
-    _yuv->uv_height=buf[1].height;
-    _yuv->uv_stride=buf[1].stride;
-    _yuv->y=buf[0].data;
-    _yuv->u=buf[1].data;
-    _yuv->v=buf[2].data;
-  }
-  return ret;
-}
diff --git a/media/libtheora/lib/decinfo.c b/media/libtheora/lib/decinfo.c
deleted file mode 100644
index 603b1f93e..000000000
--- a/media/libtheora/lib/decinfo.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: decinfo.c 17276 2010-06-05 05:57:05Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "decint.h"
-
-
-
-/*Unpacks a series of octets from a given byte array into the pack buffer.
-  No checking is done to ensure the buffer contains enough data.
-  _opb: The pack buffer to read the octets from.
-  _buf: The byte array to store the unpacked bytes in.
-  _len: The number of octets to unpack.*/
-static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
-  while(_len-->0){
-    long val;
-    val=oc_pack_read(_opb,8);
-    *_buf++=(char)val;
-  }
-}
-
-/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
-static long oc_unpack_length(oc_pack_buf *_opb){
-  long ret[4];
-  int  i;
-  for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8);
-  return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
-}
-
-static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
-  long val;
-  /*Check the codec bitstream version.*/
-  val=oc_pack_read(_opb,8);
-  _info->version_major=(unsigned char)val;
-  val=oc_pack_read(_opb,8);
-  _info->version_minor=(unsigned char)val;
-  val=oc_pack_read(_opb,8);
-  _info->version_subminor=(unsigned char)val;
-  /*verify we can parse this bitstream version.
-     We accept earlier minors and all subminors, by spec*/
-  if(_info->version_major>TH_VERSION_MAJOR||
-   _info->version_major==TH_VERSION_MAJOR&&
-   _info->version_minor>TH_VERSION_MINOR){
-    return TH_EVERSION;
-  }
-  /*Read the encoded frame description.*/
-  val=oc_pack_read(_opb,16);
-  _info->frame_width=(ogg_uint32_t)val<<4;
-  val=oc_pack_read(_opb,16);
-  _info->frame_height=(ogg_uint32_t)val<<4;
-  val=oc_pack_read(_opb,24);
-  _info->pic_width=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,24);
-  _info->pic_height=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,8);
-  _info->pic_x=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,8);
-  _info->pic_y=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,32);
-  _info->fps_numerator=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,32);
-  _info->fps_denominator=(ogg_uint32_t)val;
-  if(_info->frame_width==0||_info->frame_height==0||
-   _info->pic_width+_info->pic_x>_info->frame_width||
-   _info->pic_height+_info->pic_y>_info->frame_height||
-   _info->fps_numerator==0||_info->fps_denominator==0){
-    return TH_EBADHEADER;
-  }
-  /*Note: The sense of pic_y is inverted in what we pass back to the
-     application compared to how it is stored in the bitstream.
-    This is because the bitstream uses a right-handed coordinate system, while
-     applications expect a left-handed one.*/
-  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
-  val=oc_pack_read(_opb,24);
-  _info->aspect_numerator=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,24);
-  _info->aspect_denominator=(ogg_uint32_t)val;
-  val=oc_pack_read(_opb,8);
-  _info->colorspace=(th_colorspace)val;
-  val=oc_pack_read(_opb,24);
-  _info->target_bitrate=(int)val;
-  val=oc_pack_read(_opb,6);
-  _info->quality=(int)val;
-  val=oc_pack_read(_opb,5);
-  _info->keyframe_granule_shift=(int)val;
-  val=oc_pack_read(_opb,2);
-  _info->pixel_fmt=(th_pixel_fmt)val;
-  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
-  val=oc_pack_read(_opb,3);
-  if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
-  return 0;
-}
-
-static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
-  long len;
-  int  i;
-  /*Read the vendor string.*/
-  len=oc_unpack_length(_opb);
-  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
-  _tc->vendor=_ogg_malloc((size_t)len+1);
-  if(_tc->vendor==NULL)return TH_EFAULT;
-  oc_unpack_octets(_opb,_tc->vendor,len);
-  _tc->vendor[len]='\0';
-  /*Read the user comments.*/
-  _tc->comments=(int)oc_unpack_length(_opb);
-  len=_tc->comments;
-  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
-    _tc->comments=0;
-    return TH_EBADHEADER;
-  }
-  _tc->comment_lengths=(int *)_ogg_malloc(
-   _tc->comments*sizeof(_tc->comment_lengths[0]));
-  _tc->user_comments=(char **)_ogg_malloc(
-   _tc->comments*sizeof(_tc->user_comments[0]));
-  if(_tc->comment_lengths==NULL||_tc->user_comments==NULL){
-    _tc->comments=0;
-    return TH_EFAULT;
-  }
-  for(i=0;i<_tc->comments;i++){
-    len=oc_unpack_length(_opb);
-    if(len<0||len>oc_pack_bytes_left(_opb)){
-      _tc->comments=i;
-      return TH_EBADHEADER;
-    }
-    _tc->comment_lengths[i]=len;
-    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
-    if(_tc->user_comments[i]==NULL){
-      _tc->comments=i;
-      return TH_EFAULT;
-    }
-    oc_unpack_octets(_opb,_tc->user_comments[i],len);
-    _tc->user_comments[i][len]='\0';
-  }
-  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
-}
-
-static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
-  int ret;
-  /*Read the quantizer tables.*/
-  ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
-  if(ret<0)return ret;
-  /*Read the Huffman trees.*/
-  return oc_huff_trees_unpack(_opb,_setup->huff_tables);
-}
-
-static void oc_setup_clear(th_setup_info *_setup){
-  oc_quant_params_clear(&_setup->qinfo);
-  oc_huff_trees_clear(_setup->huff_tables);
-}
-
-static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
- th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
-  char buffer[6];
-  long val;
-  int  packtype;
-  int  ret;
-  val=oc_pack_read(_opb,8);
-  packtype=(int)val;
-  /*If we're at a data packet and we have received all three headers, we're
-     done.*/
-  if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){
-    return 0;
-  }
-  /*Check the codec string.*/
-  oc_unpack_octets(_opb,buffer,6);
-  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
-  switch(packtype){
-    /*Codec info header.*/
-    case 0x80:{
-      /*This should be the first packet, and we should not already be
-         initialized.*/
-      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
-      ret=oc_info_unpack(_opb,_info);
-      if(ret<0)th_info_clear(_info);
-      else ret=3;
-    }break;
-    /*Comment header.*/
-    case 0x81:{
-      if(_tc==NULL)return TH_EFAULT;
-      /*We shoud have already decoded the info header, and should not yet have
-         decoded the comment header.*/
-      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
-      ret=oc_comment_unpack(_opb,_tc);
-      if(ret<0)th_comment_clear(_tc);
-      else ret=2;
-    }break;
-    /*Codec setup header.*/
-    case 0x82:{
-      oc_setup_info *setup;
-      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
-      /*We should have already decoded the info header and the comment header,
-         and should not yet have decoded the setup header.*/
-      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
-        return TH_EBADHEADER;
-      }
-      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
-      if(setup==NULL)return TH_EFAULT;
-      ret=oc_setup_unpack(_opb,setup);
-      if(ret<0){
-        oc_setup_clear(setup);
-        _ogg_free(setup);
-      }
-      else{
-        *_setup=setup;
-        ret=1;
-      }
-    }break;
-    default:{
-      /*We don't know what this header is.*/
-      return TH_EBADHEADER;
-    }break;
-  }
-  return ret;
-}
-
-
-/*Decodes one header packet.
-  This should be called repeatedly with the packets at the beginning of the
-   stream until it returns 0.*/
-int th_decode_headerin(th_info *_info,th_comment *_tc,
- th_setup_info **_setup,ogg_packet *_op){
-  oc_pack_buf opb;
-  if(_op==NULL)return TH_EBADHEADER;
-  if(_info==NULL)return TH_EFAULT;
-  oc_pack_readinit(&opb,_op->packet,_op->bytes);
-  return oc_dec_headerin(&opb,_info,_tc,_setup,_op);
-}
-
-void th_setup_free(th_setup_info *_setup){
-  if(_setup!=NULL){
-    oc_setup_clear(_setup);
-    _ogg_free(_setup);
-  }
-}
diff --git a/media/libtheora/lib/decint.h b/media/libtheora/lib/decint.h
deleted file mode 100644
index bd6522273..000000000
--- a/media/libtheora/lib/decint.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: decint.h 17457 2010-09-24 02:05:49Z tterribe $
-
- ********************************************************************/
-
-#include <limits.h>
-#if !defined(_decint_H)
-# define _decint_H (1)
-# include "theora/theoradec.h"
-# include "state.h"
-# include "bitpack.h"
-# include "huffdec.h"
-# include "dequant.h"
-
-typedef struct th_setup_info         oc_setup_info;
-typedef struct oc_dec_opt_vtable     oc_dec_opt_vtable;
-typedef struct oc_dec_pipeline_state oc_dec_pipeline_state;
-typedef struct th_dec_ctx            oc_dec_ctx;
-
-
-
-/*Decoder-specific accelerated functions.*/
-# if defined(OC_C64X_ASM)
-#  include "c64x/c64xdec.h"
-# endif
-
-# if !defined(oc_dec_accel_init)
-#  define oc_dec_accel_init oc_dec_accel_init_c
-# endif
-# if defined(OC_DEC_USE_VTABLE)
-#  if !defined(oc_dec_dc_unpredict_mcu_plane)
-#   define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
- ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
-#  endif
-# else
-#  if !defined(oc_dec_dc_unpredict_mcu_plane)
-#   define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c
-#  endif
-# endif
-
-
-
-/*Constants for the packet-in state machine specific to the decoder.*/
-
-/*Next packet to read: Data packet.*/
-#define OC_PACKET_DATA (0)
-
-
-
-struct th_setup_info{
-  /*The Huffman codes.*/
-  ogg_int16_t   *huff_tables[TH_NHUFFMAN_TABLES];
-  /*The quantization parameters.*/
-  th_quant_info  qinfo;
-};
-
-
-
-/*Decoder specific functions with accelerated variants.*/
-struct oc_dec_opt_vtable{
-  void (*dc_unpredict_mcu_plane)(oc_dec_ctx *_dec,
-   oc_dec_pipeline_state *_pipe,int _pli);
-};
-
-
-
-struct oc_dec_pipeline_state{
-  /*Decoded DCT coefficients.
-    These are placed here instead of on the stack so that they can persist
-     between blocks, which makes clearing them back to zero much faster when
-     only a few non-zero coefficients were decoded.
-    It requires at least 65 elements because the zig-zag index array uses the
-     65th element as a dumping ground for out-of-range indices to protect us
-     from buffer overflow.
-    We make it fully twice as large so that the second half can serve as the
-     reconstruction buffer, which saves passing another parameter to all the
-     acceleration functios.
-    It also solves problems with 16-byte alignment for NEON on ARM.
-    gcc (as of 4.2.1) only seems to be able to give stack variables 8-byte
-     alignment, and silently produces incorrect results if you ask for 16.
-    Finally, keeping it off the stack means there's less likely to be a data
-     hazard beween the NEON co-processor and the regular ARM core, which avoids
-     unnecessary stalls.*/
-  OC_ALIGN16(ogg_int16_t dct_coeffs[128]);
-  OC_ALIGN16(signed char bounding_values[256]);
-  ptrdiff_t           ti[3][64];
-  ptrdiff_t           ebi[3][64];
-  ptrdiff_t           eob_runs[3][64];
-  const ptrdiff_t    *coded_fragis[3];
-  const ptrdiff_t    *uncoded_fragis[3];
-  ptrdiff_t           ncoded_fragis[3];
-  ptrdiff_t           nuncoded_fragis[3];
-  const ogg_uint16_t *dequant[3][3][2];
-  int                 fragy0[3];
-  int                 fragy_end[3];
-  int                 pred_last[3][4];
-  int                 mcu_nvfrags;
-  int                 loop_filter;
-  int                 pp_level;
-};
-
-
-struct th_dec_ctx{
-  /*Shared encoder/decoder state.*/
-  oc_theora_state        state;
-  /*Whether or not packets are ready to be emitted.
-    This takes on negative values while there are remaining header packets to
-     be emitted, reaches 0 when the codec is ready for input, and goes to 1
-     when a frame has been processed and a data packet is ready.*/
-  int                    packet_state;
-  /*Buffer in which to assemble packets.*/
-  oc_pack_buf            opb;
-  /*Huffman decode trees.*/
-  ogg_int16_t           *huff_tables[TH_NHUFFMAN_TABLES];
-  /*The index of the first token in each plane for each coefficient.*/
-  ptrdiff_t              ti0[3][64];
-  /*The number of outstanding EOB runs at the start of each coefficient in each
-     plane.*/
-  ptrdiff_t              eob_runs[3][64];
-  /*The DCT token lists.*/
-  unsigned char         *dct_tokens;
-  /*The extra bits associated with DCT tokens.*/
-  unsigned char         *extra_bits;
-  /*The number of dct tokens unpacked so far.*/
-  int                    dct_tokens_count;
-  /*The out-of-loop post-processing level.*/
-  int                    pp_level;
-  /*The DC scale used for out-of-loop deblocking.*/
-  int                    pp_dc_scale[64];
-  /*The sharpen modifier used for out-of-loop deringing.*/
-  int                    pp_sharp_mod[64];
-  /*The DC quantization index of each block.*/
-  unsigned char         *dc_qis;
-  /*The variance of each block.*/
-  int                   *variances;
-  /*The storage for the post-processed frame buffer.*/
-  unsigned char         *pp_frame_data;
-  /*Whether or not the post-processsed frame buffer has space for chroma.*/
-  int                    pp_frame_state;
-  /*The buffer used for the post-processed frame.
-    Note that this is _not_ guaranteed to have the same strides and offsets as
-     the reference frame buffers.*/
-  th_ycbcr_buffer        pp_frame_buf;
-  /*The striped decode callback function.*/
-  th_stripe_callback     stripe_cb;
-  oc_dec_pipeline_state  pipe;
-# if defined(OC_DEC_USE_VTABLE)
-  /*Table for decoder acceleration functions.*/
-  oc_dec_opt_vtable      opt_vtable;
-# endif
-# if defined(HAVE_CAIRO)
-  /*Output metrics for debugging.*/
-  int                    telemetry;
-  int                    telemetry_mbmode;
-  int                    telemetry_mv;
-  int                    telemetry_qi;
-  int                    telemetry_bits;
-  int                    telemetry_frame_bytes;
-  int                    telemetry_coding_bytes;
-  int                    telemetry_mode_bytes;
-  int                    telemetry_mv_bytes;
-  int                    telemetry_qi_bytes;
-  int                    telemetry_dc_bytes;
-  unsigned char         *telemetry_frame_data;
-# endif
-};
-
-/*Default pure-C implementations of decoder-specific accelerated functions.*/
-void oc_dec_accel_init_c(oc_dec_ctx *_dec);
-
-void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli);
-
-#endif
diff --git a/media/libtheora/lib/decode.c b/media/libtheora/lib/decode.c
deleted file mode 100644
index 563782b7a..000000000
--- a/media/libtheora/lib/decode.c
+++ /dev/null
@@ -1,2963 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: decode.c 17576 2010-10-29 01:07:51Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "decint.h"
-#if defined(OC_DUMP_IMAGES)
-# include <stdio.h>
-# include "png.h"
-#endif
-#if defined(HAVE_CAIRO)
-# include <cairo.h>
-#endif
-
-
-/*No post-processing.
-  This is the level a decoder is set to by oc_dec_init().*/
-#define OC_PP_LEVEL_DISABLED  (0)
-/*Keep track of DC qi for each block only.*/
-#define OC_PP_LEVEL_TRACKDCQI (1)
-/*Deblock the luma plane.*/
-#define OC_PP_LEVEL_DEBLOCKY  (2)
-/*Dering the luma plane.*/
-#define OC_PP_LEVEL_DERINGY   (3)
-/*Stronger luma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGY  (4)
-/*Deblock the chroma planes.*/
-#define OC_PP_LEVEL_DEBLOCKC  (5)
-/*Dering the chroma planes.*/
-#define OC_PP_LEVEL_DERINGC   (6)
-/*Stronger chroma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGC  (7)
-/*Maximum valid post-processing level (the same value as
-   OC_PP_LEVEL_SDERINGC).*/
-#define OC_PP_LEVEL_MAX       (7)
-
-
-
-/*The mode alphabets for the various mode coding schemes.
-  Scheme 0 uses a custom alphabet, which is not stored in this table.
-  Row i holds the alphabet for scheme i+1; entry j of a row is the macro block
-   mode assigned to decoded mode index j.
-  The last row (the default ordering) also gives the order in which the modes
-   of a custom scheme 0 alphabet are listed in the bitstream.*/
-static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
-  /*Last MV dominates.*/
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
-    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
-    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
-  },
-  /*No MV dominates.*/
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
-    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  /*Default ordering.*/
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  }
-};
-
-
-/*The original DCT tokens are extended and reordered during the construction of
-   the Huffman tables.
-  The extension means more bits can be read with fewer calls to the bitpacker
-   during the Huffman decoding process (at the cost of larger Huffman tables),
-   and fewer tokens require additional extra bits (reducing the average storage
-   per decoded token).
-  The revised ordering reveals essential information in the token value
-   itself; specifically, whether or not there are additional extra bits to read
-   and the parameter to which those extra bits are applied.
-  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
-  The extra bits are added into the code word at the bit position inferred
-   from the token value, giving the final code word from which all required
-   parameters are derived.
-  The number of EOBs and the leading zero run length can be extracted directly.
-  The coefficient magnitude is optionally negated before extraction, according
-   to a 'flip' bit.*/
-
-/*The number of additional extra bits that are decoded with each of the
-   internal DCT tokens.
-  Entry i applies to internal token i; only the first 15 internal tokens take
-   extra bits at all (see OC_DCT_TOKEN_NEEDS_MORE()).*/
-static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
-  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
-};
-
-/*Whether or not an internal token needs any additional extra bits.
-  This is true exactly for the tokens that have an entry in
-   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
-  The argument is parenthesized so that an arbitrary expression, not just a
-   plain identifier, can be passed safely.*/
-#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
- ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
-  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
-
-/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.
-  It is internal token 0, which takes 12 extra bits.*/
-#define OC_DCT_TOKEN_FAT_EOB (0)
-
-/*The number of EOBs to use for an end-of-frame token.
-  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
-   is not yet available everywhere; this should be equivalent.*/
-#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
-
-/*The location of the (6) run length bits in the code word.
-  These are placed at index 0 and given 8 bits (even though 6 would suffice)
-   because it may be faster to extract the lower byte on some platforms.*/
-#define OC_DCT_CW_RLEN_SHIFT (0)
-/*The location of the (12) EOB bits in the code word (bits 8...19).*/
-#define OC_DCT_CW_EOB_SHIFT  (8)
-/*The location of the (1) flip bit in the code word.
-  This must be right under the magnitude bits.*/
-#define OC_DCT_CW_FLIP_BIT   (20)
-/*The location of the (11) token magnitude bits in the code word (bits
-   21...31).
-  These must be last, and rely on a sign-extending right shift.*/
-#define OC_DCT_CW_MAG_SHIFT  (21)
-
-/*Pack the given fields into a code word.
-  _eobs: The EOB run length.
-  _rlen: The zero run length.
-  _mag:  The signed coefficient value.
-  _flip: The flip bit (0 or 1); it is also subtracted from _mag before the
-          magnitude is stored.
-  Each field is moved into place by multiplying by a power of two instead of
-   left-shifting, because left-shifting a negative value (e.g., a _mag of -1)
-   is undefined behavior in C.
-  The resulting values are the same as those produced by the shifts on a
-   two's complement machine.*/
-#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
- ((_eobs)*(1<<OC_DCT_CW_EOB_SHIFT)| \
- (_rlen)*(1<<OC_DCT_CW_RLEN_SHIFT)| \
- (_flip)*(1<<OC_DCT_CW_FLIP_BIT)| \
- ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))
-
-/*A special code word value that signals the end of the frame (a long EOB run
-   of zero).*/
-#define OC_DCT_CW_FINISH (0)
-
-/*The position at which to insert the extra bits in the code word.
-  We use this formulation because Intel has no useful cmov.
-  A real architecture would probably do better with two of those.
-  This translates to 11 instructions(!), and is _still_ faster than either a
-   table lookup (just barely) or the naive double-ternary implementation (which
-   gcc translates to a jump and a cmov).
-  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
-   you want to make one of the other shifts zero.
-  The result is OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, OC_DCT_CW_MAG_SHIFT for
-   tokens 2 through 11, and 0 for every other token: each comparison yields 0
-   or 1, and negating it gives an all-zeros or all-ones mask.*/
-#define OC_DCT_TOKEN_EB_POS(_token) \
- ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
- +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
-
-/*The code words for each internal token.
-  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
-   order.
-  Entries 0...14 are the tokens that take additional extra bits (for the EOB
-   count in entries 0...1, the magnitude in 2...11, and the run length in
-   12...14); entries 15...91 are complete as stored.*/
-static const ogg_int32_t OC_DCT_CODE_WORD[92]={
-  /*These tokens require additional extra bits for the EOB count.*/
-  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
-  OC_DCT_CW_FINISH,
-  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
-  OC_DCT_CW_PACK(16, 0,  0,0),
-  /*These tokens require additional extra bits for the magnitude.*/
-  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
-  OC_DCT_CW_PACK( 0, 0, 13,0),
-  OC_DCT_CW_PACK( 0, 0, 13,1),
-  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
-  OC_DCT_CW_PACK( 0, 0, 21,0),
-  OC_DCT_CW_PACK( 0, 0, 21,1),
-  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
-  OC_DCT_CW_PACK( 0, 0, 37,0),
-  OC_DCT_CW_PACK( 0, 0, 37,1),
-  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
-  OC_DCT_CW_PACK( 0, 0, 69,0),
-  OC_DCT_CW_PACK( 0, 0,325,0),
-  OC_DCT_CW_PACK( 0, 0, 69,1),
-  OC_DCT_CW_PACK( 0, 0,325,1),
-  /*These tokens require additional extra bits for the run length.*/
-  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
-  OC_DCT_CW_PACK( 0,10, +1,0),
-  OC_DCT_CW_PACK( 0,10, -1,0),
-  /*OC_DCT_ZRL_TOKEN (6 extra bits)
-    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
-  OC_DCT_CW_PACK( 0, 0,  0,1),
-  /*The remaining tokens require no additional extra bits.*/
-  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 1, 0,  0,0),
-  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 2, 0,  0,0),
-  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 3, 0,  0,0),
-  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
-  OC_DCT_CW_PACK( 0, 1, +1,0),
-  OC_DCT_CW_PACK( 0, 1, -1,0),
-  OC_DCT_CW_PACK( 0, 2, +1,0),
-  OC_DCT_CW_PACK( 0, 2, -1,0),
-  OC_DCT_CW_PACK( 0, 3, +1,0),
-  OC_DCT_CW_PACK( 0, 3, -1,0),
-  OC_DCT_CW_PACK( 0, 4, +1,0),
-  OC_DCT_CW_PACK( 0, 4, -1,0),
-  OC_DCT_CW_PACK( 0, 5, +1,0),
-  OC_DCT_CW_PACK( 0, 5, -1,0),
-  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
-  OC_DCT_CW_PACK( 0, 1, +2,0),
-  OC_DCT_CW_PACK( 0, 1, +3,0),
-  OC_DCT_CW_PACK( 0, 1, -2,0),
-  OC_DCT_CW_PACK( 0, 1, -3,0),
-  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
-  OC_DCT_CW_PACK( 0, 6, +1,0),
-  OC_DCT_CW_PACK( 0, 7, +1,0),
-  OC_DCT_CW_PACK( 0, 8, +1,0),
-  OC_DCT_CW_PACK( 0, 9, +1,0),
-  OC_DCT_CW_PACK( 0, 6, -1,0),
-  OC_DCT_CW_PACK( 0, 7, -1,0),
-  OC_DCT_CW_PACK( 0, 8, -1,0),
-  OC_DCT_CW_PACK( 0, 9, -1,0),
-  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
-  OC_DCT_CW_PACK( 0, 2, +2,0),
-  OC_DCT_CW_PACK( 0, 3, +2,0),
-  OC_DCT_CW_PACK( 0, 2, +3,0),
-  OC_DCT_CW_PACK( 0, 3, +3,0),
-  OC_DCT_CW_PACK( 0, 2, -2,0),
-  OC_DCT_CW_PACK( 0, 3, -2,0),
-  OC_DCT_CW_PACK( 0, 2, -3,0),
-  OC_DCT_CW_PACK( 0, 3, -3,0),
-  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
-    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
-  OC_DCT_CW_PACK( 0, 0,  0,1),
-  OC_DCT_CW_PACK( 0, 1,  0,0),
-  OC_DCT_CW_PACK( 0, 2,  0,0),
-  OC_DCT_CW_PACK( 0, 3,  0,0),
-  OC_DCT_CW_PACK( 0, 4,  0,0),
-  OC_DCT_CW_PACK( 0, 5,  0,0),
-  OC_DCT_CW_PACK( 0, 6,  0,0),
-  OC_DCT_CW_PACK( 0, 7,  0,0),
-  /*OC_ONE_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 0, 0, +1,0),
-  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 0, 0, -1,0),
-  /*OC_TWO_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 0, 0, +2,0),
-  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
-  OC_DCT_CW_PACK( 0, 0, -2,0),
-  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
-  OC_DCT_CW_PACK( 0, 0, +3,0),
-  OC_DCT_CW_PACK( 0, 0, -3,0),
-  OC_DCT_CW_PACK( 0, 0, +4,0),
-  OC_DCT_CW_PACK( 0, 0, -4,0),
-  OC_DCT_CW_PACK( 0, 0, +5,0),
-  OC_DCT_CW_PACK( 0, 0, -5,0),
-  OC_DCT_CW_PACK( 0, 0, +6,0),
-  OC_DCT_CW_PACK( 0, 0, -6,0),
-  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
-  OC_DCT_CW_PACK( 0, 0, +7,0),
-  OC_DCT_CW_PACK( 0, 0, +8,0),
-  OC_DCT_CW_PACK( 0, 0, -7,0),
-  OC_DCT_CW_PACK( 0, 0, -8,0),
-  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
-  OC_DCT_CW_PACK( 0, 0, +9,0),
-  OC_DCT_CW_PACK( 0, 0,+10,0),
-  OC_DCT_CW_PACK( 0, 0,+11,0),
-  OC_DCT_CW_PACK( 0, 0,+12,0),
-  OC_DCT_CW_PACK( 0, 0, -9,0),
-  OC_DCT_CW_PACK( 0, 0,-10,0),
-  OC_DCT_CW_PACK( 0, 0,-11,0),
-  OC_DCT_CW_PACK( 0, 0,-12,0),
-  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
-  OC_DCT_CW_PACK( 8, 0,  0,0),
-  OC_DCT_CW_PACK( 9, 0,  0,0),
-  OC_DCT_CW_PACK(10, 0,  0,0),
-  OC_DCT_CW_PACK(11, 0,  0,0),
-  OC_DCT_CW_PACK(12, 0,  0,0),
-  OC_DCT_CW_PACK(13, 0,  0,0),
-  OC_DCT_CW_PACK(14, 0,  0,0),
-  OC_DCT_CW_PACK(15, 0,  0,0),
-  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
-  OC_DCT_CW_PACK( 4, 0,  0,0),
-  OC_DCT_CW_PACK( 5, 0,  0,0),
-  OC_DCT_CW_PACK( 6, 0,  0,0),
-  OC_DCT_CW_PACK( 7, 0,  0,0),
-};
-
-
-
-/*Decodes one super block run length from the bitstream.
-  Coding scheme:
-       Codeword            Run Length
-     0                       1
-     10x                     2-3
-     110x                    4-5
-     1110xx                  6-9
-     11110xxx                10-17
-     111110xxxx              18-33
-     111111xxxxxxxxxxxx      34-4129
-  Tree values below 0x10 are the run length itself.
-  Larger values pack (number of extra bits)<<4|(base run length-6); every
-   extra bit count in the tree is even, which leaves bit 4 free for the top
-   bit of the base offset.*/
-static int oc_sb_run_unpack(oc_pack_buf *_opb){
-  static const ogg_int16_t OC_SB_RUN_TREE[22]={
-    4,
-     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
-     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
-     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
-     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
-      2,
-       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
-  };
-  int token;
-  int base;
-  int nbits;
-  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
-  /*Short runs are stored directly in the tree.*/
-  if(token<0x10)return token;
-  /*Otherwise split the token into its base offset and extra bit count, and
-     read the remainder of the run length.*/
-  base=token&0x1F;
-  nbits=(token-base)>>4;
-  return 6+base+(int)oc_pack_read(_opb,nbits);
-}
-
-/*Decodes one block run length from the bitstream.
-  Coding scheme:
-     Codeword             Run Length
-     0x                      1-2
-     10x                     3-4
-     110x                    5-6
-     1110xx                  7-10
-     11110xx                 11-14
-     11111xxxx               15-30
-  Unlike the super block runs, every run length is a leaf of the tree, so no
-   extra bits are read afterwards.*/
-static int oc_block_run_unpack(oc_pack_buf *_opb){
-  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
-    5,
-     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
-     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
-     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
-     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
-     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
-     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
-     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
-     33,       36,       39,       44,
-      1,-(1<<8|7),-(1<<8|8),
-      1,-(1<<8|9),-(1<<8|10),
-      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
-      4,
-       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
-       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
-       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
-       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
-  };
-  int run_length;
-  run_length=oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
-  return run_length;
-}
-
-
-
-/*Installs the pure-C versions of the decoder's accelerated functions.
-  This does nothing unless OC_DEC_USE_VTABLE is defined.*/
-void oc_dec_accel_init_c(oc_dec_ctx *_dec){
-#if defined(OC_DEC_USE_VTABLE)
-  _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c;
-#endif
-}
-
-/*Initializes a decoder context from the info and setup headers.
-  _dec:   The decoder context to initialize.
-  _info:  The stream parameters, passed on to oc_state_init().
-  _setup: The setup information (Huffman tables, quantization parameters and
-           loop filter limits).
-  Return: 0 on success, the negative value returned by oc_state_init() or
-           oc_huff_trees_copy() if one of them fails, or TH_EFAULT if the
-           token buffer cannot be allocated.
-          On failure, everything set up so far is released again.*/
-static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
- const th_setup_info *_setup){
-  int qti;
-  int pli;
-  int qi;
-  int ret;
-  ret=oc_state_init(&_dec->state,_info,3);
-  if(ret<0)return ret;
-  ret=oc_huff_trees_copy(_dec->huff_tables,
-   (const ogg_int16_t *const *)_setup->huff_tables);
-  if(ret<0)goto fail_state;
-  /*For each fragment, allocate one byte for every DCT coefficient token, plus
-     one byte for extra-bits for each token, plus one more byte for the long
-     EOB run, just in case it's the very last token and has a run length of
-     one.*/
-  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
-   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
-  if(_dec->dct_tokens==NULL){
-    ret=TH_EFAULT;
-    goto fail_huff;
-  }
-  /*Point every dequantization table at its backing storage before filling
-     the tables in.*/
-  for(qi=0;qi<64;qi++){
-    for(pli=0;pli<3;pli++){
-      for(qti=0;qti<2;qti++){
-        _dec->state.dequant_tables[qi][pli][qti]=
-         _dec->state.dequant_table_data[qi][pli][qti];
-      }
-    }
-  }
-  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
-   &_setup->qinfo);
-  /*Derive the sharpen modifier for each qi from four of the dequantization
-     table entries, with the luma plane counted twice.*/
-  for(qi=0;qi<64;qi++){
-    int qsum;
-    qsum=0;
-    for(qti=0;qti<2;qti++){
-      for(pli=0;pli<3;pli++){
-        qsum+=(_dec->state.dequant_tables[qi][pli][qti][12]+
-         _dec->state.dequant_tables[qi][pli][qti][17]+
-         _dec->state.dequant_tables[qi][pli][qti][18]+
-         _dec->state.dequant_tables[qi][pli][qti][24])<<(pli==0);
-      }
-    }
-    _dec->pp_sharp_mod[qi]=-(qsum>>11);
-  }
-  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
-   sizeof(_dec->state.loop_filter_limits));
-  oc_dec_accel_init(_dec);
-  /*Post-processing starts out disabled, with none of its buffers
-     allocated.*/
-  _dec->pp_level=OC_PP_LEVEL_DISABLED;
-  _dec->dc_qis=NULL;
-  _dec->variances=NULL;
-  _dec->pp_frame_data=NULL;
-  _dec->stripe_cb.ctx=NULL;
-  _dec->stripe_cb.stripe_decoded=NULL;
-#if defined(HAVE_CAIRO)
-  _dec->telemetry=0;
-  _dec->telemetry_bits=0;
-  _dec->telemetry_qi=0;
-  _dec->telemetry_mbmode=0;
-  _dec->telemetry_mv=0;
-  _dec->telemetry_frame_data=NULL;
-#endif
-  return 0;
-  /*Error exits: undo the earlier steps in reverse order.*/
-fail_huff:
-  oc_huff_trees_clear(_dec->huff_tables);
-fail_state:
-  oc_state_clear(&_dec->state);
-  return ret;
-}
-
-/*Releases everything owned by a decoder context: the telemetry and
-   post-processing buffers, the token buffer, the Huffman trees, and finally
-   the shared codec state.*/
-static void oc_dec_clear(oc_dec_ctx *_dec){
-#ifdef HAVE_CAIRO
-  _ogg_free(_dec->telemetry_frame_data);
-#endif
-  /*Buffers that are NULL after oc_dec_init().*/
-  _ogg_free(_dec->pp_frame_data);
-  _ogg_free(_dec->variances);
-  _ogg_free(_dec->dc_qis);
-  /*Resources acquired by oc_dec_init(), released in reverse order.*/
-  _ogg_free(_dec->dct_tokens);
-  oc_huff_trees_clear(_dec->huff_tables);
-  oc_state_clear(&_dec->state);
-}
-
-
-/*Unpacks the frame header of a data packet.
-  This reads the packet type bit, the frame type, the list of one to three qi
-   values, and (for keyframes only) three reserved bits.
-  Return: 0 on success, TH_EBADPACKET if this is not a data packet, or
-           TH_EIMPL if the reserved keyframe bits are not all zero.*/
-static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
-  int nqis;
-  /*A data packet starts with a zero bit.*/
-  if(oc_pack_read1(&_dec->opb)!=0)return TH_EBADPACKET;
-  /*The next bit is the frame type (I or P).*/
-  _dec->state.frame_type=(int)oc_pack_read1(&_dec->opb);
-  /*Read in the qi list: each qi takes 6 bits, and after the first and the
-     second a flag bit says whether another one follows.*/
-  nqis=0;
-  for(;;){
-    _dec->state.qis[nqis++]=(unsigned char)oc_pack_read(&_dec->opb,6);
-    if(nqis>=3||!oc_pack_read1(&_dec->opb))break;
-  }
-  _dec->state.nqis=nqis;
-  if(_dec->state.frame_type==OC_INTRA_FRAME){
-    /*Keyframes carry 3 unused configuration bits, holdovers from VP3, which
-       are required to be zero here.*/
-    if(oc_pack_read(&_dec->opb,3)!=0)return TH_EIMPL;
-  }
-  return 0;
-}
-
-/*Marks every fragment as coded, in OC_MODE_INTRA, and predicted from
-   OC_FRAME_SELF.
-  The coded fragment list is rebuilt in coded order, and the per-plane and
-   total coded fragment counts are updated to match.
-  The coded macro block list and the super block flags are not touched, as
-   those are not used when decoding INTRA frames.*/
-static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
-  const oc_sb_map   *sb_maps;
-  const oc_sb_flags *sb_flags;
-  oc_fragment       *frags;
-  ptrdiff_t         *coded_list;
-  ptrdiff_t          ncoded;
-  ptrdiff_t          plane_start;
-  unsigned           sb_end;
-  unsigned           sbi;
-  int                pli;
-  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
-  sb_flags=_dec->state.sb_flags;
-  frags=_dec->state.frags;
-  coded_list=_dec->state.coded_fragis;
-  ncoded=0;
-  sbi=sb_end=0;
-  for(pli=0;pli<3;pli++){
-    plane_start=ncoded;
-    /*The super blocks of all planes are numbered consecutively.*/
-    sb_end+=_dec->state.fplanes[pli].nsbs;
-    for(;sbi<sb_end;sbi++){
-      int quadi;
-      for(quadi=0;quadi<4;quadi++){
-        int bi;
-        if(!(sb_flags[sbi].quad_valid&(1<<quadi)))continue;
-        for(bi=0;bi<4;bi++){
-          ptrdiff_t fragi;
-          fragi=sb_maps[sbi][quadi][bi];
-          /*Negative entries do not refer to a fragment.*/
-          if(fragi<0)continue;
-          frags[fragi].coded=1;
-          frags[fragi].refi=OC_FRAME_SELF;
-          frags[fragi].mb_mode=OC_MODE_INTRA;
-          coded_list[ncoded++]=fragi;
-        }
-      }
-    }
-    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
-  }
-  _dec->state.ntotal_coded_fragis=ncoded;
-}
-
-/*Decodes the bit flags indicating whether each super block is partially coded
-   or not.
-  The flags are run-length coded: an initial flag bit is followed by run
-   lengths, and the flag toggles after each run.
-  After a maximum-length run, the next flag is read explicitly instead.
-  This also clears the coded_fully flag of every super block.
-  Return: The number of partially coded super blocks.*/
-static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb_flags *sb_flags;
-  unsigned     nsbs;
-  unsigned     sbi;
-  unsigned     npartial;
-  unsigned     run_count;
-  long         val;
-  int          flag;
-  val=oc_pack_read1(&_dec->opb);
-  flag=(int)val;
-  sb_flags=_dec->state.sb_flags;
-  nsbs=_dec->state.nsbs;
-  sbi=npartial=0;
-  while(sbi<nsbs){
-    int full_run;
-    run_count=oc_sb_run_unpack(&_dec->opb);
-    full_run=run_count>=4129;/*4129 is the longest run the code can express.*/
-    do{
-      sb_flags[sbi].coded_partially=flag;
-      sb_flags[sbi].coded_fully=0;
-      npartial+=flag;
-      sbi++;
-    }
-    while(--run_count>0&&sbi<nsbs);
-    if(full_run&&sbi<nsbs){
-      val=oc_pack_read1(&_dec->opb);
-      flag=(int)val;
-    }
-    else flag=!flag;
-  }
-  /*TODO: run_count should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-  return npartial;
-}
-
-/*Decodes the bit flags for whether or not each non-partially-coded super
-   block is fully coded or not.
-  This function should only be called if there is at least one
-   non-partially-coded super block.
-  The results are stored in the coded_fully flag of each such super block;
-   nothing is returned.*/
-static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb_flags *sb_flags;
-  unsigned     nsbs;
-  unsigned     sbi;
-  unsigned     run_count;
-  long         val;
-  int          flag;
-  sb_flags=_dec->state.sb_flags;
-  nsbs=_dec->state.nsbs;
-  /*Skip partially coded super blocks.
-    This loop has no bound of its own: it relies on the precondition that at
-     least one super block is not partially coded.*/
-  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
-  val=oc_pack_read1(&_dec->opb);
-  flag=(int)val;
-  do{
-    int full_run;
-    run_count=oc_sb_run_unpack(&_dec->opb);
-    full_run=run_count>=4129;/*4129 is the longest run the code can express.*/
-    for(;sbi<nsbs;sbi++){
-      if(sb_flags[sbi].coded_partially)continue;
-      if(run_count--<=0)break;
-      sb_flags[sbi].coded_fully=flag;
-    }
-    if(full_run&&sbi<nsbs){
-      val=oc_pack_read1(&_dec->opb);
-      flag=(int)val;
-    }
-    else flag=!flag;
-  }
-  while(sbi<nsbs);
-  /*TODO: run_count should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-}
-
-/*Unpacks the coded/not-coded flag of every fragment.
-  The super block flags are decoded first; individual block flags are then
-   read (run-length coded) only inside partially coded super blocks.
-  Coded fragments are appended to the front of coded_fragis in coded order,
-   while uncoded fragments are stored backwards from the end of that array.
-  For the luma plane, mb_modes records whether each macro block has at least
-   one coded block.*/
-static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
-  const oc_sb_map   *sb_maps;
-  const oc_sb_flags *sb_flags;
-  signed char       *mb_modes;
-  oc_fragment       *frags;
-  ptrdiff_t         *coded_list;
-  ptrdiff_t         *uncoded_end;
-  ptrdiff_t          ncoded;
-  ptrdiff_t          nuncoded;
-  ptrdiff_t          plane_start;
-  unsigned           npartial;
-  unsigned           sb_end;
-  unsigned           sbi;
-  int                pli;
-  int                flag;
-  int                run_left;
-  npartial=oc_dec_partial_sb_flags_unpack(_dec);
-  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
-  /*The initial block flag is only present when some super block is partially
-     coded.
-    It is kept inverted, since it is toggled before the first run uses it.*/
-  flag=0;
-  if(npartial>0)flag=!(int)oc_pack_read1(&_dec->opb);
-  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
-  sb_flags=_dec->state.sb_flags;
-  mb_modes=_dec->state.mb_modes;
-  frags=_dec->state.frags;
-  coded_list=_dec->state.coded_fragis;
-  uncoded_end=coded_list+_dec->state.nfrags;
-  ncoded=nuncoded=0;
-  run_left=0;
-  sbi=sb_end=0;
-  for(pli=0;pli<3;pli++){
-    plane_start=ncoded;
-    sb_end+=_dec->state.fplanes[pli].nsbs;
-    for(;sbi<sb_end;sbi++){
-      int quadi;
-      for(quadi=0;quadi<4;quadi++){
-        int quad_coded;
-        int bi;
-        if(!(sb_flags[sbi].quad_valid&(1<<quadi)))continue;
-        quad_coded=0;
-        for(bi=0;bi<4;bi++){
-          ptrdiff_t fragi;
-          int       coded;
-          fragi=sb_maps[sbi][quadi][bi];
-          if(fragi<0)continue;
-          if(sb_flags[sbi].coded_fully)coded=1;
-          else if(sb_flags[sbi].coded_partially){
-            /*Start a new run (and toggle the flag) once the current one is
-               used up.*/
-            if(run_left<=0){
-              run_left=oc_block_run_unpack(&_dec->opb);
-              flag=!flag;
-            }
-            run_left--;
-            coded=flag;
-          }
-          else coded=0;
-          if(coded)coded_list[ncoded++]=fragi;
-          else{
-            nuncoded++;
-            uncoded_end[-nuncoded]=fragi;
-          }
-          quad_coded|=coded;
-          frags[fragi].coded=coded;
-          frags[fragi].refi=OC_FRAME_NONE;
-        }
-        /*Remember if there's a coded luma block in this macro block.*/
-        if(pli==0)mb_modes[sbi<<2|quadi]=quad_coded;
-      }
-    }
-    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
-  }
-  _dec->state.ntotal_coded_fragis=ncoded;
-  /*TODO: run_left should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-}
-
-
-/*Coding scheme:
-   Codeword            Mode Index
-   0                       0
-   10                      1
-   110                     2
-   1110                    3
-   11110                   4
-   111110                  5
-   1111110                 6
-   1111111                 7
-  This variable-length tree is used for every mode scheme except scheme 7.*/
-static const ogg_int16_t OC_VLC_MODE_TREE[26]={
-  4,
-   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
-   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
-   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
-   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
-    3,
-     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
-     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
-};
-
-static const ogg_int16_t OC_CLC_MODE_TREE[9]={
-  3,/*Fixed-length code for mode scheme 7: 3 bits per mode index.*/
-   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
-   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
-};
-
-/*Unpacks the list of macro block modes for INTER frames.
-  A 3-bit scheme number selects the alphabet that maps decoded mode indices
-   to modes: scheme 0 reads a custom alphabet from the bitstream, and schemes
-   1...7 use the fixed rows of OC_MODE_ALPHABETS.
-  Scheme 7 codes the indices with a fixed-length code, all others with a
-   variable-length code.
-  Only macro blocks already marked as having a coded luma block (a positive
-   entry in mb_modes) have a mode in the bitstream.*/
-static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
-  signed char         *mb_modes;
-  const unsigned char *alphabet;
-  unsigned char        custom_alphabet[8];
-  const ogg_int16_t   *tree;
-  size_t               nmbs;
-  size_t               mbi;
-  int                  scheme;
-  scheme=(int)oc_pack_read(&_dec->opb,3);
-  if(scheme!=0)alphabet=OC_MODE_ALPHABETS[scheme-1];
-  else{
-    int mi;
-    /*Give every slot a defined value first.
-      A bitstream that does not list each index exactly once is likely
-       corrupt, but this way we still decode something instead of reading
-       uninitialized entries.*/
-    /*LOOP VECTORIZES*/
-    for(mi=0;mi<OC_NMODES;mi++)custom_alphabet[mi]=OC_MODE_INTER_NOMV;
-    /*The bitstream gives, for each mode in the default ordering, the index
-       it is assigned.*/
-    for(mi=0;mi<OC_NMODES;mi++){
-      long idx;
-      idx=oc_pack_read(&_dec->opb,3);
-      custom_alphabet[idx]=OC_MODE_ALPHABETS[6][mi];
-    }
-    alphabet=custom_alphabet;
-  }
-  if(scheme==7)tree=OC_CLC_MODE_TREE;
-  else tree=OC_VLC_MODE_TREE;
-  mb_modes=_dec->state.mb_modes;
-  nmbs=_dec->state.nmbs;
-  for(mbi=0;mbi<nmbs;mbi++){
-    /*Macro blocks without a coded luma block keep their current value.
-      For valid ones, INTER_NOMV is forced; we rely on the fact that
-       OC_MODE_INTER_NOMV is already 0.*/
-    if(mb_modes[mbi]<=0)continue;
-    mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,tree)];
-  }
-}
-
-
-
-static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
-  5,/*Leaf values are biased by 32; oc_mv_unpack() removes the bias.*/
-   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
-   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
-   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
-   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
-   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
-   33,          36,          39,          42,
-   45,          50,          55,          60,
-   65,          74,          83,          92,
-    1,-(1<<8|32+4),-(1<<8|32-4),
-    1,-(1<<8|32+5),-(1<<8|32-5),
-    1,-(1<<8|32+6),-(1<<8|32-6),
-    1,-(1<<8|32+7),-(1<<8|32-7),
-    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
-    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
-    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
-    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
-    3,
-     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
-     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
-    3,
-     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
-     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
-    3,
-     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
-     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
-    3,
-     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
-     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
-};
-
-static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
-  6,/*64 6-bit leaves: leaf 2*m holds 32+m and leaf 2*m+1 holds 32-m.*/
-   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
-   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
-   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
-   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
-   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
-   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
-   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
-   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
-   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
-   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
-   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
-   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
-   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
-   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
-   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
-   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
-};
-
-
-/*Decodes one motion vector using the given component tree.
-  The x component is read first, then the y component; each decoded token is
-   biased by 32.
-  Return: The two components packed with OC_MV().*/
-static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
-  int mv_x;
-  int mv_y;
-  /*These stay in separate statements so that the order of the two reads is
-     fixed.*/
-  mv_x=oc_huff_token_decode(_opb,_tree)-32;
-  mv_y=oc_huff_token_decode(_opb,_tree)-32;
-  return OC_MV(mv_x,mv_y);
-}
-
-/*Unpacks the list of motion vectors for INTER frames, and propagates the
-   macro block modes and motion vectors to the individual fragments.
-  A leading bit selects the fixed-length (1) or variable-length (0) motion
-   vector component code.
-  Only coded fragments have their reference frame, mode, and motion vector
-   updated.*/
-static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
-  const oc_mb_map        *mb_maps;
-  const signed char      *mb_modes;
-  oc_set_chroma_mvs_func  set_chroma_mvs;
-  const ogg_int16_t      *mv_comp_tree;
-  oc_fragment            *frags;
-  oc_mv                  *frag_mvs;
-  const unsigned char    *map_idxs;
-  int                     map_nidxs;
-  oc_mv                   last_mv;
-  oc_mv                   prior_mv;
-  oc_mv                   cbmvs[4];
-  size_t                  nmbs;
-  size_t                  mbi;
-  long                    val;
-  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
-  val=oc_pack_read1(&_dec->opb);
-  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
-  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
-  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
-  prior_mv=last_mv=0;/*Both motion vector predictors start out at zero.*/
-  frags=_dec->state.frags;
-  frag_mvs=_dec->state.frag_mvs;
-  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
-  mb_modes=_dec->state.mb_modes;
-  nmbs=_dec->state.nmbs;
-  for(mbi=0;mbi<nmbs;mbi++){
-    int mb_mode;
-    mb_mode=mb_modes[mbi];
-    if(mb_mode!=OC_MODE_INVALID){
-      oc_mv     mbmv;
-      ptrdiff_t fragi;
-      int       mapi;
-      int       mapii;
-      int       refi;
-      if(mb_mode==OC_MODE_INTER_MV_FOUR){
-        oc_mv lbmvs[4];
-        int   bi;
-        prior_mv=last_mv;
-        for(bi=0;bi<4;bi++){
-          fragi=mb_maps[mbi][0][bi];
-          if(frags[fragi].coded){
-            frags[fragi].refi=OC_FRAME_PREV;
-            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
-            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
-            frag_mvs[fragi]=lbmvs[bi];
-          }
-          else lbmvs[bi]=0;/*No MV is coded for an uncoded block; use zero.*/
-        }
-        (*set_chroma_mvs)(cbmvs,lbmvs);
-        for(mapii=4;mapii<map_nidxs;mapii++){
-          mapi=map_idxs[mapii];
-          bi=mapi&3;
-          fragi=mb_maps[mbi][mapi>>2][bi];
-          if(frags[fragi].coded){
-            frags[fragi].refi=OC_FRAME_PREV;
-            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
-            frag_mvs[fragi]=cbmvs[bi];
-          }
-        }
-      }
-      else{
-        switch(mb_mode){
-          case OC_MODE_INTER_MV:{
-            prior_mv=last_mv;
-            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
-          }break;
-          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
-          case OC_MODE_INTER_MV_LAST2:{
-            mbmv=prior_mv;
-            prior_mv=last_mv;
-            last_mv=mbmv;
-          }break;
-          case OC_MODE_GOLDEN_MV:{
-            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
-          }break;
-          default:mbmv=0;break;
-        }
-        /*Fill in the MVs for the fragments.
-          Every coded fragment of the macro block gets the same reference
-           frame, mode, and MV.*/
-        refi=OC_FRAME_FOR_MODE(mb_mode);
-        mapii=0;
-        do{
-          mapi=map_idxs[mapii];
-          fragi=mb_maps[mbi][mapi>>2][mapi&3];
-          if(frags[fragi].coded){
-            frags[fragi].refi=refi;
-            frags[fragi].mb_mode=mb_mode;
-            frag_mvs[fragi]=mbmv;
-          }
-        }
-        while(++mapii<map_nidxs);
-      }
-    }
-  }
-}
-
-static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
-  oc_fragment     *frags;
-  const ptrdiff_t *coded_fragis;
-  ptrdiff_t        ncoded_fragis;
-  ptrdiff_t        fragii;
-  ptrdiff_t        fragi;
-  ncoded_fragis=_dec->state.ntotal_coded_fragis;
-  if(ncoded_fragis<=0)return;
-  frags=_dec->state.frags;
-  coded_fragis=_dec->state.coded_fragis;
-  if(_dec->state.nqis==1){
-    /*If this frame has only a single qi value, then just use it for all coded
-       fragments.*/
-    for(fragii=0;fragii<ncoded_fragis;fragii++){
-      frags[coded_fragis[fragii]].qii=0;
-    }
-  }
-  else{
-    long val;
-    int  flag;
-    int  nqi1;
-    int  run_count;
-    /*Otherwise, we decode a qi index for each fragment, using two passes of
-      the same binary RLE scheme used for super-block coded bits.
-     The first pass marks each fragment as having a qii of 0 or greater than
-      0, and the second pass (if necessary), distinguishes between a qii of
-      1 and 2.
-     At first we just store the qii in the fragment.
-     After all the qii's are decoded, we make a final pass to replace them
-      with the corresponding qi's for this frame.*/
-    val=oc_pack_read1(&_dec->opb);
-    flag=(int)val;
-    nqi1=0;
-    fragii=0;
-    while(fragii<ncoded_fragis){
-      int full_run;
-      run_count=oc_sb_run_unpack(&_dec->opb);
-      full_run=run_count>=4129;
-      do{
-        frags[coded_fragis[fragii++]].qii=flag;
-        nqi1+=flag;
-      }
-      while(--run_count>0&&fragii<ncoded_fragis);
-      if(full_run&&fragii<ncoded_fragis){
-        val=oc_pack_read1(&_dec->opb);
-        flag=(int)val;
-      }
-      else flag=!flag;
-    }
-    /*TODO: run_count should be 0 here.
-      If it's not, we should issue a warning of some kind.*/
-    /*If we have 3 different qi's for this frame, and there was at least one
-       fragment with a non-zero qi, make the second pass.*/
-    if(_dec->state.nqis==3&&nqi1>0){
-      /*Skip qii==0 fragments.*/
-      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
-      val=oc_pack_read1(&_dec->opb);
-      flag=(int)val;
-      do{
-        int full_run;
-        run_count=oc_sb_run_unpack(&_dec->opb);
-        full_run=run_count>=4129;
-        for(;fragii<ncoded_fragis;fragii++){
-          fragi=coded_fragis[fragii];
-          if(frags[fragi].qii==0)continue;
-          if(run_count--<=0)break;
-          frags[fragi].qii+=flag;
-        }
-        if(full_run&&fragii<ncoded_fragis){
-          val=oc_pack_read1(&_dec->opb);
-          flag=(int)val;
-        }
-        else flag=!flag;
-      }
-      while(fragii<ncoded_fragis);
-      /*TODO: run_count should be 0 here.
-        If it's not, we should issue a warning of some kind.*/
-    }
-  }
-}
-
-
-
-/*Unpacks the DC coefficient tokens.
-  Unlike when unpacking the AC coefficient tokens, we actually need to decode
-   the DC coefficient values now so that we can do DC prediction.
-  _huff_idx:   The index of the Huffman table to use for each color plane.
-  _ntoks_left: The number of tokens left to be decoded in each color plane for
-                each coefficient.
-               This is updated as EOB tokens and zero run tokens are decoded.
-  Return: The length of any outstanding EOB run.*/
-static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
- ptrdiff_t _ntoks_left[3][64]){
-  unsigned char   *dct_tokens;
-  oc_fragment     *frags;
-  const ptrdiff_t *coded_fragis;
-  ptrdiff_t        ncoded_fragis;
-  ptrdiff_t        fragii;
-  ptrdiff_t        eobs;
-  ptrdiff_t        ti;
-  int              pli;
-  dct_tokens=_dec->dct_tokens;
-  frags=_dec->state.frags;
-  coded_fragis=_dec->state.coded_fragis;
-  ncoded_fragis=fragii=eobs=ti=0;
-  for(pli=0;pli<3;pli++){
-    ptrdiff_t run_counts[64];
-    ptrdiff_t eob_count;
-    ptrdiff_t eobi;
-    int       rli;
-    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
-    memset(run_counts,0,sizeof(run_counts));
-    _dec->eob_runs[pli][0]=eobs;
-    _dec->ti0[pli][0]=ti;
-    /*Continue any previous EOB run, if there was one.*/
-    eobi=eobs;
-    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
-    eob_count=eobi;
-    eobs-=eobi;
-    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
-    while(fragii<ncoded_fragis){
-      int token;
-      int cw;
-      int eb;
-      int skip;
-      token=oc_huff_token_decode(&_dec->opb,
-       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
-      dct_tokens[ti++]=(unsigned char)token;
-      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
-        eb=(int)oc_pack_read(&_dec->opb,
-         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
-        dct_tokens[ti++]=(unsigned char)eb;
-        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
-        eb<<=OC_DCT_TOKEN_EB_POS(token);
-      }
-      else eb=0;
-      cw=OC_DCT_CODE_WORD[token]+eb;
-      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
-      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
-      if(eobs){
-        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
-        eob_count+=eobi;
-        eobs-=eobi;
-        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
-      }
-      else{
-        int coeff;
-        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
-        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
-        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
-        if(skip)coeff=0;
-        run_counts[skip]++;
-        frags[coded_fragis[fragii++]].dc=coeff;
-      }
-    }
-    /*Add the total EOB count to the longest run length.*/
-    run_counts[63]+=eob_count;
-    /*And convert the run_counts array to a moment table.*/
-    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
-    /*Finally, subtract off the number of coefficients that have been
-       accounted for by runs started in this coefficient.*/
-    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
-  }
-  _dec->dct_tokens_count=ti;
-  return eobs;
-}
-
-/*Unpacks the AC coefficient tokens.
-  This can completely discard coefficient values while unpacking, and so is
-   somewhat simpler than unpacking the DC coefficient tokens.
-  _huff_idx:   The index of the Huffman table to use for each color plane.
-  _ntoks_left: The number of tokens left to be decoded in each color plane for
-                each coefficient.
-               This is updated as EOB tokens and zero run tokens are decoded.
-  _eobs:       The length of any outstanding EOB run from previous
-                coefficients.
-  Return: The length of any outstanding EOB run.*/
-static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
- ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
-  unsigned char *dct_tokens;
-  ptrdiff_t      ti;
-  int            pli;
-  dct_tokens=_dec->dct_tokens;
-  ti=_dec->dct_tokens_count;
-  for(pli=0;pli<3;pli++){
-    ptrdiff_t run_counts[64];
-    ptrdiff_t eob_count;
-    size_t    ntoks_left;
-    size_t    ntoks;
-    int       rli;
-    _dec->eob_runs[pli][_zzi]=_eobs;
-    _dec->ti0[pli][_zzi]=ti;
-    ntoks_left=_ntoks_left[pli][_zzi];
-    memset(run_counts,0,sizeof(run_counts));
-    eob_count=0;
-    ntoks=0;
-    while(ntoks+_eobs<ntoks_left){
-      int token;
-      int cw;
-      int eb;
-      int skip;
-      ntoks+=_eobs;
-      eob_count+=_eobs;
-      token=oc_huff_token_decode(&_dec->opb,
-       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
-      dct_tokens[ti++]=(unsigned char)token;
-      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
-        eb=(int)oc_pack_read(&_dec->opb,
-         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
-        dct_tokens[ti++]=(unsigned char)eb;
-        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
-        eb<<=OC_DCT_TOKEN_EB_POS(token);
-      }
-      else eb=0;
-      cw=OC_DCT_CODE_WORD[token]+eb;
-      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
-      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
-      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
-      if(_eobs==0){
-        run_counts[skip]++;
-        ntoks++;
-      }
-    }
-    /*Add the portion of the last EOB run actually used by this coefficient.*/
-    eob_count+=ntoks_left-ntoks;
-    /*And remove it from the remaining EOB count.*/
-    _eobs-=ntoks_left-ntoks;
-    /*Add the total EOB count to the longest run length.*/
-    run_counts[63]+=eob_count;
-    /*And convert the run_counts array to a moment table.*/
-    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
-    /*Finally, subtract off the number of coefficients that have been
-       accounted for by runs started in this coefficient.*/
-    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
-  }
-  _dec->dct_tokens_count=ti;
-  return _eobs;
-}
-
-/*Tokens describing the DCT coefficients that belong to each fragment are
-   stored in the bitstream grouped by coefficient, not by fragment.
-
-  This means that we either decode all the tokens in order, building up a
-   separate coefficient list for each fragment as we go, and then go back and
-   do the iDCT on each fragment, or we have to create separate lists of tokens
-   for each coefficient, so that we can pull the next token required off the
-   head of the appropriate list when decoding a specific fragment.
-
-  The former was VP3's choice, and it meant 2*w*h extra storage for all the
-   decoded coefficient values.
-
-  We take the second option, which lets us store just one to three bytes per
-   token (generally far fewer than the number of coefficients, due to EOB
-   tokens and zero runs), and which requires us to only maintain a counter for
-   each of the 64 coefficients, instead of a counter for every fragment to
-   determine where the next token goes.
-
-  We actually use 3 counters per coefficient, one for each color plane, so we
-   can decode all color planes simultaneously.
-  This lets color conversion, etc., be done as soon as a full MCU (one or
-   two super block rows) is decoded, while the image data is still in cache.*/
-
-static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
-  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
-  ptrdiff_t  ntoks_left[3][64];
-  int        huff_idxs[2];
-  ptrdiff_t  eobs;
-  long       val;
-  int        pli;
-  int        zzi;
-  int        hgi;
-  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
-    ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
-  }
-  val=oc_pack_read(&_dec->opb,4);
-  huff_idxs[0]=(int)val;
-  val=oc_pack_read(&_dec->opb,4);
-  huff_idxs[1]=(int)val;
-  _dec->eob_runs[0][0]=0;
-  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
-#if defined(HAVE_CAIRO)
-  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-  val=oc_pack_read(&_dec->opb,4);
-  huff_idxs[0]=(int)val;
-  val=oc_pack_read(&_dec->opb,4);
-  huff_idxs[1]=(int)val;
-  zzi=1;
-  for(hgi=1;hgi<5;hgi++){
-    huff_idxs[0]+=16;
-    huff_idxs[1]+=16;
-    for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
-      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
-    }
-  }
-  /*TODO: eobs should be exactly zero, or 4096 or greater.
-    The second case occurs when an EOB run of size zero is encountered, which
-     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
-    If neither of these conditions holds, then a warning should be issued.*/
-}
-
-
-static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
-  /*pp_level 0: disabled; free any memory used and return*/
-  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
-    if(_dec->dc_qis!=NULL){
-      _ogg_free(_dec->dc_qis);
-      _dec->dc_qis=NULL;
-      _ogg_free(_dec->variances);
-      _dec->variances=NULL;
-      _ogg_free(_dec->pp_frame_data);
-      _dec->pp_frame_data=NULL;
-    }
-    return 1;
-  }
-  if(_dec->dc_qis==NULL){
-    /*If we haven't been tracking DC quantization indices, there's no point in
-       starting now.*/
-    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
-    _dec->dc_qis=(unsigned char *)_ogg_malloc(
-     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
-    if(_dec->dc_qis==NULL)return 1;
-    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
-  }
-  else{
-    unsigned char   *dc_qis;
-    const ptrdiff_t *coded_fragis;
-    ptrdiff_t        ncoded_fragis;
-    ptrdiff_t        fragii;
-    unsigned char    qi0;
-    /*Update the DC quantization index of each coded block.*/
-    dc_qis=_dec->dc_qis;
-    coded_fragis=_dec->state.coded_fragis;
-    ncoded_fragis=_dec->state.ncoded_fragis[0]+
-     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
-    qi0=(unsigned char)_dec->state.qis[0];
-    for(fragii=0;fragii<ncoded_fragis;fragii++){
-      dc_qis[coded_fragis[fragii]]=qi0;
-    }
-  }
-  /*pp_level 1: Stop after updating DC quantization indices.*/
-  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
-    if(_dec->variances!=NULL){
-      _ogg_free(_dec->variances);
-      _dec->variances=NULL;
-      _ogg_free(_dec->pp_frame_data);
-      _dec->pp_frame_data=NULL;
-    }
-    return 1;
-  }
-  if(_dec->variances==NULL){
-    size_t frame_sz;
-    size_t c_sz;
-    int    c_w;
-    int    c_h;
-    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
-    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
-    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
-    c_sz=c_w*(size_t)c_h;
-    /*Allocate space for the chroma planes, even if we're not going to use
-       them; this simplifies allocation state management, though it may waste
-       memory on the few systems that don't overcommit pages.*/
-    frame_sz+=c_sz<<1;
-    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
-     frame_sz*sizeof(_dec->pp_frame_data[0]));
-    _dec->variances=(int *)_ogg_malloc(
-     _dec->state.nfrags*sizeof(_dec->variances[0]));
-    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
-      _ogg_free(_dec->pp_frame_data);
-      _dec->pp_frame_data=NULL;
-      _ogg_free(_dec->variances);
-      _dec->variances=NULL;
-      return 1;
-    }
-    /*Force an update of the PP buffer pointers.*/
-    _dec->pp_frame_state=0;
-  }
-  /*Update the PP buffer pointers if necessary.*/
-  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
-    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
-      /*If chroma processing is disabled, just use the PP luma plane.*/
-      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
-      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
-      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
-      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
-       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
-    }
-    else{
-      size_t y_sz;
-      size_t c_sz;
-      int    c_w;
-      int    c_h;
-      /*Otherwise, set up pointers to all three PP planes.*/
-      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
-      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
-      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
-      c_sz=c_w*(size_t)c_h;
-      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
-      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
-      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
-      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
-      _dec->pp_frame_buf[1].width=c_w;
-      _dec->pp_frame_buf[1].height=c_h;
-      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
-      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
-      _dec->pp_frame_buf[2].width=c_w;
-      _dec->pp_frame_buf[2].height=c_h;
-      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
-      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
-      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
-    }
-    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
-  }
-  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
-  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
-    memcpy(_dec->pp_frame_buf+1,
-     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
-     sizeof(_dec->pp_frame_buf[1])*2);
-  }
-  return 0;
-}
-
-
-/*Initialize the main decoding pipeline.*/
-static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe){
-  const ptrdiff_t *coded_fragis;
-  const ptrdiff_t *uncoded_fragis;
-  int              flimit;
-  int              pli;
-  int              qii;
-  int              qti;
-  int              zzi;
-  /*If chroma is sub-sampled in the vertical direction, we have to decode two
-     super block rows of Y' for each super block row of Cb and Cr.*/
-  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
-  /*Initialize the token and extra bits indices for each plane and
-     coefficient.*/
-  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
-  /*Also copy over the initial the EOB run counts.*/
-  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
-  /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
-  coded_fragis=_dec->state.coded_fragis;
-  uncoded_fragis=coded_fragis+_dec->state.nfrags;
-  for(pli=0;pli<3;pli++){
-    ptrdiff_t ncoded_fragis;
-    _pipe->coded_fragis[pli]=coded_fragis;
-    _pipe->uncoded_fragis[pli]=uncoded_fragis;
-    ncoded_fragis=_dec->state.ncoded_fragis[pli];
-    coded_fragis+=ncoded_fragis;
-    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
-  }
-  /*Set up condensed quantizer tables.*/
-  for(pli=0;pli<3;pli++){
-    for(qii=0;qii<_dec->state.nqis;qii++){
-      for(qti=0;qti<2;qti++){
-        _pipe->dequant[pli][qii][qti]=
-         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
-      }
-    }
-  }
-  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
-  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
-  /*Initialize the bounding value array for the loop filter.*/
-  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
-  _pipe->loop_filter=flimit!=0;
-  if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
-  /*Initialize any buffers needed for post-processing.
-    We also save the current post-processing level, to guard against the user
-     changing it from a callback.*/
-  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
-  /*If we don't have enough information to post-process, disable it, regardless
-     of the user-requested level.*/
-  else{
-    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
-    memcpy(_dec->pp_frame_buf,
-     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
-     sizeof(_dec->pp_frame_buf[0])*3);
-  }
-  /*Clear down the DCT coefficient buffer for the first block.*/
-  for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
-}
-
-/*Undo the DC prediction in a single plane of an MCU (one or two super block
-   rows).
-  As a side effect, the number of coded and uncoded fragments in this plane of
-   the MCU is also computed.*/
-void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
-  const oc_fragment_plane *fplane;
-  oc_fragment             *frags;
-  int                     *pred_last;
-  ptrdiff_t                ncoded_fragis;
-  ptrdiff_t                fragi;
-  int                      fragx;
-  int                      fragy;
-  int                      fragy0;
-  int                      fragy_end;
-  int                      nhfrags;
-  /*Compute the first and last fragment row of the current MCU for this
-     plane.*/
-  fplane=_dec->state.fplanes+_pli;
-  fragy0=_pipe->fragy0[_pli];
-  fragy_end=_pipe->fragy_end[_pli];
-  nhfrags=fplane->nhfrags;
-  pred_last=_pipe->pred_last[_pli];
-  frags=_dec->state.frags;
-  ncoded_fragis=0;
-  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
-  for(fragy=fragy0;fragy<fragy_end;fragy++){
-    if(fragy==0){
-      /*For the first row, all of the cases reduce to just using the previous
-         predictor for the same reference frame.*/
-      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
-        if(frags[fragi].coded){
-          int refi;
-          refi=frags[fragi].refi;
-          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
-          ncoded_fragis++;
-        }
-      }
-    }
-    else{
-      oc_fragment *u_frags;
-      int          l_ref;
-      int          ul_ref;
-      int          u_ref;
-      u_frags=frags-nhfrags;
-      l_ref=-1;
-      ul_ref=-1;
-      u_ref=u_frags[fragi].refi;
-      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
-        int ur_ref;
-        if(fragx+1>=nhfrags)ur_ref=-1;
-        else ur_ref=u_frags[fragi+1].refi;
-        if(frags[fragi].coded){
-          int pred;
-          int refi;
-          refi=frags[fragi].refi;
-          /*We break out a separate case based on which of our neighbors use
-             the same reference frames.
-            This is somewhat faster than trying to make a generic case which
-             handles all of them, since it reduces lots of poorly predicted
-             jumps to one switch statement, and also lets a number of the
-             multiplications be optimized out by strength reduction.*/
-          switch((l_ref==refi)|(ul_ref==refi)<<1|
-           (u_ref==refi)<<2|(ur_ref==refi)<<3){
-            default:pred=pred_last[refi];break;
-            case  1:
-            case  3:pred=frags[fragi-1].dc;break;
-            case  2:pred=u_frags[fragi-1].dc;break;
-            case  4:
-            case  6:
-            case 12:pred=u_frags[fragi].dc;break;
-            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
-            case  8:pred=u_frags[fragi+1].dc;break;
-            case  9:
-            case 11:
-            case 13:{
-              /*The TI compiler mis-compiles this line.*/
-              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
-            }break;
-            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
-            case 14:{
-              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
-               +10*u_frags[fragi].dc)/16;
-            }break;
-            case  7:
-            case 15:{
-              int p0;
-              int p1;
-              int p2;
-              p0=frags[fragi-1].dc;
-              p1=u_frags[fragi-1].dc;
-              p2=u_frags[fragi].dc;
-              pred=(29*(p0+p2)-26*p1)/32;
-              if(abs(pred-p2)>128)pred=p2;
-              else if(abs(pred-p0)>128)pred=p0;
-              else if(abs(pred-p1)>128)pred=p1;
-            }break;
-          }
-          pred_last[refi]=frags[fragi].dc+=pred;
-          ncoded_fragis++;
-          l_ref=refi;
-        }
-        else l_ref=-1;
-        ul_ref=u_ref;
-        u_ref=ur_ref;
-      }
-    }
-  }
-  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
-  /*Also save the number of uncoded fragments so we know how many to copy.*/
-  _pipe->nuncoded_fragis[_pli]=
-   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
-}
-
-/*Reconstructs all coded fragments in a single MCU (one or two super block
-   rows).
-  This requires that each coded fragment have a proper macro block mode and
-   motion vector (if not in INTRA mode), and have its DC value decoded, with
-   the DC prediction process reversed, and the number of coded and uncoded
-   fragments in this plane of the MCU be counted.
-  The token lists for each color plane and coefficient should also be filled
-   in, along with initial token offsets, extra bits offsets, and EOB run
-   counts.*/
-static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
-  unsigned char       *dct_tokens;
-  const unsigned char *dct_fzig_zag;
-  ogg_uint16_t         dc_quant[2];
-  const oc_fragment   *frags;
-  const ptrdiff_t     *coded_fragis;
-  ptrdiff_t            ncoded_fragis;
-  ptrdiff_t            fragii;
-  ptrdiff_t           *ti;
-  ptrdiff_t           *eob_runs;
-  int                  qti;
-  dct_tokens=_dec->dct_tokens;
-  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
-  frags=_dec->state.frags;
-  coded_fragis=_pipe->coded_fragis[_pli];
-  ncoded_fragis=_pipe->ncoded_fragis[_pli];
-  ti=_pipe->ti[_pli];
-  eob_runs=_pipe->eob_runs[_pli];
-  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
-  for(fragii=0;fragii<ncoded_fragis;fragii++){
-    const ogg_uint16_t *ac_quant;
-    ptrdiff_t           fragi;
-    int                 last_zzi;
-    int                 zzi;
-    fragi=coded_fragis[fragii];
-    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
-    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
-    /*Decode the AC coefficients.*/
-    for(zzi=0;zzi<64;){
-      int token;
-      last_zzi=zzi;
-      if(eob_runs[zzi]){
-        eob_runs[zzi]--;
-        break;
-      }
-      else{
-        ptrdiff_t eob;
-        int       cw;
-        int       rlen;
-        int       coeff;
-        int       lti;
-        lti=ti[zzi];
-        token=dct_tokens[lti++];
-        cw=OC_DCT_CODE_WORD[token];
-        /*These parts could be done branchless, but the branches are fairly
-           predictable and the C code translates into more than a few
-           instructions, so it's worth it to avoid them.*/
-        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
-          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
-        }
-        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
-        if(token==OC_DCT_TOKEN_FAT_EOB){
-          eob+=dct_tokens[lti++]<<8;
-          if(eob==0)eob=OC_DCT_EOB_FINISH;
-        }
-        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
-        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
-        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
-        eob_runs[zzi]=eob;
-        ti[zzi]=lti;
-        zzi+=rlen;
-        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
-         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
-        zzi+=!eob;
-      }
-    }
-    /*TODO: zzi should be exactly 64 here.
-      If it's not, we should report some kind of warning.*/
-    zzi=OC_MINI(zzi,64);
-    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
-    /*last_zzi is always initialized.
-      If your compiler thinks otherwise, it is dumb.*/
-    oc_state_frag_recon(&_dec->state,fragi,_pli,
-     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
-  }
-  _pipe->coded_fragis[_pli]+=ncoded_fragis;
-  /*Right now the reconstructed MCU has only the coded blocks in it.*/
-  /*TODO: We make the decision here to always copy the uncoded blocks into it
-     from the reference frame.
-    We could also copy the coded blocks back over the reference frame, if we
-     wait for an additional MCU to be decoded, which might be faster if only a
-     small number of blocks are coded.
-    However, this introduces more latency, creating a larger cache footprint.
-    It's unknown which decision is better, but this one results in simpler
-     code, and the hard case (high bitrate, high resolution) is handled
-     correctly.*/
-  /*Copy the uncoded blocks from the previous reference frame.*/
-  if(_pipe->nuncoded_fragis[_pli]>0){
-    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
-    oc_frag_copy_list(&_dec->state,
-     _dec->state.ref_frame_data[OC_FRAME_SELF],
-     _dec->state.ref_frame_data[OC_FRAME_PREV],
-     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
-     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
-  }
-}
-
-/*Filter a horizontal block edge.*/
-static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
- int *_variance0,int *_variance1){
-  unsigned char       *rdst;
-  const unsigned char *rsrc;
-  unsigned char       *cdst;
-  const unsigned char *csrc;
-  int                  r[10];
-  int                  sum0;
-  int                  sum1;
-  int                  bx;
-  int                  by;
-  rdst=_dst;
-  rsrc=_src;
-  for(bx=0;bx<8;bx++){
-    cdst=rdst;
-    csrc=rsrc;
-    for(by=0;by<10;by++){
-      r[by]=*csrc;
-      csrc+=_src_ystride;
-    }
-    sum0=sum1=0;
-    for(by=0;by<4;by++){
-      sum0+=abs(r[by+1]-r[by]);
-      sum1+=abs(r[by+5]-r[by+6]);
-    }
-    *_variance0+=OC_MINI(255,sum0);
-    *_variance1+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
-      *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
-      cdst+=_dst_ystride;
-      *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
-      cdst+=_dst_ystride;
-      for(by=0;by<4;by++){
-        *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
-         r[by+4]+r[by+5]+r[by+6]+4>>3);
-        cdst+=_dst_ystride;
-      }
-      *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
-      cdst+=_dst_ystride;
-      *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
-    }
-    else{
-      for(by=1;by<=8;by++){
-        *cdst=(unsigned char)r[by];
-        cdst+=_dst_ystride;
-      }
-    }
-    rdst++;
-    rsrc++;
-  }
-}
-
-/*Filter a vertical block edge.*/
-static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
- int _qstep,int _flimit,int *_variances){
-  unsigned char       *rdst;
-  const unsigned char *rsrc;
-  unsigned char       *cdst;
-  int                  r[10];
-  int                  sum0;
-  int                  sum1;
-  int                  bx;
-  int                  by;
-  cdst=_dst;
-  for(by=0;by<8;by++){
-    rsrc=cdst-1;
-    rdst=cdst;
-    for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
-    sum0=sum1=0;
-    for(bx=0;bx<4;bx++){
-      sum0+=abs(r[bx+1]-r[bx]);
-      sum1+=abs(r[bx+5]-r[bx+6]);
-    }
-    _variances[0]+=OC_MINI(255,sum0);
-    _variances[1]+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
-      *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
-      *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
-      for(bx=0;bx<4;bx++){
-        *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+
-         r[bx+4]+r[bx+5]+r[bx+6]+4>>3);
-      }
-      *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
-      *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
-    }
-    cdst+=_dst_ystride;
-  }
-}
-
-static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
- th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
- int _fragy_end){
-  oc_fragment_plane   *fplane;
-  int                 *variance;
-  unsigned char       *dc_qi;
-  unsigned char       *dst;
-  const unsigned char *src;
-  ptrdiff_t            froffset;
-  int                  dst_ystride;
-  int                  src_ystride;
-  int                  nhfrags;
-  int                  width;
-  int                  notstart;
-  int                  notdone;
-  int                  flimit;
-  int                  qstep;
-  int                  y_end;
-  int                  y;
-  int                  x;
-  _dst+=_pli;
-  _src+=_pli;
-  fplane=_dec->state.fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
-  variance=_dec->variances+froffset;
-  dc_qi=_dec->dc_qis+froffset;
-  notstart=_fragy0>0;
-  notdone=_fragy_end<fplane->nvfrags;
-  /*We want to clear an extra row of variances, except at the end.*/
-  memset(variance+(nhfrags&-notstart),0,
-   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
-  /*Except for the first time, we want to point to the middle of the row.*/
-  y=(_fragy0<<3)+(notstart<<2);
-  dst_ystride=_dst->stride;
-  src_ystride=_src->stride;
-  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
-  src=_src->data+y*(ptrdiff_t)src_ystride;
-  width=_dst->width;
-  for(;y<4;y++){
-    memcpy(dst,src,width*sizeof(dst[0]));
-    dst+=dst_ystride;
-    src+=src_ystride;
-  }
-  /*We also want to skip the last row in the frame for this loop.*/
-  y_end=_fragy_end-!notdone<<3;
-  for(;y<y_end;y+=8){
-    qstep=_dec->pp_dc_scale[*dc_qi];
-    flimit=(qstep*3)>>2;
-    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
-     qstep,flimit,variance,variance+nhfrags);
-    variance++;
-    dc_qi++;
-    for(x=8;x<width;x+=8){
-      qstep=_dec->pp_dc_scale[*dc_qi];
-      flimit=(qstep*3)>>2;
-      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
-       qstep,flimit,variance,variance+nhfrags);
-      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
-       qstep,flimit,variance-1);
-      variance++;
-      dc_qi++;
-    }
-    dst+=dst_ystride<<3;
-    src+=src_ystride<<3;
-  }
-  /*And finally, handle the last row in the frame, if it's in the range.*/
-  if(!notdone){
-    int height;
-    height=_dst->height;
-    for(;y<height;y++){
-      memcpy(dst,src,width*sizeof(dst[0]));
-      dst+=dst_ystride;
-      src+=src_ystride;
-    }
-    /*Filter the last row of vertical block edges.*/
-    dc_qi++;
-    for(x=8;x<width;x+=8){
-      qstep=_dec->pp_dc_scale[*dc_qi++];
-      flimit=(qstep*3)>>2;
-      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
-       qstep,flimit,variance++);
-    }
-  }
-}
-
-static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
- int _dc_scale,int _sharp_mod,int _strong){
-  static const unsigned char OC_MOD_MAX[2]={24,32};
-  static const unsigned char OC_MOD_SHIFT[2]={1,0};
-  const unsigned char *psrc;
-  const unsigned char *src;
-  const unsigned char *nsrc;
-  unsigned char       *dst;
-  int                  vmod[72];
-  int                  hmod[72];
-  int                  mod_hi;
-  int                  by;
-  int                  bx;
-  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
-  dst=_idata;
-  src=dst;
-  psrc=src-(_ystride&-!(_b&4));
-  for(by=0;by<9;by++){
-    for(bx=0;bx<8;bx++){
-      int mod;
-      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
-      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
-    }
-    psrc=src;
-    src+=_ystride&-(!(_b&8)|by<7);
-  }
-  nsrc=dst;
-  psrc=dst-!(_b&1);
-  for(bx=0;bx<9;bx++){
-    src=nsrc;
-    for(by=0;by<8;by++){
-      int mod;
-      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
-      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
-      psrc+=_ystride;
-      src+=_ystride;
-    }
-    psrc=nsrc;
-    nsrc+=!(_b&2)|bx<7;
-  }
-  src=dst;
-  psrc=src-(_ystride&-!(_b&4));
-  nsrc=src+_ystride;
-  for(by=0;by<8;by++){
-    int a;
-    int b;
-    int w;
-    a=128;
-    b=64;
-    w=hmod[by];
-    a-=w;
-    b+=w**(src-!(_b&1));
-    w=vmod[by<<3];
-    a-=w;
-    b+=w*psrc[0];
-    w=vmod[by+1<<3];
-    a-=w;
-    b+=w*nsrc[0];
-    w=hmod[(1<<3)+by];
-    a-=w;
-    b+=w*src[1];
-    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
-    for(bx=1;bx<7;bx++){
-      a=128;
-      b=64;
-      w=hmod[(bx<<3)+by];
-      a-=w;
-      b+=w*src[bx-1];
-      w=vmod[(by<<3)+bx];
-      a-=w;
-      b+=w*psrc[bx];
-      w=vmod[(by+1<<3)+bx];
-      a-=w;
-      b+=w*nsrc[bx];
-      w=hmod[(bx+1<<3)+by];
-      a-=w;
-      b+=w*src[bx+1];
-      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
-    }
-    a=128;
-    b=64;
-    w=hmod[(7<<3)+by];
-    a-=w;
-    b+=w*src[6];
-    w=vmod[(by<<3)+7];
-    a-=w;
-    b+=w*psrc[7];
-    w=vmod[(by+1<<3)+7];
-    a-=w;
-    b+=w*nsrc[7];
-    w=hmod[(8<<3)+by];
-    a-=w;
-    b+=w*src[7+!(_b&2)];
-    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
-    dst+=_ystride;
-    psrc=src;
-    src=nsrc;
-    nsrc+=_ystride&-(!(_b&8)|by<6);
-  }
-}
-
-#define OC_DERING_THRESH1 (384)
-#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
-#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
-#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
-
-static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
- int _pli,int _fragy0,int _fragy_end){
-  th_img_plane      *iplane;
-  oc_fragment_plane *fplane;
-  oc_fragment       *frag;
-  int               *variance;
-  unsigned char     *idata;
-  ptrdiff_t          froffset;
-  int                ystride;
-  int                nhfrags;
-  int                sthresh;
-  int                strong;
-  int                y_end;
-  int                width;
-  int                height;
-  int                y;
-  int                x;
-  iplane=_img+_pli;
-  fplane=_dec->state.fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
-  variance=_dec->variances+froffset;
-  frag=_dec->state.frags+froffset;
-  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
-  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
-  y=_fragy0<<3;
-  ystride=iplane->stride;
-  idata=iplane->data+y*(ptrdiff_t)ystride;
-  y_end=_fragy_end<<3;
-  width=iplane->width;
-  height=iplane->height;
-  for(;y<y_end;y+=8){
-    for(x=0;x<width;x+=8){
-      int b;
-      int qi;
-      int var;
-      qi=_dec->state.qis[frag->qii];
-      var=*variance;
-      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
-      if(strong&&var>sthresh){
-        oc_dering_block(idata+x,ystride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
-         !(b&2)&&variance[1]>OC_DERING_THRESH4||
-         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
-         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
-          oc_dering_block(idata+x,ystride,b,
-           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-          oc_dering_block(idata+x,ystride,b,
-           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-        }
-      }
-      else if(var>OC_DERING_THRESH2){
-        oc_dering_block(idata+x,ystride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-      }
-      else if(var>OC_DERING_THRESH1){
-        oc_dering_block(idata+x,ystride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
-      }
-      frag++;
-      variance++;
-    }
-    idata+=ystride<<3;
-  }
-}
-
-
-
-th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
-  oc_dec_ctx *dec;
-  if(_info==NULL||_setup==NULL)return NULL;
-  dec=oc_aligned_malloc(sizeof(*dec),16);
-  if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
-    oc_aligned_free(dec);
-    return NULL;
-  }
-  dec->state.curframe_num=0;
-  return dec;
-}
-
-void th_decode_free(th_dec_ctx *_dec){
-  if(_dec!=NULL){
-    oc_dec_clear(_dec);
-    oc_aligned_free(_dec);
-  }
-}
-
-int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
- size_t _buf_sz){
-  switch(_req){
-  case TH_DECCTL_GET_PPLEVEL_MAX:{
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    (*(int *)_buf)=OC_PP_LEVEL_MAX;
-    return 0;
-  }break;
-  case TH_DECCTL_SET_PPLEVEL:{
-    int pp_level;
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    pp_level=*(int *)_buf;
-    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
-    _dec->pp_level=pp_level;
-    return 0;
-  }break;
-  case TH_DECCTL_SET_GRANPOS:{
-    ogg_int64_t granpos;
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
-    granpos=*(ogg_int64_t *)_buf;
-    if(granpos<0)return TH_EINVAL;
-    _dec->state.granpos=granpos;
-    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
-     -_dec->state.granpos_bias;
-    _dec->state.curframe_num=_dec->state.keyframe_num
-     +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
-    return 0;
-  }break;
-  case TH_DECCTL_SET_STRIPE_CB:{
-    th_stripe_callback *cb;
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
-    cb=(th_stripe_callback *)_buf;
-    _dec->stripe_cb.ctx=cb->ctx;
-    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
-    return 0;
-  }break;
-#ifdef HAVE_CAIRO
-  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry=1;
-    _dec->telemetry_mbmode=*(int *)_buf;
-    return 0;
-  }break;
-  case TH_DECCTL_SET_TELEMETRY_MV:{
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry=1;
-    _dec->telemetry_mv=*(int *)_buf;
-    return 0;
-  }break;
-  case TH_DECCTL_SET_TELEMETRY_QI:{
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry=1;
-    _dec->telemetry_qi=*(int *)_buf;
-    return 0;
-  }break;
-  case TH_DECCTL_SET_TELEMETRY_BITS:{
-    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-    if(_buf_sz!=sizeof(int))return TH_EINVAL;
-    _dec->telemetry=1;
-    _dec->telemetry_bits=*(int *)_buf;
-    return 0;
-  }break;
-#endif
-  default:return TH_EIMPL;
-  }
-}
-
-/*We're decoding an INTER frame, but have no initialized reference
-   buffers (i.e., decoding did not start on a key frame).
-  We initialize them to a solid gray here.*/
-static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
-  th_info   *info;
-  size_t     yplane_sz;
-  size_t     cplane_sz;
-  ptrdiff_t  yoffset;
-  int        yhstride;
-  int        yheight;
-  int        chstride;
-  int        cheight;
-  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
-  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
-  _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
-  _dec->state.ref_frame_data[OC_FRAME_GOLD]=
-   _dec->state.ref_frame_data[OC_FRAME_PREV]=
-   _dec->state.ref_frame_data[OC_FRAME_SELF]=
-   _dec->state.ref_frame_bufs[0][0].data;
-  memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
-   sizeof(_dec->pp_frame_buf[0])*3);
-  info=&_dec->state.info;
-  yhstride=abs(_dec->state.ref_ystride[0]);
-  yheight=info->frame_height+2*OC_UMV_PADDING;
-  chstride=abs(_dec->state.ref_ystride[1]);
-  cheight=yheight>>!(info->pixel_fmt&2);
-  yplane_sz=yhstride*(size_t)yheight+16;
-  cplane_sz=chstride*(size_t)cheight;
-  yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
-  memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
-}
-
-int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
- ogg_int64_t *_granpos){
-  int ret;
-  if(_dec==NULL||_op==NULL)return TH_EFAULT;
-  /*A completely empty packet indicates a dropped frame and is treated exactly
-     like an inter frame with no coded blocks.*/
-  if(_op->bytes==0){
-    _dec->state.frame_type=OC_INTER_FRAME;
-    _dec->state.ntotal_coded_fragis=0;
-  }
-  else{
-    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
-    ret=oc_dec_frame_header_unpack(_dec);
-    if(ret<0)return ret;
-    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
-    else oc_dec_coded_flags_unpack(_dec);
-  }
-  /*If there have been no reference frames, and we need one, initialize one.*/
-  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
-   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
-   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
-    oc_dec_init_dummy_frame(_dec);
-  }
-  /*If this was an inter frame with no coded blocks...*/
-  if(_dec->state.ntotal_coded_fragis<=0){
-    /*Just update the granule position and return.*/
-    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
-     _dec->state.info.keyframe_granule_shift)
-     +(_dec->state.curframe_num-_dec->state.keyframe_num);
-    _dec->state.curframe_num++;
-    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
-    return TH_DUPFRAME;
-  }
-  else{
-    th_ycbcr_buffer stripe_buf;
-    int             stripe_fragy;
-    int             refi;
-    int             pli;
-    int             notstart;
-    int             notdone;
-    /*Select a free buffer to use for the reconstructed version of this frame.*/
-    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
-     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
-    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
-    _dec->state.ref_frame_data[OC_FRAME_SELF]=
-     _dec->state.ref_frame_bufs[refi][0].data;
-#if defined(HAVE_CAIRO)
-    _dec->telemetry_frame_bytes=_op->bytes;
-#endif
-    if(_dec->state.frame_type==OC_INTRA_FRAME){
-      _dec->state.keyframe_num=_dec->state.curframe_num;
-#if defined(HAVE_CAIRO)
-      _dec->telemetry_coding_bytes=
-       _dec->telemetry_mode_bytes=
-       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-    }
-    else{
-#if defined(HAVE_CAIRO)
-      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-      oc_dec_mb_modes_unpack(_dec);
-#if defined(HAVE_CAIRO)
-      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
-#if defined(HAVE_CAIRO)
-      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-    }
-    oc_dec_block_qis_unpack(_dec);
-#if defined(HAVE_CAIRO)
-    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
-#endif
-    oc_dec_residual_tokens_unpack(_dec);
-    /*Update granule position.
-      This must be done before the striped decode callbacks so that the
-       application knows what to do with the frame data.*/
-    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
-     _dec->state.info.keyframe_granule_shift)
-     +(_dec->state.curframe_num-_dec->state.keyframe_num);
-    _dec->state.curframe_num++;
-    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
-    /*All of the rest of the operations -- DC prediction reversal,
-       reconstructing coded fragments, copying uncoded fragments, loop
-       filtering, extending borders, and out-of-loop post-processing -- should
-       be pipelined.
-      I.e., DC prediction reversal, reconstruction, and uncoded fragment
-       copying are done for one or two super block rows, then loop filtering is
-       run as far as it can, then bordering copying, then post-processing.
-      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
-       block rows, and one chroma.
-      Otherwise, an MCU consists of one super block row from each plane.
-      Inside each MCU, we perform all of the steps on one color plane before
-       moving on to the next.
-      After reconstruction, the additional filtering stages introduce a delay
-       since they need some pixels from the next fragment row.
-      Thus the actual number of decoded rows available is slightly smaller for
-       the first MCU, and slightly larger for the last.
-
-      This entire process allows us to operate on the data while it is still in
-       cache, resulting in big performance improvements.
-      An application callback allows further application processing (blitting
-       to video memory, color conversion, etc.) to also use the data while it's
-       in cache.*/
-    oc_dec_pipeline_init(_dec,&_dec->pipe);
-    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
-    notstart=0;
-    notdone=1;
-    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
-      int avail_fragy0;
-      int avail_fragy_end;
-      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
-      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
-      for(pli=0;pli<3;pli++){
-        oc_fragment_plane *fplane;
-        int                frag_shift;
-        int                pp_offset;
-        int                sdelay;
-        int                edelay;
-        fplane=_dec->state.fplanes+pli;
-        /*Compute the first and last fragment row of the current MCU for this
-           plane.*/
-        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
-        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
-        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
-         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
-        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
-        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
-        sdelay=edelay=0;
-        if(_dec->pipe.loop_filter){
-          sdelay+=notstart;
-          edelay+=notdone;
-          oc_state_loop_filter_frag_rows(&_dec->state,
-           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
-           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
-        }
-        /*To fill the borders, we have an additional two pixel delay, since a
-           fragment in the next row could filter its top edge, using two pixels
-           from a fragment in this row.
-          But there's no reason to delay a full fragment between the two.*/
-        oc_state_borders_fill_rows(&_dec->state,refi,pli,
-         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
-         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
-        /*Out-of-loop post-processing.*/
-        pp_offset=3*(pli!=0);
-        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
-          /*Perform de-blocking in one plane.*/
-          sdelay+=notstart;
-          edelay+=notdone;
-          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
-           _dec->state.ref_frame_bufs[refi],pli,
-           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
-          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
-            /*Perform de-ringing in one plane.*/
-            sdelay+=notstart;
-            edelay+=notdone;
-            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
-             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
-          }
-        }
-        /*If no post-processing is done, we still need to delay a row for the
-           loop filter, thanks to the strange filtering order VP3 chose.*/
-        else if(_dec->pipe.loop_filter){
-          sdelay+=notstart;
-          edelay+=notdone;
-        }
-        /*Compute the intersection of the available rows in all planes.
-          If chroma is sub-sampled, the effect of each of its delays is
-           doubled, but luma might have more post-processing filters enabled
-           than chroma, so we don't know up front which one is the limiting
-           factor.*/
-        avail_fragy0=OC_MINI(avail_fragy0,
-         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
-        avail_fragy_end=OC_MINI(avail_fragy_end,
-         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
-      }
-      if(_dec->stripe_cb.stripe_decoded!=NULL){
-        /*The callback might want to use the FPU, so let's make sure they can.
-          We violate all kinds of ABI restrictions by not doing this until
-           now, but none of them actually matter since we don't use floating
-           point ourselves.*/
-        oc_restore_fpu(&_dec->state);
-        /*Make the callback, ensuring we flip the sense of the "start" and
-           "end" of the available region upside down.*/
-        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
-         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
-         _dec->state.fplanes[0].nvfrags-avail_fragy0);
-      }
-      notstart=1;
-    }
-    /*Finish filling in the reference frame borders.*/
-    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
-    /*Update the reference frame indices.*/
-    if(_dec->state.frame_type==OC_INTRA_FRAME){
-      /*The new frame becomes both the previous and gold reference frames.*/
-      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
-       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
-       _dec->state.ref_frame_idx[OC_FRAME_SELF];
-      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
-       _dec->state.ref_frame_data[OC_FRAME_PREV]=
-       _dec->state.ref_frame_data[OC_FRAME_SELF];
-    }
-    else{
-      /*Otherwise, just replace the previous reference frame.*/
-      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
-       _dec->state.ref_frame_idx[OC_FRAME_SELF];
-      _dec->state.ref_frame_data[OC_FRAME_PREV]=
-       _dec->state.ref_frame_data[OC_FRAME_SELF];
-    }
-    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
-       gamma values, if nothing else).*/
-    oc_restore_fpu(&_dec->state);
-#if defined(OC_DUMP_IMAGES)
-    /*We only dump images if there were some coded blocks.*/
-    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
-#endif
-    return 0;
-  }
-}
-
-int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
-  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
-  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
-#if defined(HAVE_CAIRO)
-  /*If telemetry ioctls are active, we need to draw to the output buffer.
-    Stuff the plane into cairo.*/
-  if(_dec->telemetry){
-    cairo_surface_t *cs;
-    unsigned char   *data;
-    unsigned char   *y_row;
-    unsigned char   *u_row;
-    unsigned char   *v_row;
-    unsigned char   *rgb_row;
-    int              cstride;
-    int              w;
-    int              h;
-    int              x;
-    int              y;
-    int              hdec;
-    int              vdec;
-    w=_ycbcr[0].width;
-    h=_ycbcr[0].height;
-    hdec=!(_dec->state.info.pixel_fmt&1);
-    vdec=!(_dec->state.info.pixel_fmt&2);
-    /*Lazy data buffer init.
-      We could try to re-use the post-processing buffer, which would save
-       memory, but complicate the allocation logic there.
-      I don't think anyone cares about memory usage when using telemetry; it is
-       not meant for embedded devices.*/
-    if(_dec->telemetry_frame_data==NULL){
-      _dec->telemetry_frame_data=_ogg_malloc(
-       (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
-      if(_dec->telemetry_frame_data==NULL)return 0;
-    }
-    cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
-    /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
-    data=cairo_image_surface_get_data(cs);
-    if(data==NULL){
-      cairo_surface_destroy(cs);
-      return 0;
-    }
-    cstride=cairo_image_surface_get_stride(cs);
-    y_row=_ycbcr[0].data;
-    u_row=_ycbcr[1].data;
-    v_row=_ycbcr[2].data;
-    rgb_row=data;
-    for(y=0;y<h;y++){
-      for(x=0;x<w;x++){
-        int r;
-        int g;
-        int b;
-        r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
-        g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
-         -2672387*v_row[x>>hdec]+447306710)/3287200;
-        b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
-        rgb_row[4*x+0]=OC_CLAMP255(b);
-        rgb_row[4*x+1]=OC_CLAMP255(g);
-        rgb_row[4*x+2]=OC_CLAMP255(r);
-      }
-      y_row+=_ycbcr[0].stride;
-      u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
-      v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
-      rgb_row+=cstride;
-    }
-    /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
-    {
-      cairo_t           *c;
-      const oc_fragment *frags;
-      oc_mv             *frag_mvs;
-      const signed char *mb_modes;
-      oc_mb_map         *mb_maps;
-      size_t             nmbs;
-      size_t             mbi;
-      int                row2;
-      int                col2;
-      int                qim[3]={0,0,0};
-      if(_dec->state.nqis==2){
-        int bqi;
-        bqi=_dec->state.qis[0];
-        if(_dec->state.qis[1]>bqi)qim[1]=1;
-        if(_dec->state.qis[1]<bqi)qim[1]=-1;
-      }
-      if(_dec->state.nqis==3){
-        int bqi;
-        int cqi;
-        int dqi;
-        bqi=_dec->state.qis[0];
-        cqi=_dec->state.qis[1];
-        dqi=_dec->state.qis[2];
-        if(cqi>bqi&&dqi>bqi){
-          if(dqi>cqi){
-            qim[1]=1;
-            qim[2]=2;
-          }
-          else{
-            qim[1]=2;
-            qim[2]=1;
-          }
-        }
-        else if(cqi<bqi&&dqi<bqi){
-          if(dqi<cqi){
-            qim[1]=-1;
-            qim[2]=-2;
-          }
-          else{
-            qim[1]=-2;
-            qim[2]=-1;
-          }
-        }
-        else{
-          if(cqi<bqi)qim[1]=-1;
-          else qim[1]=1;
-          if(dqi<bqi)qim[2]=-1;
-          else qim[2]=1;
-        }
-      }
-      c=cairo_create(cs);
-      frags=_dec->state.frags;
-      frag_mvs=_dec->state.frag_mvs;
-      mb_modes=_dec->state.mb_modes;
-      mb_maps=_dec->state.mb_maps;
-      nmbs=_dec->state.nmbs;
-      row2=0;
-      col2=0;
-      for(mbi=0;mbi<nmbs;mbi++){
-        float x;
-        float y;
-        int   bi;
-        y=h-(row2+((col2+1>>1)&1))*16-16;
-        x=(col2>>1)*16;
-        cairo_set_line_width(c,1.);
-        /*Keyframe (all intra) red box.*/
-        if(_dec->state.frame_type==OC_INTRA_FRAME){
-          if(_dec->telemetry_mbmode&0x02){
-            cairo_set_source_rgba(c,1.,0,0,.5);
-            cairo_rectangle(c,x+2.5,y+2.5,11,11);
-            cairo_stroke_preserve(c);
-            cairo_set_source_rgba(c,1.,0,0,.25);
-            cairo_fill(c);
-          }
-        }
-        else{
-          ptrdiff_t fragi;
-          int       frag_mvx;
-          int       frag_mvy;
-          for(bi=0;bi<4;bi++){
-            fragi=mb_maps[mbi][0][bi];
-            if(fragi>=0&&frags[fragi].coded){
-              frag_mvx=OC_MV_X(frag_mvs[fragi]);
-              frag_mvy=OC_MV_Y(frag_mvs[fragi]);
-              break;
-            }
-          }
-          if(bi<4){
-            switch(mb_modes[mbi]){
-              case OC_MODE_INTRA:{
-                if(_dec->telemetry_mbmode&0x02){
-                  cairo_set_source_rgba(c,1.,0,0,.5);
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,1.,0,0,.25);
-                  cairo_fill(c);
-                }
-              }break;
-              case OC_MODE_INTER_NOMV:{
-                if(_dec->telemetry_mbmode&0x01){
-                  cairo_set_source_rgba(c,0,0,1.,.5);
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,0,0,1.,.25);
-                  cairo_fill(c);
-                }
-              }break;
-              case OC_MODE_INTER_MV:{
-                if(_dec->telemetry_mbmode&0x04){
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_set_source_rgba(c,0,1.,0,.5);
-                  cairo_stroke(c);
-                }
-                if(_dec->telemetry_mv&0x04){
-                  cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+8,y+8);
-                  cairo_stroke(c);
-                }
-              }break;
-              case OC_MODE_INTER_MV_LAST:{
-                if(_dec->telemetry_mbmode&0x08){
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_set_source_rgba(c,0,1.,0,.5);
-                  cairo_move_to(c,x+13.5,y+2.5);
-                  cairo_line_to(c,x+2.5,y+8);
-                  cairo_line_to(c,x+13.5,y+13.5);
-                  cairo_stroke(c);
-                }
-                if(_dec->telemetry_mv&0x08){
-                  cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+8,y+8);
-                  cairo_stroke(c);
-                }
-              }break;
-              case OC_MODE_INTER_MV_LAST2:{
-                if(_dec->telemetry_mbmode&0x10){
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_set_source_rgba(c,0,1.,0,.5);
-                  cairo_move_to(c,x+8,y+2.5);
-                  cairo_line_to(c,x+2.5,y+8);
-                  cairo_line_to(c,x+8,y+13.5);
-                  cairo_move_to(c,x+13.5,y+2.5);
-                  cairo_line_to(c,x+8,y+8);
-                  cairo_line_to(c,x+13.5,y+13.5);
-                  cairo_stroke(c);
-                }
-                if(_dec->telemetry_mv&0x10){
-                  cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+8,y+8);
-                  cairo_stroke(c);
-                }
-              }break;
-              case OC_MODE_GOLDEN_NOMV:{
-                if(_dec->telemetry_mbmode&0x20){
-                  cairo_set_source_rgba(c,1.,1.,0,.5);
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,1.,1.,0,.25);
-                  cairo_fill(c);
-                }
-              }break;
-              case OC_MODE_GOLDEN_MV:{
-                if(_dec->telemetry_mbmode&0x40){
-                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
-                  cairo_set_source_rgba(c,1.,1.,0,.5);
-                  cairo_stroke(c);
-                }
-                if(_dec->telemetry_mv&0x40){
-                  cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+8,y+8);
-                  cairo_stroke(c);
-                }
-              }break;
-              case OC_MODE_INTER_MV_FOUR:{
-                if(_dec->telemetry_mbmode&0x80){
-                  cairo_rectangle(c,x+2.5,y+2.5,4,4);
-                  cairo_rectangle(c,x+9.5,y+2.5,4,4);
-                  cairo_rectangle(c,x+2.5,y+9.5,4,4);
-                  cairo_rectangle(c,x+9.5,y+9.5,4,4);
-                  cairo_set_source_rgba(c,0,1.,0,.5);
-                  cairo_stroke(c);
-                }
-                /*4mv is odd, coded in raster order.*/
-                fragi=mb_maps[mbi][0][0];
-                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mvx=OC_MV_X(frag_mvs[fragi]);
-                  frag_mvx=OC_MV_Y(frag_mvs[fragi]);
-                  cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+4,y+12);
-                  cairo_stroke(c);
-                }
-                fragi=mb_maps[mbi][0][1];
-                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mvx=OC_MV_X(frag_mvs[fragi]);
-                  frag_mvx=OC_MV_Y(frag_mvs[fragi]);
-                  cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+12,y+12);
-                  cairo_stroke(c);
-                }
-                fragi=mb_maps[mbi][0][2];
-                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mvx=OC_MV_X(frag_mvs[fragi]);
-                  frag_mvx=OC_MV_Y(frag_mvs[fragi]);
-                  cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+4,y+4);
-                  cairo_stroke(c);
-                }
-                fragi=mb_maps[mbi][0][3];
-                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mvx=OC_MV_X(frag_mvs[fragi]);
-                  frag_mvx=OC_MV_Y(frag_mvs[fragi]);
-                  cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
-                  cairo_set_source_rgba(c,1.,1.,1.,.9);
-                  cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
-                  cairo_stroke_preserve(c);
-                  cairo_set_line_width(c,1.);
-                  cairo_line_to(c,x+12,y+4);
-                  cairo_stroke(c);
-                }
-              }break;
-            }
-          }
-        }
-        /*qii illustration.*/
-        if(_dec->telemetry_qi&0x2){
-          cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
-          for(bi=0;bi<4;bi++){
-            ptrdiff_t fragi;
-            int       qiv;
-            int       xp;
-            int       yp;
-            xp=x+(bi&1)*8;
-            yp=y+8-(bi&2)*4;
-            fragi=mb_maps[mbi][0][bi];
-            if(fragi>=0&&frags[fragi].coded){
-              qiv=qim[frags[fragi].qii];
-              cairo_set_line_width(c,3.);
-              cairo_set_source_rgba(c,0.,0.,0.,.5);
-              switch(qiv){
-                /*Double plus:*/
-                case 2:{
-                  if((bi&1)^((bi&2)>>1)){
-                    cairo_move_to(c,xp+2.5,yp+1.5);
-                    cairo_line_to(c,xp+2.5,yp+3.5);
-                    cairo_move_to(c,xp+1.5,yp+2.5);
-                    cairo_line_to(c,xp+3.5,yp+2.5);
-                    cairo_move_to(c,xp+5.5,yp+4.5);
-                    cairo_line_to(c,xp+5.5,yp+6.5);
-                    cairo_move_to(c,xp+4.5,yp+5.5);
-                    cairo_line_to(c,xp+6.5,yp+5.5);
-                    cairo_stroke_preserve(c);
-                    cairo_set_source_rgba(c,0.,1.,1.,1.);
-                  }
-                  else{
-                    cairo_move_to(c,xp+5.5,yp+1.5);
-                    cairo_line_to(c,xp+5.5,yp+3.5);
-                    cairo_move_to(c,xp+4.5,yp+2.5);
-                    cairo_line_to(c,xp+6.5,yp+2.5);
-                    cairo_move_to(c,xp+2.5,yp+4.5);
-                    cairo_line_to(c,xp+2.5,yp+6.5);
-                    cairo_move_to(c,xp+1.5,yp+5.5);
-                    cairo_line_to(c,xp+3.5,yp+5.5);
-                    cairo_stroke_preserve(c);
-                    cairo_set_source_rgba(c,0.,1.,1.,1.);
-                  }
-                }break;
-                /*Double minus:*/
-                case -2:{
-                  cairo_move_to(c,xp+2.5,yp+2.5);
-                  cairo_line_to(c,xp+5.5,yp+2.5);
-                  cairo_move_to(c,xp+2.5,yp+5.5);
-                  cairo_line_to(c,xp+5.5,yp+5.5);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,1.,1.,1.,1.);
-                }break;
-                /*Plus:*/
-                case 1:{
-                  if(bi&2==0)yp-=2;
-                  if(bi&1==0)xp-=2;
-                  cairo_move_to(c,xp+4.5,yp+2.5);
-                  cairo_line_to(c,xp+4.5,yp+6.5);
-                  cairo_move_to(c,xp+2.5,yp+4.5);
-                  cairo_line_to(c,xp+6.5,yp+4.5);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,.1,1.,.3,1.);
-                  break;
-                }
-                /*Fall through.*/
-                /*Minus:*/
-                case -1:{
-                  cairo_move_to(c,xp+2.5,yp+4.5);
-                  cairo_line_to(c,xp+6.5,yp+4.5);
-                  cairo_stroke_preserve(c);
-                  cairo_set_source_rgba(c,1.,.3,.1,1.);
-                }break;
-                default:continue;
-              }
-              cairo_set_line_width(c,1.);
-              cairo_stroke(c);
-            }
-          }
-        }
-        col2++;
-        if((col2>>1)>=_dec->state.nhmbs){
-          col2=0;
-          row2+=2;
-        }
-      }
-      /*Bit usage indicator[s]:*/
-      if(_dec->telemetry_bits){
-        int widths[6];
-        int fpsn;
-        int fpsd;
-        int mult;
-        int fullw;
-        int padw;
-        int i;
-        fpsn=_dec->state.info.fps_numerator;
-        fpsd=_dec->state.info.fps_denominator;
-        mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
-        fullw=250.f*h*fpsd*mult/fpsn;
-        padw=w-24;
-        /*Header and coded block bits.*/
-        if(_dec->telemetry_frame_bytes<0||
-         _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
-          _dec->telemetry_frame_bytes=0;
-        }
-        if(_dec->telemetry_coding_bytes<0||
-         _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
-          _dec->telemetry_coding_bytes=0;
-        }
-        if(_dec->telemetry_mode_bytes<0||
-         _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
-          _dec->telemetry_mode_bytes=0;
-        }
-        if(_dec->telemetry_mv_bytes<0||
-         _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
-          _dec->telemetry_mv_bytes=0;
-        }
-        if(_dec->telemetry_qi_bytes<0||
-         _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
-          _dec->telemetry_qi_bytes=0;
-        }
-        if(_dec->telemetry_dc_bytes<0||
-         _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
-          _dec->telemetry_dc_bytes=0;
-        }
-        widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
-        widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
-        widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
-        widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
-        widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
-        widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
-        for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
-        cairo_set_source_rgba(c,.0,.0,.0,.6);
-        cairo_rectangle(c,10,h-33,widths[0]+1,5);
-        cairo_rectangle(c,10,h-29,widths[1]+1,5);
-        cairo_rectangle(c,10,h-25,widths[2]+1,5);
-        cairo_rectangle(c,10,h-21,widths[3]+1,5);
-        cairo_rectangle(c,10,h-17,widths[4]+1,5);
-        cairo_rectangle(c,10,h-13,widths[5]+1,5);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,1,0,0);
-        cairo_rectangle(c,10.5,h-32.5,widths[0],4);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,0,1,0);
-        cairo_rectangle(c,10.5,h-28.5,widths[1],4);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,0,0,1);
-        cairo_rectangle(c,10.5,h-24.5,widths[2],4);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,.6,.4,.0);
-        cairo_rectangle(c,10.5,h-20.5,widths[3],4);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,.3,.3,.3);
-        cairo_rectangle(c,10.5,h-16.5,widths[4],4);
-        cairo_fill(c);
-        cairo_set_source_rgb(c,.5,.5,.8);
-        cairo_rectangle(c,10.5,h-12.5,widths[5],4);
-        cairo_fill(c);
-      }
-      /*Master qi indicator[s]:*/
-      if(_dec->telemetry_qi&0x1){
-        cairo_text_extents_t extents;
-        char                 buffer[10];
-        int                  p;
-        int                  y;
-        p=0;
-        y=h-7.5;
-        if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
-        buffer[p++]=48+_dec->state.qis[0]%10;
-        if(_dec->state.nqis>=2){
-          buffer[p++]=' ';
-          if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
-          buffer[p++]=48+_dec->state.qis[1]%10;
-        }
-        if(_dec->state.nqis==3){
-          buffer[p++]=' ';
-          if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
-          buffer[p++]=48+_dec->state.qis[2]%10;
-        }
-        buffer[p++]='\0';
-        cairo_select_font_face(c,"sans",
-         CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
-        cairo_set_font_size(c,18);
-        cairo_text_extents(c,buffer,&extents);
-        cairo_set_source_rgb(c,1,1,1);
-        cairo_move_to(c,w-extents.x_advance-10,y);
-        cairo_show_text(c,buffer);
-        cairo_set_source_rgb(c,0,0,0);
-        cairo_move_to(c,w-extents.x_advance-10,y);
-        cairo_text_path(c,buffer);
-        cairo_set_line_width(c,.8);
-        cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
-        cairo_stroke(c);
-      }
-      cairo_destroy(c);
-    }
-    /*Out of the Cairo plane into the telemetry YUV buffer.*/
-    _ycbcr[0].data=_dec->telemetry_frame_data;
-    _ycbcr[0].stride=_ycbcr[0].width;
-    _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
-    _ycbcr[1].stride=_ycbcr[1].width;
-    _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
-    _ycbcr[2].stride=_ycbcr[2].width;
-    y_row=_ycbcr[0].data;
-    u_row=_ycbcr[1].data;
-    v_row=_ycbcr[2].data;
-    rgb_row=data;
-    /*This is one of the few places it's worth handling chroma on a
-       case-by-case basis.*/
-    switch(_dec->state.info.pixel_fmt){
-      case TH_PF_420:{
-        for(y=0;y<h;y+=2){
-          unsigned char *y_row2;
-          unsigned char *rgb_row2;
-          y_row2=y_row+_ycbcr[0].stride;
-          rgb_row2=rgb_row+cstride;
-          for(x=0;x<w;x+=2){
-            int y;
-            int u;
-            int v;
-            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
-             +24966*rgb_row[4*x+0]+4207500)/255000;
-            y_row[x]=OC_CLAMP255(y);
-            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
-             +24966*rgb_row[4*x+4]+4207500)/255000;
-            y_row[x+1]=OC_CLAMP255(y);
-            y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
-             +24966*rgb_row2[4*x+0]+4207500)/255000;
-            y_row2[x]=OC_CLAMP255(y);
-            y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
-             +24966*rgb_row2[4*x+4]+4207500)/255000;
-            y_row2[x+1]=OC_CLAMP255(y);
-            u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
-             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
-             -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
-             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
-             +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
-             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
-            v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
-             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
-             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
-              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
-             -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
-              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
-            u_row[x>>1]=OC_CLAMP255(u);
-            v_row[x>>1]=OC_CLAMP255(v);
-          }
-          y_row+=_ycbcr[0].stride<<1;
-          u_row+=_ycbcr[1].stride;
-          v_row+=_ycbcr[2].stride;
-          rgb_row+=cstride<<1;
-        }
-      }break;
-      case TH_PF_422:{
-        for(y=0;y<h;y++){
-          for(x=0;x<w;x+=2){
-            int y;
-            int u;
-            int v;
-            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
-             +24966*rgb_row[4*x+0]+4207500)/255000;
-            y_row[x]=OC_CLAMP255(y);
-            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
-             +24966*rgb_row[4*x+4]+4207500)/255000;
-            y_row[x+1]=OC_CLAMP255(y);
-            u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
-             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
-             +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
-            v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
-             -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
-             -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
-            u_row[x>>1]=OC_CLAMP255(u);
-            v_row[x>>1]=OC_CLAMP255(v);
-          }
-          y_row+=_ycbcr[0].stride;
-          u_row+=_ycbcr[1].stride;
-          v_row+=_ycbcr[2].stride;
-          rgb_row+=cstride;
-        }
-      }break;
-      /*case TH_PF_444:*/
-      default:{
-        for(y=0;y<h;y++){
-          for(x=0;x<w;x++){
-            int y;
-            int u;
-            int v;
-            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
-             +24966*rgb_row[4*x+0]+4207500)/255000;
-            u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
-             +99232*rgb_row[4*x+0]+29032005)/225930;
-            v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
-             -25536*rgb_row[4*x+0]+45940035)/357510;
-            y_row[x]=OC_CLAMP255(y);
-            u_row[x]=OC_CLAMP255(u);
-            v_row[x]=OC_CLAMP255(v);
-          }
-          y_row+=_ycbcr[0].stride;
-          u_row+=_ycbcr[1].stride;
-          v_row+=_ycbcr[2].stride;
-          rgb_row+=cstride;
-        }
-      }break;
-    }
-    /*Finished.
-      Destroy the surface.*/
-    cairo_surface_destroy(cs);
-  }
-#endif
-  return 0;
-}
diff --git a/media/libtheora/lib/dequant.c b/media/libtheora/lib/dequant.c
deleted file mode 100644
index e554872d4..000000000
--- a/media/libtheora/lib/dequant.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: dequant.c 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "dequant.h"
-#include "decint.h"
-
-int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
-  th_quant_base *base_mats;
-  long           val;
-  int            nbase_mats;
-  int            sizes[64];
-  int            indices[64];
-  int            nbits;
-  int            bmi;
-  int            ci;
-  int            qti;
-  int            pli;
-  int            qri;
-  int            qi;
-  int            i;
-  val=oc_pack_read(_opb,3);
-  nbits=(int)val;
-  for(qi=0;qi<64;qi++){
-    val=oc_pack_read(_opb,nbits);
-    _qinfo->loop_filter_limits[qi]=(unsigned char)val;
-  }
-  val=oc_pack_read(_opb,4);
-  nbits=(int)val+1;
-  for(qi=0;qi<64;qi++){
-    val=oc_pack_read(_opb,nbits);
-    _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
-  }
-  val=oc_pack_read(_opb,4);
-  nbits=(int)val+1;
-  for(qi=0;qi<64;qi++){
-    val=oc_pack_read(_opb,nbits);
-    _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
-  }
-  val=oc_pack_read(_opb,9);
-  nbase_mats=(int)val+1;
-  base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
-  if(base_mats==NULL)return TH_EFAULT;
-  for(bmi=0;bmi<nbase_mats;bmi++){
-    for(ci=0;ci<64;ci++){
-      val=oc_pack_read(_opb,8);
-      base_mats[bmi][ci]=(unsigned char)val;
-    }
-  }
-  nbits=oc_ilog(nbase_mats-1);
-  for(i=0;i<6;i++){
-    th_quant_ranges *qranges;
-    th_quant_base   *qrbms;
-    int             *qrsizes;
-    qti=i/3;
-    pli=i%3;
-    qranges=_qinfo->qi_ranges[qti]+pli;
-    if(i>0){
-      val=oc_pack_read1(_opb);
-      if(!val){
-        int qtj;
-        int plj;
-        if(qti>0){
-          val=oc_pack_read1(_opb);
-          if(val){
-            qtj=qti-1;
-            plj=pli;
-          }
-          else{
-            qtj=(i-1)/3;
-            plj=(i-1)%3;
-          }
-        }
-        else{
-          qtj=(i-1)/3;
-          plj=(i-1)%3;
-        }
-        *qranges=*(_qinfo->qi_ranges[qtj]+plj);
-        continue;
-      }
-    }
-    val=oc_pack_read(_opb,nbits);
-    indices[0]=(int)val;
-    for(qi=qri=0;qi<63;){
-      val=oc_pack_read(_opb,oc_ilog(62-qi));
-      sizes[qri]=(int)val+1;
-      qi+=(int)val+1;
-      val=oc_pack_read(_opb,nbits);
-      indices[++qri]=(int)val;
-    }
-    /*Note: The caller is responsible for cleaning up any partially
-       constructed qinfo.*/
-    if(qi>63){
-      _ogg_free(base_mats);
-      return TH_EBADHEADER;
-    }
-    qranges->nranges=qri;
-    qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
-    if(qranges->sizes==NULL){
-      /*Note: The caller is responsible for cleaning up any partially
-         constructed qinfo.*/
-      _ogg_free(base_mats);
-      return TH_EFAULT;
-    }
-    memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
-    qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
-    if(qrbms==NULL){
-      /*Note: The caller is responsible for cleaning up any partially
-         constructed qinfo.*/
-      _ogg_free(base_mats);
-      return TH_EFAULT;
-    }
-    qranges->base_matrices=(const th_quant_base *)qrbms;
-    do{
-      bmi=indices[qri];
-      /*Note: The caller is responsible for cleaning up any partially
-         constructed qinfo.*/
-      if(bmi>=nbase_mats){
-        _ogg_free(base_mats);
-        return TH_EBADHEADER;
-      }
-      memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri]));
-    }
-    while(qri-->0);
-  }
-  _ogg_free(base_mats);
-  return 0;
-}
-
-void oc_quant_params_clear(th_quant_info *_qinfo){
-  int i;
-  for(i=6;i-->0;){
-    int qti;
-    int pli;
-    qti=i/3;
-    pli=i%3;
-    /*Clear any duplicate pointer references.*/
-    if(i>0){
-      int qtj;
-      int plj;
-      qtj=(i-1)/3;
-      plj=(i-1)%3;
-      if(_qinfo->qi_ranges[qti][pli].sizes==
-       _qinfo->qi_ranges[qtj][plj].sizes){
-        _qinfo->qi_ranges[qti][pli].sizes=NULL;
-      }
-      if(_qinfo->qi_ranges[qti][pli].base_matrices==
-       _qinfo->qi_ranges[qtj][plj].base_matrices){
-        _qinfo->qi_ranges[qti][pli].base_matrices=NULL;
-      }
-    }
-    if(qti>0){
-      if(_qinfo->qi_ranges[1][pli].sizes==
-       _qinfo->qi_ranges[0][pli].sizes){
-        _qinfo->qi_ranges[1][pli].sizes=NULL;
-      }
-      if(_qinfo->qi_ranges[1][pli].base_matrices==
-       _qinfo->qi_ranges[0][pli].base_matrices){
-        _qinfo->qi_ranges[1][pli].base_matrices=NULL;
-      }
-    }
-    /*Now free all the non-duplicate storage.*/
-    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes);
-    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices);
-  }
-}
diff --git a/media/libtheora/lib/dequant.h b/media/libtheora/lib/dequant.h
deleted file mode 100644
index ef25838e3..000000000
--- a/media/libtheora/lib/dequant.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#if !defined(_dequant_H)
-# define _dequant_H (1)
-# include "quant.h"
-# include "bitpack.h"
-
-int oc_quant_params_unpack(oc_pack_buf *_opb,
- th_quant_info *_qinfo);
-void oc_quant_params_clear(th_quant_info *_qinfo);
-
-#endif
diff --git a/media/libtheora/lib/fragment.c b/media/libtheora/lib/fragment.c
deleted file mode 100644
index 4ba6af1b7..000000000
--- a/media/libtheora/lib/fragment.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: fragment.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-#include <string.h>
-#include "internal.h"
-
-void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
-  int i;
-  for(i=8;i-->0;){
-    memcpy(_dst,_src,8*sizeof(*_dst));
-    _dst+=_ystride;
-    _src+=_ystride;
-  }
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
-   frame to another.
-  _dst_frame:     The reference frame to copy to.
-  _src_frame:     The reference frame to copy from.
-  _ystride:       The row stride of the reference frames.
-  _fragis:        A pointer to a list of fragment indices.
-  _nfragis:       The number of fragment indices to copy.
-  _frag_buf_offs: The offsets of fragments in the reference frames.*/
-void oc_frag_copy_list_c(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs){
-  ptrdiff_t fragii;
-  for(fragii=0;fragii<_nfragis;fragii++){
-    ptrdiff_t frag_buf_off;
-    frag_buf_off=_frag_buf_offs[_fragis[fragii]];
-    oc_frag_copy_c(_dst_frame+frag_buf_off,
-     _src_frame+frag_buf_off,_ystride);
-  }
-}
-
-void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
- const ogg_int16_t _residue[64]){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128);
-    _dst+=_ystride;
-  }
-}
-
-void oc_frag_recon_inter_c(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]);
-    _dst+=_ystride;
-    _src+=_ystride;
-  }
-}
-
-void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1));
-    _dst+=_ystride;
-    _src1+=_ystride;
-    _src2+=_ystride;
-  }
-}
-
-void oc_restore_fpu_c(void){}
diff --git a/media/libtheora/lib/huffdec.c b/media/libtheora/lib/huffdec.c
deleted file mode 100644
index fe013c611..000000000
--- a/media/libtheora/lib/huffdec.c
+++ /dev/null
@@ -1,521 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "huffdec.h"
-#include "decint.h"
-
-
-
-/*Instead of storing every branching in the tree, subtrees can be collapsed
-   into one node, with a table of size 1<<nbits pointing directly to its
-   descedents nbits levels down.
-  This allows more than one bit to be read at a time, and avoids following all
-   the intermediate branches with next to no increased code complexity once
-   the collapsed tree has been built.
-  We do _not_ require that a subtree be complete to be collapsed, but instead
-   store duplicate pointers in the table, and record the actual depth of the
-   node below its parent.
-  This tells us the number of bits to advance the stream after reaching it.
-
-  This turns out to be equivalent to the method described in \cite{Hash95},
-   without the requirement that codewords be sorted by length.
-  If the codewords were sorted by length (so-called ``canonical-codes''), they
-   could be decoded much faster via either Lindell and Moffat's approach or
-   Hashemian's Condensed Huffman Code approach, the latter of which has an
-   extremely small memory footprint.
-  We can't use Choueka et al.'s finite state machine approach, which is
-   extremely fast, because we can't allow multiple symbols to be output at a
-   time; the codebook can and does change between symbols.
-  It also has very large memory requirements, which impairs cache coherency.
-
-  We store the tree packed in an array of 16-bit integers (words).
-  Each node consists of a single word, followed consecutively by two or more
-   indices of its children.
-  Let n be the value of this first word.
-  This is the number of bits that need to be read to traverse the node, and
-   must be positive.
-  1<<n entries follow in the array, each an index to a child node.
-  If the child is positive, then it is the index of another internal node in
-   the table.
-  If the child is negative or zero, then it is a leaf node.
-  These are stored directly in the child pointer to save space, since they only
-   require a single word.
-  If a leaf node would have been encountered before reading n bits, then it is
-   duplicated the necessary number of times in this table.
-  Leaf nodes pack both a token value and their actual depth in the tree.
-  The token in the leaf node is (-leaf&255).
-  The number of bits that need to be consumed to reach the leaf, starting from
-   the current node, is (-leaf>>8).
-
-  @ARTICLE{Hash95,
-    author="Reza Hashemian",
-    title="Memory Efficient and High-Speed Search {Huffman} Coding",
-    journal="{IEEE} Transactions on Communications",
-    volume=43,
-    number=10,
-    pages="2576--2581",
-    month=Oct,
-    year=1995
-  }*/
-
-
-
-/*The map from external spec-defined tokens to internal tokens.
-  This is constructed so that any extra bits read with the original token value
-   can be masked off the least significant bits of its internal token index.
-  In addition, all of the tokens which require additional extra bits are placed
-   at the start of the list, and grouped by type.
-  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
-   giving it index 0 may simplify comparisons on some architectures.
-  These requirements require some substantial reordering.*/
-static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
-  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
-  15,
-  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
-  16,
-  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
-  17,
-  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
-  88,
-  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
-  80,
-  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
-   1,
-  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
-   0,
-  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
-  48,
-  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
-  14,
-  /*OC_ONE_TOKEN (0 extra bits)*/
-  56,
-  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
-  57,
-  /*OC_TWO_TOKEN (0 extra bits)*/
-  58,
-  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
-  59,
-  /*OC_DCT_VAL_CAT2 (1 extra bit)*/
-  60,
-  62,
-  64,
-  66,
-  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
-  68,
-  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
-  72,
-  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
-   2,
-  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
-   4,
-  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
-   6,
-  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
-   8,
-  /*OC_DCT_RUN_CAT1A (1 extra bit)*/
-  18,
-  20,
-  22,
-  24,
-  26,
-  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
-  32,
-  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
-  12,
-  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
-  28,
-  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
-  40
-};
-
-/*The log base 2 of number of internal tokens associated with each of the spec
-   tokens (i.e., how many of the extra bits are folded into the token value).
-  Increasing the maximum value beyond 3 will enlarge the amount of stack
-   required for tree construction.*/
-static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
-  0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
-};
-
-
-/*The size a lookup table is allowed to grow to relative to the number of
-   unique nodes it contains.
-  E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
-   wasted (1/4 of the space must be used).
-  Larger numbers can decode tokens with fewer read operations, while smaller
-   numbers may save more space.
-  With a sample file:
-  32233473 read calls are required when no tree collapsing is done (100.0%).
-  19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
-  11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
-  10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
-  10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
-  Since a value of 2 gets us the vast majority of the speed-up with only a
-   small amount of wasted memory, this is what we use.
-  This value must be less than 128, or you could create a tree with more than
-   32767 entries, which would overflow the 16-bit words used to index it.*/
-#define OC_HUFF_SLUSH (2)
-/*The root of the tree is on the fast path, and a larger value here is more
-   beneficial than elsewhere in the tree.
-  7 appears to give the best performance, trading off between increased use of
-   the single-read fast path and cache footprint for the tables, though
-   obviously this will depend on your cache size.
-  Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
-#define OC_ROOT_HUFF_SLUSH (7)
-
-
-
-/*Unpacks a Huffman codebook.
-  _opb:    The buffer to unpack from.
-  _tokens: Stores a list of internal tokens, in the order they were found in
-            the codebook, and the lengths of their corresponding codewords.
-           This is enough to completely define the codebook, while minimizing
-            stack usage and avoiding temporary allocations (for platforms
-            where free() is a no-op).
-  Return: The number of internal tokens in the codebook, or a negative value
-   on error.*/
-int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
-  ogg_uint32_t code;
-  int          len;
-  int          ntokens;
-  int          nleaves;
-  code=0;
-  len=ntokens=nleaves=0;
-  for(;;){
-    long bits;
-    bits=oc_pack_read1(_opb);
-    /*Only process nodes so long as there's more bits in the buffer.*/
-    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
-    /*Read an internal node:*/
-    if(!bits){
-      len++;
-      /*Don't allow codewords longer than 32 bits.*/
-      if(len>32)return TH_EBADHEADER;
-    }
-    /*Read a leaf node:*/
-    else{
-      ogg_uint32_t code_bit;
-      int          neb;
-      int          nentries;
-      int          token;
-      /*Don't allow more than 32 spec-tokens per codebook.*/
-      if(++nleaves>32)return TH_EBADHEADER;
-      bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
-      neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
-      token=OC_DCT_TOKEN_MAP[bits];
-      nentries=1<<neb;
-      while(nentries-->0){
-        _tokens[ntokens][0]=(unsigned char)token++;
-        _tokens[ntokens][1]=(unsigned char)(len+neb);
-        ntokens++;
-      }
-      code_bit=0x80000000U>>len-1;
-      while(len>0&&(code&code_bit)){
-        code^=code_bit;
-        code_bit<<=1;
-        len--;
-      }
-      if(len<=0)break;
-      code|=code_bit;
-    }
-  }
-  return ntokens;
-}
-
-/*Count how many tokens would be required to fill a subtree at depth _depth.
-  _tokens: A list of internal tokens, in the order they are found in the
-            codebook, and the lengths of their corresponding codewords.
-  _depth:  The depth of the desired node in the corresponding tree structure.
-  Return: The number of tokens that belong to that subtree.*/
-static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
-  ogg_uint32_t code;
-  int          ti;
-  code=0;
-  ti=0;
-  do{
-    if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
-    else{
-      /*Because of the expanded internal tokens, we can have codewords as long
-         as 35 bits.
-        A single recursion here is enough to advance past them.*/
-      code++;
-      ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
-    }
-  }
-  while(code<0x80000000U);
-  return ti;
-}
-
-/*Compute the number of bits to use for a collapsed tree node at the given
-   depth.
-  _tokens:  A list of internal tokens, in the order they are found in the
-             codebook, and the lengths of their corresponding codewords.
-  _ntokens: The number of tokens corresponding to this tree node.
-  _depth:   The depth of this tree node.
-  Return: The number of bits to use for a collapsed tree node rooted here.
-          This is always at least one, even if this was a leaf node.*/
-static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
- int _ntokens,int _depth){
-  int got_leaves;
-  int loccupancy;
-  int occupancy;
-  int slush;
-  int nbits;
-  int best_nbits;
-  slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
-  /*It's legal to have a tree with just a single node, which requires no bits
-     to decode and always returns the same token.
-    However, no encoder actually does this (yet).
-    To avoid a special case in oc_huff_token_decode(), we force the number of
-     lookahead bits to be at least one.
-    This will produce a tree that looks ahead one bit and then advances the
-     stream zero bits.*/
-  nbits=1;
-  occupancy=2;
-  got_leaves=1;
-  do{
-    int ti;
-    if(got_leaves)best_nbits=nbits;
-    nbits++;
-    got_leaves=0;
-    loccupancy=occupancy;
-    for(occupancy=ti=0;ti<_ntokens;occupancy++){
-      if(_tokens[ti][1]<_depth+nbits)ti++;
-      else if(_tokens[ti][1]==_depth+nbits){
-        got_leaves=1;
-        ti++;
-      }
-      else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
-    }
-  }
-  while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
-  return best_nbits;
-}
-
-/*Determines the size in words of a Huffman tree node that represents a
-   subtree of depth _nbits.
-  _nbits: The depth of the subtree.
-          This must be greater than zero.
-  Return: The number of words required to store the node.*/
-static size_t oc_huff_node_size(int _nbits){
-  return 1+(1<<_nbits);
-}
-
-/*Produces a collapsed-tree representation of the given token list.
-  _tree: The storage for the collapsed Huffman tree.
-         This may be NULL to compute the required storage size instead of
-          constructing the tree.
-  _tokens:  A list of internal tokens, in the order they are found in the
-             codebook, and the lengths of their corresponding codewords.
-  _ntokens: The number of tokens corresponding to this tree node.
-  Return: The number of words required to store the tree.*/
-#if defined(_MSC_VER) && _MSC_VER >= 1700
-#pragma optimize( "", off )
-#endif
-static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
- unsigned char _tokens[][2],int _ntokens){
-  ogg_int16_t   node[34];
-  unsigned char depth[34];
-  unsigned char last[34];
-  size_t        ntree;
-  int           ti;
-  int           l;
-  depth[0]=0;
-  last[0]=(unsigned char)(_ntokens-1);
-  ntree=0;
-  ti=0;
-  l=0;
-  do{
-    int nbits;
-    nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
-    node[l]=(ogg_int16_t)ntree;
-    ntree+=oc_huff_node_size(nbits);
-    if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
-    do{
-      while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
-        if(_tree!=NULL){
-          ogg_int16_t leaf;
-          int         nentries;
-          nentries=1<<depth[l]+nbits-_tokens[ti][1];
-          leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
-          while(nentries-->0)_tree[node[l]++]=leaf;
-        }
-        ti++;
-      }
-      if(ti<=last[l]){
-        /*We need to recurse*/
-        depth[l+1]=(unsigned char)(depth[l]+nbits);
-        if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
-        l++;
-        last[l]=
-         (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
-        break;
-      }
-      /*Pop back up a level of recursion.*/
-      else if(l-->0)nbits=depth[l+1]-depth[l];
-    }
-    while(l>=0);
-  }
-  while(l>=0);
-  return ntree;
-}
-#if defined(_MSC_VER) && _MSC_VER >= 1700
-#pragma optimize( "", on )
-#endif
-
-/*Unpacks a set of Huffman trees, and reduces them to a collapsed
-   representation.
-  _opb:   The buffer to unpack the trees from.
-  _nodes: The table to fill with the Huffman trees.
-  Return: 0 on success, or a negative value on error.
-          The caller is responsible for cleaning up any partially initialized
-           _nodes on failure.*/
-int oc_huff_trees_unpack(oc_pack_buf *_opb,
- ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
-  int i;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    unsigned char  tokens[256][2];
-    int            ntokens;
-    ogg_int16_t   *tree;
-    size_t         size;
-    /*Unpack the full tree into a temporary buffer.*/
-    ntokens=oc_huff_tree_unpack(_opb,tokens);
-    if(ntokens<0)return ntokens;
-    /*Figure out how big the collapsed tree will be and allocate space for it.*/
-    size=oc_huff_tree_collapse(NULL,tokens,ntokens);
-    /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
-       OC_ROOT_HUFF_SLUSH too large.*/
-    if(size>32767)return TH_EIMPL;
-    tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
-    if(tree==NULL)return TH_EFAULT;
-    /*Construct the collapsed the tree.*/
-    oc_huff_tree_collapse(tree,tokens,ntokens);
-    _nodes[i]=tree;
-  }
-  return 0;
-}
-
-/*Determines the size in words of a Huffman subtree.
-  _tree: The complete Huffman tree.
-  _node: The index of the root of the desired subtree.
-  Return: The number of words required to store the tree.*/
-static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
-  size_t size;
-  int    nchildren;
-  int    n;
-  int    i;
-  n=_tree[_node];
-  size=oc_huff_node_size(n);
-  nchildren=1<<n;
-  i=0;
-  do{
-    int child;
-    child=_tree[_node+i+1];
-    if(child<=0)i+=1<<n-(-child>>8);
-    else{
-      size+=oc_huff_tree_size(_tree,child);
-      i++;
-    }
-  }
-  while(i<nchildren);
-  return size;
-}
-
-/*Makes a copy of the given set of Huffman trees.
-  _dst: The array to store the copy in.
-  _src: The array of trees to copy.*/
-int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
- const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
-  int total;
-  int i;
-  total=0;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    size_t size;
-    size=oc_huff_tree_size(_src[i],0);
-    total+=size;
-    _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
-    if(_dst[i]==NULL){
-      while(i-->0)_ogg_free(_dst[i]);
-      return TH_EFAULT;
-    }
-    memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
-  }
-  return 0;
-}
-
-/*Frees the memory used by a set of Huffman trees.
-  _nodes: The array of trees to free.*/
-void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
-  int i;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
-}
-
-
-/*Unpacks a single token using the given Huffman tree.
-  _opb:  The buffer to unpack the token from.
-  _node: The tree to unpack the token with.
-  Return: The token value.*/
-int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
-  const unsigned char *ptr;
-  const unsigned char *stop;
-  oc_pb_window         window;
-  int                  available;
-  long                 bits;
-  int                  node;
-  int                  n;
-  ptr=_opb->ptr;
-  window=_opb->window;
-  stop=_opb->stop;
-  available=_opb->bits;
-  node=0;
-  for(;;){
-    n=_tree[node];
-    if(n>available){
-      unsigned shift;
-      shift=OC_PB_WINDOW_SIZE-available;
-      do{
-        /*We don't bother setting eof because we won't check for it after we've
-           started decoding DCT tokens.*/
-        if(ptr>=stop){
-          shift=(unsigned)-OC_LOTS_OF_BITS;
-          break;
-        }
-        shift-=8;
-        window|=(oc_pb_window)*ptr++<<shift;
-      }
-      while(shift>=8);
-      /*Note: We never request more than 24 bits, so there's no need to fill in
-         the last partial byte here.*/
-      available=OC_PB_WINDOW_SIZE-shift;
-    }
-    bits=window>>OC_PB_WINDOW_SIZE-n;
-    node=_tree[node+1+bits];
-    if(node<=0)break;
-    window<<=n;
-    available-=n;
-  }
-  node=-node;
-  n=node>>8;
-  window<<=n;
-  available-=n;
-  _opb->ptr=ptr;
-  _opb->window=window;
-  _opb->bits=available;
-  return node&255;
-}
diff --git a/media/libtheora/lib/huffdec.h b/media/libtheora/lib/huffdec.h
deleted file mode 100644
index 2fd112a90..000000000
--- a/media/libtheora/lib/huffdec.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: huffdec.h 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_huffdec_H)
-# define _huffdec_H (1)
-# include "huffman.h"
-# include "bitpack.h"
-
-
-
-int oc_huff_trees_unpack(oc_pack_buf *_opb,
- ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
- const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]);
-void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_node);
-
-#endif
diff --git a/media/libtheora/lib/huffman.h b/media/libtheora/lib/huffman.h
deleted file mode 100644
index 36cf7572e..000000000
--- a/media/libtheora/lib/huffman.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: huffman.h 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#if !defined(_huffman_H)
-# define _hufffman_H (1)
-# include "theora/codec.h"
-# include "ocintrin.h"
-
-/*The range of valid quantized DCT coefficient values.
-  VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
-#define OC_DCT_VAL_RANGE         (580)
-
-#define OC_NDCT_TOKEN_BITS       (5)
-
-#define OC_DCT_EOB1_TOKEN        (0)
-#define OC_DCT_EOB2_TOKEN        (1)
-#define OC_DCT_EOB3_TOKEN        (2)
-#define OC_DCT_REPEAT_RUN0_TOKEN (3)
-#define OC_DCT_REPEAT_RUN1_TOKEN (4)
-#define OC_DCT_REPEAT_RUN2_TOKEN (5)
-#define OC_DCT_REPEAT_RUN3_TOKEN (6)
-
-#define OC_DCT_SHORT_ZRL_TOKEN   (7)
-#define OC_DCT_ZRL_TOKEN         (8)
-
-#define OC_ONE_TOKEN             (9)
-#define OC_MINUS_ONE_TOKEN       (10)
-#define OC_TWO_TOKEN             (11)
-#define OC_MINUS_TWO_TOKEN       (12)
-
-#define OC_DCT_VAL_CAT2          (13)
-#define OC_DCT_VAL_CAT3          (17)
-#define OC_DCT_VAL_CAT4          (18)
-#define OC_DCT_VAL_CAT5          (19)
-#define OC_DCT_VAL_CAT6          (20)
-#define OC_DCT_VAL_CAT7          (21)
-#define OC_DCT_VAL_CAT8          (22)
-
-#define OC_DCT_RUN_CAT1A         (23)
-#define OC_DCT_RUN_CAT1B         (28)
-#define OC_DCT_RUN_CAT1C         (29)
-#define OC_DCT_RUN_CAT2A         (30)
-#define OC_DCT_RUN_CAT2B         (31)
-
-#define OC_NDCT_EOB_TOKEN_MAX    (7)
-#define OC_NDCT_ZRL_TOKEN_MAX    (9)
-#define OC_NDCT_VAL_MAX          (23)
-#define OC_NDCT_VAL_CAT1_MAX     (13)
-#define OC_NDCT_VAL_CAT2_MAX     (17)
-#define OC_NDCT_VAL_CAT2_SIZE    (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
-#define OC_NDCT_RUN_MAX          (32)
-#define OC_NDCT_RUN_CAT1A_MAX    (28)
-
-extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
-
-#endif
diff --git a/media/libtheora/lib/idct.c b/media/libtheora/lib/idct.c
deleted file mode 100644
index c56eb94c5..000000000
--- a/media/libtheora/lib/idct.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#include <string.h>
-#include "internal.h"
-#include "dct.h"
-
-/*Performs an inverse 8 point Type-II DCT transform.
-  The output is scaled by a factor of 2 relative to the orthonormal version of
-   the transform.
-  _y: The buffer to store the result in.
-      Data will be placed in every 8th entry (e.g., in a column of an 8x8
-       block).
-  _x: The input coefficients.
-      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
-static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
-  ogg_int32_t t[8];
-  ogg_int32_t r;
-  /*Stage 1:*/
-  /*0-1 butterfly.*/
-  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
-  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
-  /*2-3 rotation by 6pi/16.*/
-  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
-  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
-  /*4-7 rotation by 7pi/16.*/
-  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
-  /*5-6 rotation by 3pi/16.*/
-  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
-  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
-  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
-  /*Stage 2:*/
-  /*4-5 butterfly.*/
-  r=t[4]+t[5];
-  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
-  t[4]=r;
-  /*7-6 butterfly.*/
-  r=t[7]+t[6];
-  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
-  t[7]=r;
-  /*Stage 3:*/
-  /*0-3 butterfly.*/
-  r=t[0]+t[3];
-  t[3]=t[0]-t[3];
-  t[0]=r;
-  /*1-2 butterfly.*/
-  r=t[1]+t[2];
-  t[2]=t[1]-t[2];
-  t[1]=r;
-  /*6-5 butterfly.*/
-  r=t[6]+t[5];
-  t[5]=t[6]-t[5];
-  t[6]=r;
-  /*Stage 4:*/
-  /*0-7 butterfly.*/
-  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
-  /*1-6 butterfly.*/
-  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
-  /*2-5 butterfly.*/
-  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
-  /*3-4 butterfly.*/
-  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
-  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
-  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
-  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
-  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
-}
-
-/*Performs an inverse 8 point Type-II DCT transform.
-  The output is scaled by a factor of 2 relative to the orthonormal version of
-   the transform.
-  _y: The buffer to store the result in.
-      Data will be placed in every 8th entry (e.g., in a column of an 8x8
-       block).
-  _x: The input coefficients.
-      Only the first 4 entries are used.
-      The other 4 are assumed to be 0.*/
-static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
-  ogg_int32_t t[8];
-  ogg_int32_t r;
-  /*Stage 1:*/
-  t[0]=OC_C4S4*_x[0]>>16;
-  t[2]=OC_C6S2*_x[2]>>16;
-  t[3]=OC_C2S6*_x[2]>>16;
-  t[4]=OC_C7S1*_x[1]>>16;
-  t[5]=-(OC_C5S3*_x[3]>>16);
-  t[6]=OC_C3S5*_x[3]>>16;
-  t[7]=OC_C1S7*_x[1]>>16;
-  /*Stage 2:*/
-  r=t[4]+t[5];
-  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
-  t[4]=r;
-  r=t[7]+t[6];
-  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
-  t[7]=r;
-  /*Stage 3:*/
-  t[1]=t[0]+t[2];
-  t[2]=t[0]-t[2];
-  r=t[0]+t[3];
-  t[3]=t[0]-t[3];
-  t[0]=r;
-  r=t[6]+t[5];
-  t[5]=t[6]-t[5];
-  t[6]=r;
-  /*Stage 4:*/
-  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
-  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
-  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
-  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
-  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
-  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
-  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
-  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
-}
-
-/*Performs an inverse 8 point Type-II DCT transform.
-  The output is scaled by a factor of 2 relative to the orthonormal version of
-   the transform.
-  _y: The buffer to store the result in.
-      Data will be placed in every 8th entry (e.g., in a column of an 8x8
-       block).
-  _x: The input coefficients.
-      Only the first 3 entries are used.
-      The other 5 are assumed to be 0.*/
-static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
-  ogg_int32_t t[8];
-  ogg_int32_t r;
-  /*Stage 1:*/
-  t[0]=OC_C4S4*_x[0]>>16;
-  t[2]=OC_C6S2*_x[2]>>16;
-  t[3]=OC_C2S6*_x[2]>>16;
-  t[4]=OC_C7S1*_x[1]>>16;
-  t[7]=OC_C1S7*_x[1]>>16;
-  /*Stage 2:*/
-  t[5]=OC_C4S4*t[4]>>16;
-  t[6]=OC_C4S4*t[7]>>16;
-  /*Stage 3:*/
-  t[1]=t[0]+t[2];
-  t[2]=t[0]-t[2];
-  r=t[0]+t[3];
-  t[3]=t[0]-t[3];
-  t[0]=r;
-  r=t[6]+t[5];
-  t[5]=t[6]-t[5];
-  t[6]=r;
-  /*Stage 4:*/
-  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
-  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
-  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
-  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
-  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
-  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
-  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
-  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
-}
-
-/*Performs an inverse 8 point Type-II DCT transform.
-  The output is scaled by a factor of 2 relative to the orthonormal version of
-   the transform.
-  _y: The buffer to store the result in.
-      Data will be placed in every 8th entry (e.g., in a column of an 8x8
-       block).
-  _x: The input coefficients.
-      Only the first 2 entries are used.
-      The other 6 are assumed to be 0.*/
-static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
-  ogg_int32_t t[8];
-  ogg_int32_t r;
-  /*Stage 1:*/
-  t[0]=OC_C4S4*_x[0]>>16;
-  t[4]=OC_C7S1*_x[1]>>16;
-  t[7]=OC_C1S7*_x[1]>>16;
-  /*Stage 2:*/
-  t[5]=OC_C4S4*t[4]>>16;
-  t[6]=OC_C4S4*t[7]>>16;
-  /*Stage 3:*/
-  r=t[6]+t[5];
-  t[5]=t[6]-t[5];
-  t[6]=r;
-  /*Stage 4:*/
-  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
-  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
-  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
-  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
-  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
-  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
-  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
-  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
-}
-
-/*Performs an inverse 8 point Type-II DCT transform.
-  The output is scaled by a factor of 2 relative to the orthonormal version of
-   the transform.
-  _y: The buffer to store the result in.
-      Data will be placed in every 8th entry (e.g., in a column of an 8x8
-       block).
-  _x: The input coefficients.
-      Only the first entry is used.
-      The other 7 are assumed to be 0.*/
-static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
-  _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
-   _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.
-  All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
-   x  x  0  0  0  0  0  0
-   x  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-  _y: The buffer to store the result in.
-      This may be the same as _x.
-  _x: The input coefficients.*/
-static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  ogg_int16_t w[64];
-  int         i;
-  /*Transform rows of x into columns of w.*/
-  idct8_2(w,_x);
-  idct8_1(w+1,_x+8);
-  /*Transform rows of w into columns of y.*/
-  for(i=0;i<8;i++)idct8_2(_y+i,w+i*8);
-  /*Adjust for the scale factor.*/
-  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
-  /*Clear input data for next block (decoder only).*/
-  if(_x!=_y)_x[0]=_x[1]=_x[8]=0;
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.
-  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
-   x  x  x  x  0  0  0  0
-   x  x  x  0  0  0  0  0
-   x  x  0  0  0  0  0  0
-   x  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-   0  0  0  0  0  0  0  0
-  _y: The buffer to store the result in.
-      This may be the same as _x.
-  _x: The input coefficients.*/
-static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  ogg_int16_t w[64];
-  int         i;
-  /*Transform rows of x into columns of w.*/
-  idct8_4(w,_x);
-  idct8_3(w+1,_x+8);
-  idct8_2(w+2,_x+16);
-  idct8_1(w+3,_x+24);
-  /*Transform rows of w into columns of y.*/
-  for(i=0;i<8;i++)idct8_4(_y+i,w+i*8);
-  /*Adjust for the scale factor.*/
-  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
-  /*Clear input data for next block (decoder only).*/
-  if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0;
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.
-  _y: The buffer to store the result in.
-      This may be the same as _x.
-  _x: The input coefficients.*/
-static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  ogg_int16_t w[64];
-  int         i;
-  /*Transform rows of x into columns of w.*/
-  for(i=0;i<8;i++)idct8(w+i,_x+i*8);
-  /*Transform rows of w into columns of y.*/
-  for(i=0;i<8;i++)idct8(_y+i,w+i*8);
-  /*Adjust for the scale factor.*/
-  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
-  if(_x!=_y)for(i=0;i<64;i++)_x[i]=0;
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.*/
-void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
-  /*_last_zzi is subtly different from an actual count of the number of
-     coefficients we decoded for this block.
-    It contains the value of zzi BEFORE the final token in the block was
-     decoded.
-    In most cases this is an EOB token (the continuation of an EOB run from a
-     previous block counts), and so this is the same as the coefficient count.
-    However, in the case that the last token was NOT an EOB token, but filled
-     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
-    Provided the last token was not a pure zero run, the minimum value it can
-     be is 46, and so that doesn't affect any of the cases in this routine.
-    However, if the last token WAS a pure zero run of length 63, then _last_zzi
-     will be 1 while the number of coefficients decoded is 64.
-    Thus, we will trigger the following special case, where the real
-     coefficient count would not.
-    Note also that a zero run of length 64 will give _last_zzi a value of 0,
-     but we still process the DC coefficient, which might have a non-zero value
-     due to DC prediction.
-    Although convoluted, this is arguably the correct behavior: it allows us to
-     use a smaller transform when the block ends with a long zero run instead
-     of a normal EOB token.
-    It could be smarter... multiple separate zero runs at the end of a block
-     will fool it, but an encoder that generates these really deserves what it
-     gets.
-    Needless to say we inherited this approach from VP3.*/
-  /*Then perform the iDCT.*/
-  if(_last_zzi<=3)oc_idct8x8_3(_y,_x);
-  else if(_last_zzi<=10)oc_idct8x8_10(_y,_x);
-  else oc_idct8x8_slow(_y,_x);
-}
diff --git a/media/libtheora/lib/info.c b/media/libtheora/lib/info.c
deleted file mode 100644
index 6b9762978..000000000
--- a/media/libtheora/lib/info.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include "internal.h"
-
-
-
-/*This is more or less the same as strncasecmp, but that doesn't exist
-   everywhere, and this is a fairly trivial function, so we include it.
-  Note: We take advantage of the fact that we know _n is less than or equal to
-   the length of at least one of the strings.*/
-static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
-  int c;
-  for(c=0;c<_n;c++){
-    if(toupper(_s1[c])!=toupper(_s2[c]))return !0;
-  }
-  return _s1[c]!='=';
-}
-
-
-
-void th_info_init(th_info *_info){
-  memset(_info,0,sizeof(*_info));
-  _info->version_major=TH_VERSION_MAJOR;
-  _info->version_minor=TH_VERSION_MINOR;
-  _info->version_subminor=TH_VERSION_SUB;
-  _info->keyframe_granule_shift=6;
-}
-
-void th_info_clear(th_info *_info){
-  memset(_info,0,sizeof(*_info));
-}
-
-
-
-void th_comment_init(th_comment *_tc){
-  memset(_tc,0,sizeof(*_tc));
-}
-
-void th_comment_add(th_comment *_tc,char *_comment){
-  char **user_comments;
-  int   *comment_lengths;
-  int    comment_len;
-  user_comments=_ogg_realloc(_tc->user_comments,
-   (_tc->comments+2)*sizeof(*_tc->user_comments));
-  if(user_comments==NULL)return;
-  _tc->user_comments=user_comments;
-  comment_lengths=_ogg_realloc(_tc->comment_lengths,
-   (_tc->comments+2)*sizeof(*_tc->comment_lengths));
-  if(comment_lengths==NULL)return;
-  _tc->comment_lengths=comment_lengths;
-  comment_len=strlen(_comment);
-  comment_lengths[_tc->comments]=comment_len;
-  user_comments[_tc->comments]=_ogg_malloc(comment_len+1);
-  if(user_comments[_tc->comments]==NULL)return;
-  memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1);
-  _tc->comments++;
-  _tc->user_comments[_tc->comments]=NULL;
-}
-
-void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){
-  char *comment;
-  int   tag_len;
-  int   val_len;
-  tag_len=strlen(_tag);
-  val_len=strlen(_val);
-  /*+2 for '=' and '\0'.*/
-  comment=_ogg_malloc(tag_len+val_len+2);
-  if(comment==NULL)return;
-  memcpy(comment,_tag,tag_len);
-  comment[tag_len]='=';
-  memcpy(comment+tag_len+1,_val,val_len+1);
-  th_comment_add(_tc,comment);
-  _ogg_free(comment);
-}
-
-char *th_comment_query(th_comment *_tc,char *_tag,int _count){
-  long i;
-  int  found;
-  int  tag_len;
-  tag_len=strlen(_tag);
-  found=0;
-  for(i=0;i<_tc->comments;i++){
-    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){
-      /*We return a pointer to the data, not a copy.*/
-      if(_count==found++)return _tc->user_comments[i]+tag_len+1;
-    }
-  }
-  /*Didn't find anything.*/
-  return NULL;
-}
-
-int th_comment_query_count(th_comment *_tc,char *_tag){
-  long i;
-  int  tag_len;
-  int  count;
-  tag_len=strlen(_tag);
-  count=0;
-  for(i=0;i<_tc->comments;i++){
-    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++;
-  }
-  return count;
-}
-
-void th_comment_clear(th_comment *_tc){
-  if(_tc!=NULL){
-    long i;
-    for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]);
-    _ogg_free(_tc->user_comments);
-    _ogg_free(_tc->comment_lengths);
-    _ogg_free(_tc->vendor);
-    memset(_tc,0,sizeof(*_tc));
-  }
-}
diff --git a/media/libtheora/lib/internal.c b/media/libtheora/lib/internal.c
deleted file mode 100644
index 1b2611da1..000000000
--- a/media/libtheora/lib/internal.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: internal.c 17506 2010-10-13 02:52:41Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-#include "internal.h"
-
-
-
-/*A map from the index in the zig zag scan to the coefficient number in a
-   block.
-  All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
-   past the end of a block in bogus streams get mapped to a known location.*/
-const unsigned char OC_FZIG_ZAG[128]={
-   0, 1, 8,16, 9, 2, 3,10,
-  17,24,32,25,18,11, 4, 5,
-  12,19,26,33,40,48,41,34,
-  27,20,13, 6, 7,14,21,28,
-  35,42,49,56,57,50,43,36,
-  29,22,15,23,30,37,44,51,
-  58,59,52,45,38,31,39,46,
-  53,60,61,54,47,55,62,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64
-};
-
-/*A map from the coefficient number in a block to its index in the zig zag
-   scan.*/
-const unsigned char OC_IZIG_ZAG[64]={
-   0, 1, 5, 6,14,15,27,28,
-   2, 4, 7,13,16,26,29,42,
-   3, 8,12,17,25,30,41,43,
-   9,11,18,24,31,40,44,53,
-  10,19,23,32,39,45,52,54,
-  20,22,33,38,46,51,55,60,
-  21,34,37,47,50,56,59,61,
-  35,36,48,49,57,58,62,63
-};
-
-/*A map from physical macro block ordering to bitstream macro block
-   ordering within a super block.*/
-const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
-
-/*A list of the indices in the oc_mb.map array that can be valid for each of
-   the various chroma decimation types.*/
-const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
-  {0,1,2,3,4,8},
-  {0,1,2,3,4,5,8,9},
-  {0,1,2,3,4,6,8,10},
-  {0,1,2,3,4,5,6,7,8,9,10,11}
-};
-
-/*The number of indices in the oc_mb.map array that can be valid for each of
-   the various chroma decimation types.*/
-const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
-
-/*The number of extra bits that are coded with each of the DCT tokens.
-  Each DCT token has some fixed number of additional bits (possibly 0) stored
-   after the token itself, containing, for example, coefficient magnitude,
-   sign bits, etc.*/
-const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
-  0,0,0,2,3,4,12,3,6,
-  0,0,0,0,
-  1,1,1,1,2,3,4,5,6,10,
-  1,1,1,1,1,3,4,
-  2,3
-};
-
-
-
-int oc_ilog(unsigned _v){
-  int ret;
-  for(ret=0;_v;ret++)_v>>=1;
-  return ret;
-}
-
-
-
-void *oc_aligned_malloc(size_t _sz,size_t _align){
-  unsigned char *p;
-  if(_align-1>UCHAR_MAX||(_align&_align-1)||_sz>~(size_t)0-_align)return NULL;
-  p=(unsigned char *)_ogg_malloc(_sz+_align);
-  if(p!=NULL){
-    int offs;
-    offs=((p-(unsigned char *)0)-1&_align-1);
-    p[offs]=offs;
-    p+=offs+1;
-  }
-  return p;
-}
-
-void oc_aligned_free(void *_ptr){
-  unsigned char *p;
-  p=(unsigned char *)_ptr;
-  if(p!=NULL){
-    int offs;
-    offs=*--p;
-    _ogg_free(p-offs);
-  }
-}
-
-
-void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
-  size_t  rowsz;
-  size_t  colsz;
-  size_t  datsz;
-  char   *ret;
-  colsz=_height*sizeof(void *);
-  rowsz=_sz*_width;
-  datsz=rowsz*_height;
-  /*Alloc array and row pointers.*/
-  ret=(char *)_ogg_malloc(datsz+colsz);
-  if(ret==NULL)return NULL;
-  /*Initialize the array.*/
-  if(ret!=NULL){
-    size_t   i;
-    void   **p;
-    char    *datptr;
-    p=(void **)ret;
-    i=_height;
-    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
-  }
-  return (void **)ret;
-}
-
-void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
-  size_t  colsz;
-  size_t  rowsz;
-  size_t  datsz;
-  char   *ret;
-  colsz=_height*sizeof(void *);
-  rowsz=_sz*_width;
-  datsz=rowsz*_height;
-  /*Alloc array and row pointers.*/
-  ret=(char *)_ogg_calloc(datsz+colsz,1);
-  if(ret==NULL)return NULL;
-  /*Initialize the array.*/
-  if(ret!=NULL){
-    size_t   i;
-    void   **p;
-    char    *datptr;
-    p=(void **)ret;
-    i=_height;
-    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
-  }
-  return (void **)ret;
-}
-
-void oc_free_2d(void *_ptr){
-  _ogg_free(_ptr);
-}
-
-/*Fills in a Y'CbCr buffer with a pointer to the image data in the first
-   buffer, but with the opposite vertical orientation.
-  _dst: The destination buffer.
-        This can be the same as _src.
-  _src: The source buffer.*/
-void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
- const th_ycbcr_buffer _src){
-  int pli;
-  for(pli=0;pli<3;pli++){
-    _dst[pli].width=_src[pli].width;
-    _dst[pli].height=_src[pli].height;
-    _dst[pli].stride=-_src[pli].stride;
-    _dst[pli].data=_src[pli].data
-     +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride;
-  }
-}
-
-const char *th_version_string(void){
-  return OC_VENDOR_STRING;
-}
-
-ogg_uint32_t th_version_number(void){
-  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB;
-}
-
-/*Determines the packet type.
-  Note that this correctly interprets a 0-byte packet as a video data packet.
-  Return: 1 for a header packet, 0 for a data packet.*/
-int th_packet_isheader(ogg_packet *_op){
-  return _op->bytes>0?_op->packet[0]>>7:0;
-}
-
-/*Determines the frame type of a video data packet.
-  Note that this correctly interprets a 0-byte packet as a delta frame.
-  Return: 1 for a key frame, 0 for a delta frame, and -1 for a header
-           packet.*/
-int th_packet_iskeyframe(ogg_packet *_op){
-  return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40);
-}
diff --git a/media/libtheora/lib/internal.h b/media/libtheora/lib/internal.h
deleted file mode 100644
index 24e1b5125..000000000
--- a/media/libtheora/lib/internal.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: internal.h 17578 2010-10-29 04:21:26Z tterribe $
-
- ********************************************************************/
-#if !defined(_internal_H)
-# define _internal_H (1)
-# include <stdlib.h>
-# include <limits.h>
-# if defined(HAVE_CONFIG_H)
-#  include "config.h"
-# endif
-# include "theora/codec.h"
-# include "theora/theora.h"
-# include "ocintrin.h"
-
-# if !defined(__GNUC_PREREQ)
-#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
-#   define __GNUC_PREREQ(_maj,_min) \
- ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
-#  else
-#   define __GNUC_PREREQ(_maj,_min) 0
-#  endif
-# endif
-
-# if defined(_MSC_VER)
-/*Disable missing EMMS warnings.*/
-#  pragma warning(disable:4799)
-/*Thank you Microsoft, I know the order of operations.*/
-#  pragma warning(disable:4554)
-# endif
-/*You, too, gcc.*/
-# if __GNUC_PREREQ(4,2)
-#  pragma GCC diagnostic ignored "-Wparentheses"
-# endif
-
-/*Some assembly constructs require aligned operands.
-  The following macros are _only_ intended for structure member declarations.
-  Although they will sometimes work on stack variables, gcc will often silently
-   ignore them.
-  A separate set of macros could be made for manual stack alignment, but we
-   don't actually require it anywhere.*/
-# if defined(OC_X86_ASM)||defined(OC_ARM_ASM)
-#  if defined(__GNUC__)
-#   define OC_ALIGN8(expr) expr __attribute__((aligned(8)))
-#   define OC_ALIGN16(expr) expr __attribute__((aligned(16)))
-#  elif defined(_MSC_VER)
-#   define OC_ALIGN8(expr) __declspec (align(8)) expr
-#   define OC_ALIGN16(expr) __declspec (align(16)) expr
-#  else
-#   error "Alignment macros required for this platform."
-#  endif
-# endif
-# if !defined(OC_ALIGN8)
-#  define OC_ALIGN8(expr) expr
-# endif
-# if !defined(OC_ALIGN16)
-#  define OC_ALIGN16(expr) expr
-# endif
-
-
-
-/*This library's version.*/
-# define OC_VENDOR_STRING "Xiph.Org libtheora 1.2.0alpha 20100924 (Ptalarbvorm)"
-
-/*Theora bitstream version.*/
-# define TH_VERSION_MAJOR (3)
-# define TH_VERSION_MINOR (2)
-# define TH_VERSION_SUB   (1)
-# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
- ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \
- ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
- (_info)->version_subminor>=(_sub)))
-
-
-
-/*A map from the index in the zig zag scan to the coefficient number in a
-   block.*/
-extern const unsigned char OC_FZIG_ZAG[128];
-/*A map from the coefficient number in a block to its index in the zig zag
-   scan.*/
-extern const unsigned char OC_IZIG_ZAG[64];
-/*A map from physical macro block ordering to bitstream macro block
-   ordering within a super block.*/
-extern const unsigned char OC_MB_MAP[2][2];
-/*A list of the indices in the oc_mb_map array that can be valid for each of
-   the various chroma decimation types.*/
-extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
-/*The number of indices in the oc_mb_map array that can be valid for each of
-   the various chroma decimation types.*/
-extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
-
-
-
-int oc_ilog(unsigned _v);
-void *oc_aligned_malloc(size_t _sz,size_t _align);
-void oc_aligned_free(void *_ptr);
-void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
-void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
-void oc_free_2d(void *_ptr);
-
-void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
- const th_ycbcr_buffer _src);
-
-#endif
diff --git a/media/libtheora/lib/mathops.h b/media/libtheora/lib/mathops.h
deleted file mode 100644
index a1a4f9df0..000000000
--- a/media/libtheora/lib/mathops.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#if !defined(_mathops_H)
-# define _mathops_H (1)
-# include <ogg/ogg.h>
-
-# if __GNUC_PREREQ(3,4)
-#  include <limits.h>
-/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from
-   "upgrading" the type of an entire expression to an (unsigned) size_t.*/
-#  if INT_MAX>=2147483647
-#   define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
-#   define OC_CLZ32(_x) (__builtin_clz(_x))
-#  elif LONG_MAX>=2147483647L
-#   define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
-#   define OC_CLZ32(_x) (__builtin_clzl(_x))
-#  endif
-#  if INT_MAX>=9223372036854775807LL
-#   define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
-#   define OC_CLZ64(_x) (__builtin_clz(_x))
-#  elif LONG_MAX>=9223372036854775807LL
-#   define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
-#   define OC_CLZ64(_x) (__builtin_clzl(_x))
-#  elif LLONG_MAX>=9223372036854775807LL|| \
-    __LONG_LONG_MAX__>=9223372036854775807LL
-#   define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
-#   define OC_CLZ64(_x) (__builtin_clzll(_x))
-#  endif
-# endif
-
-
-
-/**
- * oc_ilog32 - Integer binary logarithm of a 32-bit value.
- * @_v: A 32-bit value.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin
- *  function instead, which should be faster.
- */
-int oc_ilog32(ogg_uint32_t _v);
-/**
- * oc_ilog64 - Integer binary logarithm of a 64-bit value.
- * @_v: A 64-bit value.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin
- *  function instead, which should be faster.
- */
-int oc_ilog64(ogg_int64_t _v);
-
-
-# if defined(OC_CLZ32)
-/**
- * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
- * @_v: A non-zero 32-bit value.
- * Returns floor(log2(_v))+1.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * If _v is zero, the return value is undefined; use OC_ILOG_32() instead.
- */
-#  define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v))
-/**
- * OC_ILOG_32 - Integer binary logarithm of a 32-bit value.
- * @_v: A 32-bit value.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- */
-#  define OC_ILOG_32(_v)   (OC_ILOGNZ_32(_v)&-!!(_v))
-# else
-#  define OC_ILOGNZ_32(_v) (oc_ilog32(_v))
-#  define OC_ILOG_32(_v)   (oc_ilog32(_v))
-# endif
-
-# if defined(CLZ64)
-/**
- * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
- * @_v: A non-zero 64-bit value.
- * Returns floor(log2(_v))+1.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * If _v is zero, the return value is undefined; use OC_ILOG_64() instead.
- */
-#  define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v))
-/**
- * OC_ILOG_64 - Integer binary logarithm of a 64-bit value.
- * @_v: A 64-bit value.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- */
-#  define OC_ILOG_64(_v)   (OC_ILOGNZ_64(_v)&-!!(_v))
-# else
-#  define OC_ILOGNZ_64(_v) (oc_ilog64(_v))
-#  define OC_ILOG_64(_v)   (oc_ilog64(_v))
-# endif
-
-# define OC_STATIC_ILOG0(_v) (!!(_v))
-# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v))
-# define OC_STATIC_ILOG2(_v) \
- (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v))
-# define OC_STATIC_ILOG3(_v) \
- (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v))
-# define OC_STATIC_ILOG4(_v) \
- (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v))
-# define OC_STATIC_ILOG5(_v) \
- (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v))
-# define OC_STATIC_ILOG6(_v) \
- (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v))
-/**
- * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
- * @_v: A non-negative 32-bit constant.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * This macro is suitable for evaluation at compile time, but it should not be
- *  used on values that can change at runtime, as it operates via exhaustive
- *  search.
- */
-# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v)))
-/**
- * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
- * @_v: A non-negative 64-bit constant.
- * Returns floor(log2(_v))+1, or 0 if _v==0.
- * This is the number of bits that would be required to represent _v in two's
- *  complement notation with all of the leading zeros stripped.
- * This macro is suitable for evaluation at compile time, but it should not be
- *  used on values that can change at runtime, as it operates via exhaustive
- *  search.
- */
-# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v)))
-
-#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57)
-#define OC_Q10(_v) ((_v)<<10)
-
-ogg_int64_t oc_bexp64(ogg_int64_t _z);
-ogg_int64_t oc_blog64(ogg_int64_t _w);
-
-ogg_uint32_t oc_bexp32_q10(int _z);
-int oc_blog32_q10(ogg_uint32_t _w);
-
-#endif
diff --git a/media/libtheora/lib/ocintrin.h b/media/libtheora/lib/ocintrin.h
deleted file mode 100644
index d49ebb215..000000000
--- a/media/libtheora/lib/ocintrin.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-/*Some common macros for potential platform-specific optimization.*/
-#include <math.h>
-#if !defined(_ocintrin_H)
-# define _ocintrin_H (1)
-
-/*Some specific platforms may have optimized intrinsic or inline assembly
-   versions of these functions which can substantially improve performance.
-  We define macros for them to allow easy incorporation of these non-ANSI
-   features.*/
-
-/*Note that we do not provide a macro for abs(), because it is provided as a
-   library function, which we assume is translated into an intrinsic to avoid
-   the function call overhead and then implemented in the smartest way for the
-   target platform.
-  With modern gcc (4.x), this is true: it uses cmov instructions if the
-   architecture supports it and branchless bit-twiddling if it does not (the
-   speed difference between the two approaches is not measurable).
-  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
-   by Sun Microsystems, despite prior art dating back to at least 1996:
-   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
-  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
-   conditional jump, which is horrible on deeply piplined architectures (e.g.,
-   all consumer architectures for the past decade or more).
-  Also be warned that -C*abs(x) where C is a constant is mis-optimized as
-   abs(C*x) on every gcc release before 4.2.3.
-  See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
-
-/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
-   given an appropriate architecture, but the branchless bit-twiddling versions
-   are just as fast, and do not require any special target architecture.
-  Earlier gcc versions (3.x) compiled both code to the same assembly
-   instructions, because of the way they represented ((_b)>(_a)) internally.*/
-#define OC_MAXI(_a,_b)      ((_a)-((_a)-(_b)&-((_b)>(_a))))
-#define OC_MINI(_a,_b)      ((_a)+((_b)-(_a)&-((_b)<(_a))))
-/*Clamps an integer into the given range.
-  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
-   behavior is required to meet our documented API behavior).
-  _a: The lower bound.
-  _b: The value to clamp.
-  _c: The upper boud.*/
-#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
-#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
-/*This has a chance of compiling branchless, and is just as fast as the
-   bit-twiddling method, which is slightly less portable, since it relies on a
-   sign-extended rightshift, which is not guaranteed by ANSI (but present on
-   every relevant platform).*/
-#define OC_SIGNI(_a)        (((_a)>0)-((_a)<0))
-/*Slightly more portable than relying on a sign-extended right-shift (which is
-   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
-   compile it into the right-shift anyway.*/
-#define OC_SIGNMASK(_a)     (-((_a)<0))
-/*Divides an integer by a power of two, truncating towards 0.
-  _dividend: The integer to divide.
-  _shift:    The non-negative power of two to divide by.
-  _rmask:    (1<<_shift)-1*/
-#define OC_DIV_POW2(_dividend,_shift,_rmask)\
-  ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift))
-/*Divides _x by 65536, truncating towards 0.*/
-#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
-/*Divides _x by 2, truncating towards 0.*/
-#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
-/*Divides _x by 8, truncating towards 0.*/
-#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
-/*Divides _x by 16, truncating towards 0.*/
-#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
-/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
-   negative dividends first.
-  When _rval is (1<<_shift-1), this is equivalent to division with rounding
-   ties away from zero.*/
-#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
-  ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift))
-/*Divides a _x by 2, rounding towards even numbers.*/
-#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1)
-/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/
-#define OC_DIV_POW2_RE(_x,_shift) \
-  ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift))
-/*Swaps two integers _a and _b if _a>_b.*/
-#define OC_SORT2I(_a,_b) \
-  do{ \
-    int t__; \
-    t__=((_a)^(_b))&-((_b)<(_a)); \
-    (_a)^=t__; \
-    (_b)^=t__; \
-  } \
-  while(0)
-
-/*Accesses one of four (signed) bytes given an index.
-  This can be used to avoid small lookup tables.*/
-#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \
-  ((signed char) \
-   (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8))
-/*Accesses one of eight (unsigned) nibbles given an index.
-  This can be used to avoid small lookup tables.*/
-#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \
-  ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \
-   ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF)
-
-
-
-/*All of these macros should expect floats as arguments.*/
-#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
-#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
-#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
-#define OC_FABSF(_f)        ((float)fabs(_f))
-#define OC_SQRTF(_f)        ((float)sqrt(_f))
-#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
-#define OC_LOGF(_f)         ((float)log(_f))
-#define OC_IFLOORF(_f)      ((int)floor(_f))
-#define OC_ICEILF(_f)       ((int)ceil(_f))
-
-#endif
diff --git a/media/libtheora/lib/quant.c b/media/libtheora/lib/quant.c
deleted file mode 100644
index c3f3f4771..000000000
--- a/media/libtheora/lib/quant.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: quant.c 17307 2010-06-27 06:02:15Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "quant.h"
-#include "decint.h"
-
-/*The maximum output of the DCT with +/- 255 inputs is +/- 8157.
-  These minimum quantizers ensure the result after quantization (and after
-   prediction for DC) will be no more than +/- 510.
-  The tokenization system can handle values up to +/- 580, so there is no need
-   to do any coefficient clamping.
-  I would rather have allowed smaller quantizers and had to clamp, but these
-   minimums were required when constructing the original VP3 matrices and have
-   been formalized in the spec.*/
-static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
-static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
-
-/*Initializes the dequantization tables from a set of quantizer info.
-  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
-   be initialized as pointing to the storage reserved for them in the
-   oc_theora_state (resp. oc_enc_ctx) structure.
-  If some tables are duplicates of others, the pointers will be adjusted to
-   point to a single copy of the tables, but the storage for them will not be
-   freed.
-  If you're concerned about the memory footprint, the obvious thing to do is
-   to move the storage out of its fixed place in the structures and allocate
-   it on demand.
-  However, a much, much better option is to only store the quantization
-   matrices being used for the current frame, and to recalculate these as the
-   qi values change between frames (this is what VP3 did).*/
-void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
- int _pp_dc_scale[64],const th_quant_info *_qinfo){
-  /*Coding mode: intra or inter.*/
-  int          qti;
-  /*Y', C_b, C_r*/
-  int          pli;
-  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
-    /*Quality index.*/
-    int qi;
-    /*Range iterator.*/
-    int qri;
-    for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
-      th_quant_base base;
-      ogg_uint32_t  q;
-      int           qi_start;
-      int           qi_end;
-      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
-       sizeof(base));
-      qi_start=qi;
-      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
-      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
-      /*Iterate over quality indicies in this range.*/
-      for(;;){
-        ogg_uint32_t qfac;
-        int          zzi;
-        int          ci;
-        /*In the original VP3.2 code, the rounding offset and the size of the
-           dead zone around 0 were controlled by a "sharpness" parameter.
-          The size of our dead zone is now controlled by the per-coefficient
-           quality thresholds returned by our HVS module.
-          We round down from a more accurate value when the quality of the
-           reconstruction does not fall below our threshold and it saves bits.
-          Hence, all of that VP3.2 code is gone from here, and the remaining
-           floating point code has been implemented as equivalent integer code
-           with exact precision.*/
-        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
-        /*For postprocessing, not dequantization.*/
-        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
-        /*Scale DC the coefficient from the proper table.*/
-        q=(qfac/100)<<2;
-        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-        _dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
-        /*Now scale AC coefficients from the proper table.*/
-        for(zzi=1;zzi<64;zzi++){
-          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
-          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-          _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
-        }
-        /*If this is a duplicate of a previous matrix, use that instead.
-          This simple check helps us improve cache coherency later.*/
-        {
-          int dupe;
-          int qtj;
-          int plj;
-          dupe=0;
-          for(qtj=0;qtj<=qti;qtj++){
-            for(plj=0;plj<(qtj<qti?3:pli);plj++){
-              if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
-               sizeof(oc_quant_table))){
-                dupe=1;
-                break;
-              }
-            }
-            if(dupe)break;
-          }
-          if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
-        }
-        if(++qi>=qi_end)break;
-        /*Interpolate the next base matrix.*/
-        for(ci=0;ci<64;ci++){
-          base[ci]=(unsigned char)(
-           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
-           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
-           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
-           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
-        }
-      }
-    }
-  }
-}
diff --git a/media/libtheora/lib/quant.h b/media/libtheora/lib/quant.h
deleted file mode 100644
index 49ce13a65..000000000
--- a/media/libtheora/lib/quant.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-#if !defined(_quant_H)
-# define _quant_H (1)
-# include "theora/codec.h"
-# include "ocintrin.h"
-
-typedef ogg_uint16_t   oc_quant_table[64];
-
-
-/*Maximum scaled quantizer value.*/
-#define OC_QUANT_MAX          (1024<<2)
-
-
-void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
- int _pp_dc_scale[64],const th_quant_info *_qinfo);
-
-#endif
diff --git a/media/libtheora/lib/state.c b/media/libtheora/lib/state.c
deleted file mode 100644
index 5e7b0ae65..000000000
--- a/media/libtheora/lib/state.c
+++ /dev/null
@@ -1,1260 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "state.h"
-#if defined(OC_DUMP_IMAGES)
-# include <stdio.h>
-# include "png.h"
-#endif
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X and Y directions
-   (4:2:0).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
-   +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
-  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
-   +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
-  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the Y direction.
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
-  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
-  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
-  dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
-  dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
-  _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X direction (4:2:2).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
-  dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
-  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
-  dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
-  dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
-  _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with no chroma decimation (4:4:4).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lmbmv: The luma macro-block level motion vector to fill in for use in
-           prediction.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  _cbmvs[0]=_lbmvs[0];
-  _cbmvs[1]=_lbmvs[1];
-  _cbmvs[2]=_lbmvs[2];
-  _cbmvs[3]=_lbmvs[3];
-}
-
-/*A table of functions used to fill in the chroma plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.*/
-const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
-};
-
-
-
-/*Returns the fragment index of the top-left block in a macro block.
-  This can be used to test whether or not the whole macro block is valid.
-  _sb_map: The super block map.
-  _quadi:  The quadrant number.
-  Return: The index of the fragment of the upper left block in the macro
-   block, or -1 if the block lies outside the coded frame.*/
-static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
-  /*It so happens that under the Hilbert curve ordering described below, the
-     upper-left block in each macro block is at index 0, except in macro block
-     3, where it is at index 2.*/
-  return _sb_map[_quadi][_quadi&_quadi<<1];
-}
-
-/*Fills in the mapping from block positions to fragment numbers for a single
-   color plane.
-  This function also fills in the "valid" flag of each quadrant in the super
-   block flags.
-  _sb_maps:  The array of super block maps for the color plane.
-  _sb_flags: The array of super block flags for the color plane.
-  _frag0:    The index of the first fragment in the plane.
-  _hfrags:   The number of horizontal fragments in a coded frame.
-  _vfrags:   The number of vertical fragments in a coded frame.*/
-static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
- oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
-  /*Contains the (macro_block,block) indices for a 4x4 grid of
-     fragments.
-    The pattern is a 4x4 Hilbert space-filling curve.
-    A Hilbert curve has the nice property that as the curve grows larger, its
-     fractal dimension approaches 2.
-    The intuition is that nearby blocks in the curve are also close spatially,
-     with the previous element always an immediate neighbor, so that runs of
-     blocks should be well correlated.*/
-  static const int SB_MAP[4][4][2]={
-    {{0,0},{0,1},{3,2},{3,3}},
-    {{0,3},{0,2},{3,1},{3,0}},
-    {{1,0},{1,3},{2,0},{2,3}},
-    {{1,1},{1,2},{2,1},{2,2}}
-  };
-  ptrdiff_t  yfrag;
-  unsigned   sbi;
-  int        y;
-  sbi=0;
-  yfrag=_frag0;
-  for(y=0;;y+=4){
-    int imax;
-    int x;
-    /*Figure out how many columns of blocks in this super block lie within the
-       image.*/
-    imax=_vfrags-y;
-    if(imax>4)imax=4;
-    else if(imax<=0)break;
-    for(x=0;;x+=4,sbi++){
-      ptrdiff_t xfrag;
-      int       jmax;
-      int       quadi;
-      int       i;
-      /*Figure out how many rows of blocks in this super block lie within the
-         image.*/
-      jmax=_hfrags-x;
-      if(jmax>4)jmax=4;
-      else if(jmax<=0)break;
-      /*By default, set all fragment indices to -1.*/
-      memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
-      /*Fill in the fragment map for this super block.*/
-      xfrag=yfrag+x;
-      for(i=0;i<imax;i++){
-        int j;
-        for(j=0;j<jmax;j++){
-          _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
-        }
-        xfrag+=_hfrags;
-      }
-      /*Mark which quadrants of this super block lie within the image.*/
-      for(quadi=0;quadi<4;quadi++){
-        _sb_flags[sbi].quad_valid|=
-         (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
-      }
-    }
-    yfrag+=_hfrags<<2;
-  }
-}
-
-/*Fills in the Y plane fragment map for a macro block given the fragment
-   coordinates of its upper-left hand corner.
-  _mb_map:    The macro block map to fill.
-  _fplane: The description of the Y plane.
-  _xfrag0: The X location of the upper-left hand fragment in the luma plane.
-  _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
-static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
-  int i;
-  int j;
-  for(i=0;i<2;i++)for(j=0;j<2;j++){
-    _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
-  }
-}
-
-/*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with chroma decimated in the X and Y directions
-   (4:2:0).
-  _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.
-  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
-  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
-static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
-  ptrdiff_t fragi;
-  _xfrag0>>=1;
-  _yfrag0>>=1;
-  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
-  _mb_map[1][0]=fragi+_fplanes[1].froffset;
-  _mb_map[2][0]=fragi+_fplanes[2].froffset;
-}
-
-/*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with chroma decimated in the Y direction.
-  _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.
-  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
-  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
-static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
-  ptrdiff_t fragi;
-  int       j;
-  _yfrag0>>=1;
-  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
-  for(j=0;j<2;j++){
-    _mb_map[1][j]=fragi+_fplanes[1].froffset;
-    _mb_map[2][j]=fragi+_fplanes[2].froffset;
-    fragi++;
-  }
-}
-
-/*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with chroma decimated in the X direction (4:2:2).
-  _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.
-  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
-  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
-static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
-  ptrdiff_t fragi;
-  int       i;
-  _xfrag0>>=1;
-  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
-  for(i=0;i<2;i++){
-    _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
-    _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
-    fragi+=_fplanes[1].nhfrags;
-  }
-}
-
-/*Fills in the chroma plane fragment maps for a macro block.
-  This version is for use with no chroma decimation (4:4:4).
-  This uses the already filled-in luma plane values.
-  _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.*/
-static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3]){
-  int k;
-  for(k=0;k<4;k++){
-    _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
-    _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
-  }
-}
-
-/*The function type used to fill in the chroma plane fragment maps for a
-   macro block.
-  _mb_map:  The macro block map to fill.
-  _fplanes: The descriptions of the fragment planes.
-  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
-  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
-typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
- const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
-
-/*A table of functions used to fill in the chroma plane fragment maps for a
-   macro block for each type of chrominance decimation.*/
-static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
-  oc_mb_fill_cmapping00,
-  oc_mb_fill_cmapping01,
-  oc_mb_fill_cmapping10,
-  (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
-};
-
-/*Fills in the mapping from macro blocks to their corresponding fragment
-   numbers in each plane.
-  _mb_maps:   The list of macro block maps.
-  _mb_modes:  The list of macro block modes; macro blocks completely outside
-               the coded region are marked invalid.
-  _fplanes:   The descriptions of the fragment planes.
-  _pixel_fmt: The chroma decimation type.*/
-static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
- signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
-  oc_mb_fill_cmapping_func  mb_fill_cmapping;
-  unsigned                  sbi;
-  int                       y;
-  mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
-  /*Loop through the luma plane super blocks.*/
-  for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
-    int x;
-    for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
-      int ymb;
-      /*Loop through the macro blocks in each super block in display order.*/
-      for(ymb=0;ymb<2;ymb++){
-        int xmb;
-        for(xmb=0;xmb<2;xmb++){
-          unsigned mbi;
-          int      mbx;
-          int      mby;
-          mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
-          mbx=x|xmb<<1;
-          mby=y|ymb<<1;
-          /*Initialize fragment indices to -1.*/
-          memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
-          /*Make sure this macro block is within the encoded region.*/
-          if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
-            _mb_modes[mbi]=OC_MODE_INVALID;
-            continue;
-          }
-          /*Fill in the fragment indices for the luma plane.*/
-          oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
-          /*Fill in the fragment indices for the chroma planes.*/
-          (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
-        }
-      }
-    }
-  }
-}
-
-/*Marks the fragments which fall all or partially outside the displayable
-   region of the frame.
-  _state: The Theora state containing the fragments to be marked.*/
-static void oc_state_border_init(oc_theora_state *_state){
-  oc_fragment       *frag;
-  oc_fragment       *yfrag_end;
-  oc_fragment       *xfrag_end;
-  oc_fragment_plane *fplane;
-  int                crop_x0;
-  int                crop_y0;
-  int                crop_xf;
-  int                crop_yf;
-  int                pli;
-  int                y;
-  int                x;
-  /*The method we use here is slow, but the code is dead simple and handles
-     all the special cases easily.
-    We only ever need to do it once.*/
-  /*Loop through the fragments, marking those completely outside the
-     displayable region and constructing a border mask for those that straddle
-     the border.*/
-  _state->nborders=0;
-  yfrag_end=frag=_state->frags;
-  for(pli=0;pli<3;pli++){
-    fplane=_state->fplanes+pli;
-    /*Set up the cropping rectangle for this plane.*/
-    crop_x0=_state->info.pic_x;
-    crop_xf=_state->info.pic_x+_state->info.pic_width;
-    crop_y0=_state->info.pic_y;
-    crop_yf=_state->info.pic_y+_state->info.pic_height;
-    if(pli>0){
-      if(!(_state->info.pixel_fmt&1)){
-        crop_x0=crop_x0>>1;
-        crop_xf=crop_xf+1>>1;
-      }
-      if(!(_state->info.pixel_fmt&2)){
-        crop_y0=crop_y0>>1;
-        crop_yf=crop_yf+1>>1;
-      }
-    }
-    y=0;
-    for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
-      x=0;
-      for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
-        /*First check to see if this fragment is completely outside the
-           displayable region.*/
-        /*Note the special checks for an empty cropping rectangle.
-          This guarantees that if we count a fragment as straddling the
-           border below, at least one pixel in the fragment will be inside
-           the displayable region.*/
-        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
-         crop_x0>=crop_xf||crop_y0>=crop_yf){
-          frag->invalid=1;
-        }
-        /*Otherwise, check to see if it straddles the border.*/
-        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
-         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
-          ogg_int64_t mask;
-          int         npixels;
-          int         i;
-          mask=npixels=0;
-          for(i=0;i<8;i++){
-            int j;
-            for(j=0;j<8;j++){
-              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
-                mask|=(ogg_int64_t)1<<(i<<3|j);
-                npixels++;
-              }
-            }
-          }
-          /*Search the fragment array for border info with the same pattern.
-            In general, there will be at most 8 different patterns (per
-             plane).*/
-          for(i=0;;i++){
-            if(i>=_state->nborders){
-              _state->nborders++;
-              _state->borders[i].mask=mask;
-              _state->borders[i].npixels=npixels;
-            }
-            else if(_state->borders[i].mask!=mask)continue;
-            frag->borderi=i;
-            break;
-          }
-        }
-        else frag->borderi=-1;
-      }
-    }
-  }
-}
-
-static int oc_state_frarray_init(oc_theora_state *_state){
-  int       yhfrags;
-  int       yvfrags;
-  int       chfrags;
-  int       cvfrags;
-  ptrdiff_t yfrags;
-  ptrdiff_t cfrags;
-  ptrdiff_t nfrags;
-  unsigned  yhsbs;
-  unsigned  yvsbs;
-  unsigned  chsbs;
-  unsigned  cvsbs;
-  unsigned  ysbs;
-  unsigned  csbs;
-  unsigned  nsbs;
-  size_t    nmbs;
-  int       hdec;
-  int       vdec;
-  int       pli;
-  /*Figure out the number of fragments in each plane.*/
-  /*These parameters have already been validated to be multiples of 16.*/
-  yhfrags=_state->info.frame_width>>3;
-  yvfrags=_state->info.frame_height>>3;
-  hdec=!(_state->info.pixel_fmt&1);
-  vdec=!(_state->info.pixel_fmt&2);
-  chfrags=yhfrags+hdec>>hdec;
-  cvfrags=yvfrags+vdec>>vdec;
-  yfrags=yhfrags*(ptrdiff_t)yvfrags;
-  cfrags=chfrags*(ptrdiff_t)cvfrags;
-  nfrags=yfrags+2*cfrags;
-  /*Figure out the number of super blocks in each plane.*/
-  yhsbs=yhfrags+3>>2;
-  yvsbs=yvfrags+3>>2;
-  chsbs=chfrags+3>>2;
-  cvsbs=cvfrags+3>>2;
-  ysbs=yhsbs*yvsbs;
-  csbs=chsbs*cvsbs;
-  nsbs=ysbs+2*csbs;
-  nmbs=(size_t)ysbs<<2;
-  /*Check for overflow.
-    We support the ridiculous upper limits of the specification (1048560 by
-     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
-     but for those with 32-bit pointers (or smaller!) we have to check.
-    If the caller wants to prevent denial-of-service by imposing a more
-     reasonable upper limit on the size of attempted allocations, they must do
-     so themselves; we have no platform independent way to determine how much
-     system memory there is nor an application-independent way to decide what a
-     "reasonable" allocation is.*/
-  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
-   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
-    return TH_EIMPL;
-  }
-  /*Initialize the fragment array.*/
-  _state->fplanes[0].nhfrags=yhfrags;
-  _state->fplanes[0].nvfrags=yvfrags;
-  _state->fplanes[0].froffset=0;
-  _state->fplanes[0].nfrags=yfrags;
-  _state->fplanes[0].nhsbs=yhsbs;
-  _state->fplanes[0].nvsbs=yvsbs;
-  _state->fplanes[0].sboffset=0;
-  _state->fplanes[0].nsbs=ysbs;
-  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
-  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
-  _state->fplanes[1].froffset=yfrags;
-  _state->fplanes[2].froffset=yfrags+cfrags;
-  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
-  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
-  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
-  _state->fplanes[1].sboffset=ysbs;
-  _state->fplanes[2].sboffset=ysbs+csbs;
-  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
-  _state->nfrags=nfrags;
-  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
-  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
-  _state->nsbs=nsbs;
-  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
-  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
-  _state->nhmbs=yhsbs<<1;
-  _state->nvmbs=yvsbs<<1;
-  _state->nmbs=nmbs;
-  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
-  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
-  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
-  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
-   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
-   _state->coded_fragis==NULL){
-    return TH_EFAULT;
-  }
-  /*Create the mapping from super blocks to fragments.*/
-  for(pli=0;pli<3;pli++){
-    oc_fragment_plane *fplane;
-    fplane=_state->fplanes+pli;
-    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
-     _state->sb_flags+fplane->sboffset,fplane->froffset,
-     fplane->nhfrags,fplane->nvfrags);
-  }
-  /*Create the mapping from macro blocks to fragments.*/
-  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
-   _state->fplanes,_state->info.pixel_fmt);
-  /*Initialize the invalid and borderi fields of each fragment.*/
-  oc_state_border_init(_state);
-  return 0;
-}
-
-static void oc_state_frarray_clear(oc_theora_state *_state){
-  _ogg_free(_state->coded_fragis);
-  _ogg_free(_state->mb_modes);
-  _ogg_free(_state->mb_maps);
-  _ogg_free(_state->sb_flags);
-  _ogg_free(_state->sb_maps);
-  _ogg_free(_state->frag_mvs);
-  _ogg_free(_state->frags);
-}
-
-
-/*Initializes the buffers used for reconstructed frames.
-  These buffers are padded with 16 extra pixels on each side, to allow
-   unrestricted motion vectors without special casing the boundary.
-  If chroma is decimated in either direction, the padding is reduced by a
-   factor of 2 on the appropriate sides.
-  _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
-static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
-  th_info       *info;
-  unsigned char *ref_frame_data;
-  size_t         ref_frame_data_sz;
-  size_t         ref_frame_sz;
-  size_t         yplane_sz;
-  size_t         cplane_sz;
-  int            yhstride;
-  int            yheight;
-  int            chstride;
-  int            cheight;
-  ptrdiff_t      align;
-  ptrdiff_t      yoffset;
-  ptrdiff_t      coffset;
-  ptrdiff_t     *frag_buf_offs;
-  ptrdiff_t      fragi;
-  int            hdec;
-  int            vdec;
-  int            rfi;
-  int            pli;
-  if(_nrefs<3||_nrefs>6)return TH_EINVAL;
-  info=&_state->info;
-  /*Compute the image buffer parameters for each plane.*/
-  hdec=!(info->pixel_fmt&1);
-  vdec=!(info->pixel_fmt&2);
-  yhstride=info->frame_width+2*OC_UMV_PADDING;
-  yheight=info->frame_height+2*OC_UMV_PADDING;
-  /*Require 16-byte aligned rows in the chroma planes.*/
-  chstride=(yhstride>>hdec)+15&~15;
-  cheight=yheight>>vdec;
-  yplane_sz=yhstride*(size_t)yheight;
-  cplane_sz=chstride*(size_t)cheight;
-  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
-  coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
-  /*Although we guarantee the rows of the chroma planes are a multiple of 16
-     bytes, the initial padding on the first row may only be 8 bytes.
-    Compute the offset needed to the actual image data to a multiple of 16.*/
-  align=-coffset&15;
-  ref_frame_sz=yplane_sz+2*cplane_sz+16;
-  ref_frame_data_sz=_nrefs*ref_frame_sz;
-  /*Check for overflow.
-    The same caveats apply as for oc_state_frarray_init().*/
-  if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
-   ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
-    return TH_EIMPL;
-  }
-  ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
-  frag_buf_offs=_state->frag_buf_offs=
-   _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
-  if(ref_frame_data==NULL||frag_buf_offs==NULL){
-    _ogg_free(frag_buf_offs);
-    oc_aligned_free(ref_frame_data);
-    return TH_EFAULT;
-  }
-  /*Set up the width, height and stride for the image buffers.*/
-  _state->ref_frame_bufs[0][0].width=info->frame_width;
-  _state->ref_frame_bufs[0][0].height=info->frame_height;
-  _state->ref_frame_bufs[0][0].stride=yhstride;
-  _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
-   info->frame_width>>hdec;
-  _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
-   info->frame_height>>vdec;
-  _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
-   chstride;
-  for(rfi=1;rfi<_nrefs;rfi++){
-    memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
-     sizeof(_state->ref_frame_bufs[0]));
-  }
-  _state->ref_frame_handle=ref_frame_data;
-  /*Set up the data pointers for the image buffers.*/
-  for(rfi=0;rfi<_nrefs;rfi++){
-    _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
-    ref_frame_data+=yplane_sz+align;
-    _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
-    ref_frame_data+=cplane_sz;
-    _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
-    ref_frame_data+=cplane_sz+(16-align);
-    /*Flip the buffer upside down.
-      This allows us to decode Theora's bottom-up frames in their natural
-       order, yet return a top-down buffer with a positive stride to the user.*/
-    oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
-     _state->ref_frame_bufs[rfi]);
-  }
-  _state->ref_ystride[0]=-yhstride;
-  _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
-  /*Initialize the fragment buffer offsets.*/
-  ref_frame_data=_state->ref_frame_bufs[0][0].data;
-  fragi=0;
-  for(pli=0;pli<3;pli++){
-    th_img_plane      *iplane;
-    oc_fragment_plane *fplane;
-    unsigned char     *vpix;
-    ptrdiff_t          stride;
-    ptrdiff_t          vfragi_end;
-    int                nhfrags;
-    iplane=_state->ref_frame_bufs[0]+pli;
-    fplane=_state->fplanes+pli;
-    vpix=iplane->data;
-    vfragi_end=fplane->froffset+fplane->nfrags;
-    nhfrags=fplane->nhfrags;
-    stride=iplane->stride;
-    while(fragi<vfragi_end){
-      ptrdiff_t      hfragi_end;
-      unsigned char *hpix;
-      hpix=vpix;
-      for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
-        frag_buf_offs[fragi]=hpix-ref_frame_data;
-        hpix+=8;
-      }
-      vpix+=stride<<3;
-    }
-  }
-  /*Initialize the reference frame pointers and indices.*/
-  _state->ref_frame_idx[OC_FRAME_GOLD]=
-   _state->ref_frame_idx[OC_FRAME_PREV]=
-   _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
-   _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
-   _state->ref_frame_idx[OC_FRAME_SELF]=
-   _state->ref_frame_idx[OC_FRAME_IO]=-1;
-  _state->ref_frame_data[OC_FRAME_GOLD]=
-   _state->ref_frame_data[OC_FRAME_PREV]=
-   _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
-   _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
-   _state->ref_frame_data[OC_FRAME_SELF]=
-   _state->ref_frame_data[OC_FRAME_IO]=NULL;
-  return 0;
-}
-
-static void oc_state_ref_bufs_clear(oc_theora_state *_state){
-  _ogg_free(_state->frag_buf_offs);
-  oc_aligned_free(_state->ref_frame_handle);
-}
-
-
-void oc_state_accel_init_c(oc_theora_state *_state){
-  _state->cpu_flags=0;
-#if defined(OC_STATE_USE_VTABLE)
-  _state->opt_vtable.frag_copy=oc_frag_copy_c;
-  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
-  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
-  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
-  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
-  _state->opt_vtable.idct8x8=oc_idct8x8_c;
-  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
-  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
-  _state->opt_vtable.state_loop_filter_frag_rows=
-   oc_state_loop_filter_frag_rows_c;
-  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
-#endif
-  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
-}
-
-
-int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
-  int ret;
-  /*First validate the parameters.*/
-  if(_info==NULL)return TH_EFAULT;
-  /*The width and height of the encoded frame must be multiples of 16.
-    They must also, when divided by 16, fit into a 16-bit unsigned integer.
-    The displayable frame offset coordinates must fit into an 8-bit unsigned
-     integer.
-    Note that the offset Y in the API is specified on the opposite side from
-     how it is specified in the bitstream, because the Y axis is flipped in
-     the bitstream.
-    The displayable frame must fit inside the encoded frame.
-    The color space must be one known by the encoder.*/
-  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
-   _info->frame_width<=0||_info->frame_width>=0x100000||
-   _info->frame_height<=0||_info->frame_height>=0x100000||
-   _info->pic_x+_info->pic_width>_info->frame_width||
-   _info->pic_y+_info->pic_height>_info->frame_height||
-   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
-   /*Note: the following <0 comparisons may generate spurious warnings on
-      platforms where enums are unsigned.
-     We could cast them to unsigned and just use the following >= comparison,
-      but there are a number of compilers which will mis-optimize this.
-     It's better to live with the spurious warnings.*/
-   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
-   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
-    return TH_EINVAL;
-  }
-  memset(_state,0,sizeof(*_state));
-  memcpy(&_state->info,_info,sizeof(*_info));
-  /*Invert the sense of pic_y to match Theora's right-handed coordinate
-     system.*/
-  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
-  _state->frame_type=OC_UNKWN_FRAME;
-  oc_state_accel_init(_state);
-  ret=oc_state_frarray_init(_state);
-  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
-  if(ret<0){
-    oc_state_frarray_clear(_state);
-    return ret;
-  }
-  /*If the keyframe_granule_shift is out of range, use the maximum allowable
-     value.*/
-  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
-    _state->info.keyframe_granule_shift=31;
-  }
-  _state->keyframe_num=0;
-  _state->curframe_num=-1;
-  /*3.2.0 streams mark the frame index instead of the frame count.
-    This was changed with stream version 3.2.1 to conform to other Ogg
-     codecs.
-    We add an extra bias when computing granule positions for new streams.*/
-  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
-  return 0;
-}
-
-void oc_state_clear(oc_theora_state *_state){
-  oc_state_ref_bufs_clear(_state);
-  oc_state_frarray_clear(_state);
-}
-
-
-/*Duplicates the pixels on the border of the image plane out into the
-   surrounding padding for use by unrestricted motion vectors.
-  This function only adds the left and right borders, and only for the fragment
-   rows specified.
-  _refi: The index of the reference buffer to pad.
-  _pli:  The color plane.
-  _y0:   The Y coordinate of the first row to pad.
-  _yend: The Y coordinate of the row to stop padding at.*/
-void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
- int _y0,int _yend){
-  th_img_plane  *iplane;
-  unsigned char *apix;
-  unsigned char *bpix;
-  unsigned char *epix;
-  int            stride;
-  int            hpadding;
-  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
-  iplane=_state->ref_frame_bufs[_refi]+_pli;
-  stride=iplane->stride;
-  apix=iplane->data+_y0*(ptrdiff_t)stride;
-  bpix=apix+iplane->width-1;
-  epix=iplane->data+_yend*(ptrdiff_t)stride;
-  /*Note the use of != instead of <, which allows the stride to be negative.*/
-  while(apix!=epix){
-    memset(apix-hpadding,apix[0],hpadding);
-    memset(bpix+1,bpix[0],hpadding);
-    apix+=stride;
-    bpix+=stride;
-  }
-}
-
-/*Duplicates the pixels on the border of the image plane out into the
-   surrounding padding for use by unrestricted motion vectors.
-  This function only adds the top and bottom borders, and must be called after
-   the left and right borders are added.
-  _refi:      The index of the reference buffer to pad.
-  _pli:       The color plane.*/
-void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
-  th_img_plane  *iplane;
-  unsigned char *apix;
-  unsigned char *bpix;
-  unsigned char *epix;
-  int            stride;
-  int            hpadding;
-  int            vpadding;
-  int            fullw;
-  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
-  vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
-  iplane=_state->ref_frame_bufs[_refi]+_pli;
-  stride=iplane->stride;
-  fullw=iplane->width+(hpadding<<1);
-  apix=iplane->data-hpadding;
-  bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
-  epix=apix-stride*(ptrdiff_t)vpadding;
-  while(apix!=epix){
-    memcpy(apix-stride,apix,fullw);
-    memcpy(bpix+stride,bpix,fullw);
-    apix-=stride;
-    bpix+=stride;
-  }
-}
-
-/*Duplicates the pixels on the border of the given reference image out into
-   the surrounding padding for use by unrestricted motion vectors.
-  _state: The context containing the reference buffers.
-  _refi:  The index of the reference buffer to pad.*/
-void oc_state_borders_fill(oc_theora_state *_state,int _refi){
-  int pli;
-  for(pli=0;pli<3;pli++){
-    oc_state_borders_fill_rows(_state,_refi,pli,0,
-     _state->ref_frame_bufs[_refi][pli].height);
-    oc_state_borders_fill_caps(_state,_refi,pli);
-  }
-}
-
-/*Determines the offsets in an image buffer to use for motion compensation.
-  _state:   The Theora state the offsets are to be computed with.
-  _offsets: Returns the offset for the buffer(s).
-            _offsets[0] is always set.
-            _offsets[1] is set if the motion vector has non-zero fractional
-             components.
-  _pli:     The color plane index.
-  _mv:      The motion vector.
-  Return: The number of offsets returned: 1 or 2.*/
-int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
- int _pli,oc_mv _mv){
-  /*Here is a brief description of how Theora handles motion vectors:
-    Motion vector components are specified to half-pixel accuracy in
-     undecimated directions of each plane, and quarter-pixel accuracy in
-     decimated directions.
-    Integer parts are extracted by dividing (not shifting) by the
-     appropriate amount, with truncation towards zero.
-    These integer values are used to calculate the first offset.
-
-    If either of the fractional parts are non-zero, then a second offset is
-     computed.
-    No third or fourth offsets are computed, even if both components have
-     non-zero fractional parts.
-    The second offset is computed by dividing (not shifting) by the
-     appropriate amount, always truncating _away_ from zero.*/
-#if 0
-  /*This version of the code doesn't use any tables, but is slower.*/
-  int ystride;
-  int xprec;
-  int yprec;
-  int xfrac;
-  int yfrac;
-  int offs;
-  int dx;
-  int dy;
-  ystride=_state->ref_ystride[_pli];
-  /*These two variables decide whether we are in half- or quarter-pixel
-     precision in each component.*/
-  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
-  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
-  dx=OC_MV_X(_mv);
-  dy=OC_MV_Y(_mv);
-  /*These two variables are either 0 if all the fractional bits are zero or -1
-     if any of them are non-zero.*/
-  xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
-  yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
-  offs=(dx>>xprec)+(dy>>yprec)*ystride;
-  if(xfrac||yfrac){
-    int xmask;
-    int ymask;
-    xmask=OC_SIGNMASK(dx);
-    ymask=OC_SIGNMASK(dy);
-    yfrac&=ystride;
-    _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
-    _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
-    return 2;
-  }
-  else{
-    _offsets[0]=offs;
-    return 1;
-  }
-#else
-  /*Using tables simplifies the code, and there's enough arithmetic to hide the
-     latencies of the memory references.*/
-  static const signed char OC_MVMAP[2][64]={
-    {
-          -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
-       -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
-        0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
-        8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
-    },
-    {
-           -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
-       -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
-        0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
-        4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
-    }
-  };
-  static const signed char OC_MVMAP2[2][64]={
-    {
-        -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
-      0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
-      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
-      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
-    },
-    {
-        -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
-      0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
-      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
-    }
-  };
-  int ystride;
-  int qpx;
-  int qpy;
-  int mx;
-  int my;
-  int mx2;
-  int my2;
-  int offs;
-  int dx;
-  int dy;
-  ystride=_state->ref_ystride[_pli];
-  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
-  dx=OC_MV_X(_mv);
-  dy=OC_MV_Y(_mv);
-  my=OC_MVMAP[qpy][dy+31];
-  my2=OC_MVMAP2[qpy][dy+31];
-  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
-  mx=OC_MVMAP[qpx][dx+31];
-  mx2=OC_MVMAP2[qpx][dx+31];
-  offs=my*ystride+mx;
-  if(mx2||my2){
-    _offsets[1]=offs+my2*ystride+mx2;
-    _offsets[0]=offs;
-    return 2;
-  }
-  _offsets[0]=offs;
-  return 1;
-#endif
-}
-
-void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    ogg_int16_t p;
-    int         ci;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    /*LOOP VECTORIZES.*/
-    for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p;
-  }
-  else{
-    /*First, dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2(_state,
-       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64);
-    }
-    else{
-      oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-    }
-  }
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
-  int y;
-  _pix-=2;
-  for(y=0;y<8;y++){
-    int f;
-    f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]);
-    /*The _bv array is used to compute the function
-      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
-      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
-    f=*(_bv+(f+4>>3));
-    _pix[1]=OC_CLAMP255(_pix[1]+f);
-    _pix[2]=OC_CLAMP255(_pix[2]-f);
-    _pix+=_ystride;
-  }
-}
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
-  int x;
-  _pix-=_ystride*2;
-  for(x=0;x<8;x++){
-    int f;
-    f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]);
-    /*The _bv array is used to compute the function
-      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
-      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
-    f=*(_bv+(f+4>>3));
-    _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f);
-    _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f);
-  }
-}
-
-/*Initialize the bounding values array used by the loop filter.
-  _bv: Storage for the array.
-  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
-void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
-  int i;
-  memset(_bv,0,sizeof(_bv[0])*256);
-  for(i=0;i<_flimit;i++){
-    if(127-i-_flimit>=0)_bv[127-i-_flimit]=(signed char)(i-_flimit);
-    _bv[127-i]=(signed char)(-i);
-    _bv[127+i]=(signed char)(i);
-    if(127+i+_flimit<256)_bv[127+i+_flimit]=(signed char)(_flimit-i);
-  }
-}
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
-  const oc_fragment_plane *fplane;
-  const oc_fragment       *frags;
-  const ptrdiff_t         *frag_buf_offs;
-  unsigned char           *ref_frame_data;
-  ptrdiff_t                fragi_top;
-  ptrdiff_t                fragi_bot;
-  ptrdiff_t                fragi0;
-  ptrdiff_t                fragi0_end;
-  int                      ystride;
-  int                      nhfrags;
-  _bv+=127;
-  fplane=_state->fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  fragi_top=fplane->froffset;
-  fragi_bot=fragi_top+fplane->nfrags;
-  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
-  fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
-  ystride=_state->ref_ystride[_pli];
-  frags=_state->frags;
-  frag_buf_offs=_state->frag_buf_offs;
-  ref_frame_data=_state->ref_frame_data[_refi];
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.*/
-  while(fragi0<fragi0_end){
-    ptrdiff_t fragi;
-    ptrdiff_t fragi_end;
-    fragi=fragi0;
-    fragi_end=fragi+nhfrags;
-    while(fragi<fragi_end){
-      if(frags[fragi].coded){
-        unsigned char *ref;
-        ref=ref_frame_data+frag_buf_offs[fragi];
-        if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
-        if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
-        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
-          loop_filter_h(ref+8,ystride,_bv);
-        }
-        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
-          loop_filter_v(ref+(ystride<<3),ystride,_bv);
-        }
-      }
-      fragi++;
-    }
-    fragi0+=nhfrags;
-  }
-}
-
-#if defined(OC_DUMP_IMAGES)
-int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
- const char *_suf){
-  /*Dump a PNG of the reconstructed image.*/
-  png_structp    png;
-  png_infop      info;
-  png_bytep     *image;
-  FILE          *fp;
-  char           fname[16];
-  unsigned char *y_row;
-  unsigned char *u_row;
-  unsigned char *v_row;
-  unsigned char *y;
-  unsigned char *u;
-  unsigned char *v;
-  ogg_int64_t    iframe;
-  ogg_int64_t    pframe;
-  int            y_stride;
-  int            u_stride;
-  int            v_stride;
-  int            framei;
-  int            width;
-  int            height;
-  int            imgi;
-  int            imgj;
-  width=_state->info.frame_width;
-  height=_state->info.frame_height;
-  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
-  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
-  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
-  fp=fopen(fname,"wb");
-  if(fp==NULL)return TH_EFAULT;
-  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
-  if(image==NULL){
-    fclose(fp);
-    return TH_EFAULT;
-  }
-  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
-  if(png==NULL){
-    oc_free_2d(image);
-    fclose(fp);
-    return TH_EFAULT;
-  }
-  info=png_create_info_struct(png);
-  if(info==NULL){
-    png_destroy_write_struct(&png,NULL);
-    oc_free_2d(image);
-    fclose(fp);
-    return TH_EFAULT;
-  }
-  if(setjmp(png_jmpbuf(png))){
-    png_destroy_write_struct(&png,&info);
-    oc_free_2d(image);
-    fclose(fp);
-    return TH_EFAULT;
-  }
-  framei=_state->ref_frame_idx[_frame];
-  y_row=_state->ref_frame_bufs[framei][0].data;
-  u_row=_state->ref_frame_bufs[framei][1].data;
-  v_row=_state->ref_frame_bufs[framei][2].data;
-  y_stride=_state->ref_frame_bufs[framei][0].stride;
-  u_stride=_state->ref_frame_bufs[framei][1].stride;
-  v_stride=_state->ref_frame_bufs[framei][2].stride;
-  /*Chroma up-sampling is just done with a box filter.
-    This is very likely what will actually be used in practice on a real
-     display, and also removes one more layer to search in for the source of
-     artifacts.
-    As an added bonus, it's dead simple.*/
-  for(imgi=height;imgi-->0;){
-    int dc;
-    y=y_row;
-    u=u_row;
-    v=v_row;
-    for(imgj=0;imgj<6*width;){
-      float    yval;
-      float    uval;
-      float    vval;
-      unsigned rval;
-      unsigned gval;
-      unsigned bval;
-      /*This is intentionally slow and very accurate.*/
-      yval=(*y-16)*(1.0F/219);
-      uval=(*u-128)*(2*(1-0.114F)/224);
-      vval=(*v-128)*(2*(1-0.299F)/224);
-      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
-      gval=OC_CLAMPI(0,(int)(65535*(
-       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
-      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
-      image[imgi][imgj++]=(unsigned char)(rval>>8);
-      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
-      image[imgi][imgj++]=(unsigned char)(gval>>8);
-      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
-      image[imgi][imgj++]=(unsigned char)(bval>>8);
-      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
-      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
-      y++;
-      u+=dc;
-      v+=dc;
-    }
-    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
-    y_row+=y_stride;
-    u_row+=dc&u_stride;
-    v_row+=dc&v_stride;
-  }
-  png_init_io(png,fp);
-  png_set_compression_level(png,Z_BEST_COMPRESSION);
-  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
-   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
-  switch(_state->info.colorspace){
-    case TH_CS_ITU_REC_470M:{
-      png_set_gAMA(png,info,2.2);
-      png_set_cHRM_fixed(png,info,31006,31616,
-       67000,32000,21000,71000,14000,8000);
-    }break;
-    case TH_CS_ITU_REC_470BG:{
-      png_set_gAMA(png,info,2.67);
-      png_set_cHRM_fixed(png,info,31271,32902,
-       64000,33000,29000,60000,15000,6000);
-    }break;
-    default:break;
-  }
-  png_set_pHYs(png,info,_state->info.aspect_numerator,
-   _state->info.aspect_denominator,0);
-  png_set_rows(png,info,image);
-  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
-  png_write_end(png,info);
-  png_destroy_write_struct(&png,&info);
-  oc_free_2d(image);
-  fclose(fp);
-  return 0;
-}
-#endif
-
-
-
-ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
-  oc_theora_state *state;
-  state=(oc_theora_state *)_encdec;
-  if(_granpos>=0){
-    ogg_int64_t iframe;
-    ogg_int64_t pframe;
-    iframe=_granpos>>state->info.keyframe_granule_shift;
-    pframe=_granpos-(iframe<<state->info.keyframe_granule_shift);
-    /*3.2.0 streams store the frame index in the granule position.
-      3.2.1 and later store the frame count.
-      We return the index, so adjust the value if we have a 3.2.1 or later
-       stream.*/
-    return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1);
-  }
-  return -1;
-}
-
-double th_granule_time(void *_encdec,ogg_int64_t _granpos){
-  oc_theora_state *state;
-  state=(oc_theora_state *)_encdec;
-  if(_granpos>=0){
-    return (th_granule_frame(_encdec, _granpos)+1)*(
-     (double)state->info.fps_denominator/state->info.fps_numerator);
-  }
-  return -1;
-}
diff --git a/media/libtheora/lib/state.h b/media/libtheora/lib/state.h
deleted file mode 100644
index f176a53ce..000000000
--- a/media/libtheora/lib/state.h
+++ /dev/null
@@ -1,552 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: internal.h 17337 2010-07-19 16:08:54Z tterribe $
-
- ********************************************************************/
-#if !defined(_state_H)
-# define _state_H (1)
-# include "internal.h"
-# include "huffman.h"
-# include "quant.h"
-
-
-
-/*A single quadrant of the map from a super block to fragment numbers.*/
-typedef ptrdiff_t       oc_sb_map_quad[4];
-/*A map from a super block to fragment numbers.*/
-typedef oc_sb_map_quad  oc_sb_map[4];
-/*A single plane of the map from a macro block to fragment numbers.*/
-typedef ptrdiff_t       oc_mb_map_plane[4];
-/*A map from a macro block to fragment numbers.*/
-typedef oc_mb_map_plane oc_mb_map[3];
-/*A motion vector.*/
-typedef ogg_int16_t     oc_mv;
-
-typedef struct oc_sb_flags              oc_sb_flags;
-typedef struct oc_border_info           oc_border_info;
-typedef struct oc_fragment              oc_fragment;
-typedef struct oc_fragment_plane        oc_fragment_plane;
-typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
-typedef struct oc_base_opt_data         oc_base_opt_data;
-typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
-typedef struct oc_theora_state          oc_theora_state;
-
-
-
-/*Shared accelerated functions.*/
-# if defined(OC_X86_ASM)
-#  if defined(_MSC_VER)
-#   include "x86_vc/x86int.h"
-#  else
-#   include "x86/x86int.h"
-#  endif
-# endif
-# if defined(OC_ARM_ASM)
-#  include "arm/armint.h"
-# endif
-# if defined(OC_C64X_ASM)
-#  include "c64x/c64xint.h"
-# endif
-
-# if !defined(oc_state_accel_init)
-#  define oc_state_accel_init oc_state_accel_init_c
-# endif
-# if defined(OC_STATE_USE_VTABLE)
-#  if !defined(oc_frag_copy)
-#   define oc_frag_copy(_state,_dst,_src,_ystride) \
-  ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
-#  endif
-#  if !defined(oc_frag_copy_list)
-#   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
- _fragis,_nfragis,_frag_buf_offs) \
- ((*(_state)->opt_vtable.frag_copy_list)(_dst_frame,_src_frame,_ystride, \
-  _fragis,_nfragis,_frag_buf_offs))
-#  endif
-#  if !defined(oc_frag_recon_intra)
-#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
-#  endif
-#  if !defined(oc_frag_recon_inter)
-#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
-#  endif
-#  if !defined(oc_frag_recon_inter2)
-#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
-   _src1,_src2,_ystride,_residue))
-#  endif
-# if !defined(oc_idct8x8)
-#   define oc_idct8x8(_state,_y,_x,_last_zzi) \
-  ((*(_state)->opt_vtable.idct8x8)(_y,_x,_last_zzi))
-#  endif
-#  if !defined(oc_state_frag_recon)
-#   define oc_state_frag_recon(_state,_fragi, \
- _pli,_dct_coeffs,_last_zzi,_dc_quant) \
-  ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
-   _pli,_dct_coeffs,_last_zzi,_dc_quant))
-#  endif
-#  if !defined(oc_loop_filter_init)
-#   define oc_loop_filter_init(_state,_bv,_flimit) \
-  ((*(_state)->opt_vtable.loop_filter_init)(_bv,_flimit))
-#  endif
-#  if !defined(oc_state_loop_filter_frag_rows)
-#   define oc_state_loop_filter_frag_rows(_state, \
- _bv,_refi,_pli,_fragy0,_fragy_end) \
-  ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
-   _bv,_refi,_pli,_fragy0,_fragy_end))
-#  endif
-#  if !defined(oc_restore_fpu)
-#   define oc_restore_fpu(_state) \
-  ((*(_state)->opt_vtable.restore_fpu)())
-#  endif
-# else
-#  if !defined(oc_frag_copy)
-#   define oc_frag_copy(_state,_dst,_src,_ystride) \
-  oc_frag_copy_c(_dst,_src,_ystride)
-#  endif
-#  if !defined(oc_frag_copy_list)
-#   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
- _fragis,_nfragis,_frag_buf_offs) \
-  oc_frag_copy_list_c(_dst_frame,_src_frame,_ystride, \
-  _fragis,_nfragis,_frag_buf_offs)
-#  endif
-#  if !defined(oc_frag_recon_intra)
-#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
-  oc_frag_recon_intra_c(_dst,_dst_ystride,_residue)
-#  endif
-#  if !defined(oc_frag_recon_inter)
-#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
-  oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
-#  endif
-#  if !defined(oc_frag_recon_inter2)
-#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
-  oc_frag_recon_inter2_c(_dst,_src1,_src2,_ystride,_residue)
-#  endif
-#  if !defined(oc_idct8x8)
-#   define oc_idct8x8(_state,_y,_x,_last_zzi) oc_idct8x8_c(_y,_x,_last_zzi)
-#  endif
-#  if !defined(oc_state_frag_recon)
-#   define oc_state_frag_recon oc_state_frag_recon_c
-#  endif
-#  if !defined(oc_loop_filter_init)
-#   define oc_loop_filter_init(_state,_bv,_flimit) \
-  oc_loop_filter_init_c(_bv,_flimit)
-#  endif
-#  if !defined(oc_state_loop_filter_frag_rows)
-#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c
-#  endif
-#  if !defined(oc_restore_fpu)
-#   define oc_restore_fpu(_state) do{}while(0)
-#  endif
-# endif
-
-
-
-/*A keyframe.*/
-# define OC_INTRA_FRAME (0)
-/*A predicted frame.*/
-# define OC_INTER_FRAME (1)
-/*A frame of unknown type (frame type decision has not yet been made).*/
-# define OC_UNKWN_FRAME (-1)
-
-/*The amount of padding to add to the reconstructed frame buffers on all
-   sides.
-  This is used to allow unrestricted motion vectors without special casing.
-  This must be a multiple of 2.*/
-# define OC_UMV_PADDING (16)
-
-/*Frame classification indices.*/
-/*The previous golden frame.*/
-# define OC_FRAME_GOLD      (0)
-/*The previous frame.*/
-# define OC_FRAME_PREV      (1)
-/*The current frame.*/
-# define OC_FRAME_SELF      (2)
-/*Used to mark uncoded fragments (for DC prediction).*/
-# define OC_FRAME_NONE      (3)
-
-/*The input or output buffer.*/
-# define OC_FRAME_IO        (3)
-/*Uncompressed prev golden frame.*/
-# define OC_FRAME_GOLD_ORIG (4)
-/*Uncompressed previous frame. */
-# define OC_FRAME_PREV_ORIG (5)
-
-/*Macroblock modes.*/
-/*Macro block is invalid: It is never coded.*/
-# define OC_MODE_INVALID        (-1)
-/*Encoded difference from the same macro block in the previous frame.*/
-# define OC_MODE_INTER_NOMV     (0)
-/*Encoded with no motion compensated prediction.*/
-# define OC_MODE_INTRA          (1)
-/*Encoded difference from the previous frame offset by the given motion
-   vector.*/
-# define OC_MODE_INTER_MV       (2)
-/*Encoded difference from the previous frame offset by the last coded motion
-   vector.*/
-# define OC_MODE_INTER_MV_LAST  (3)
-/*Encoded difference from the previous frame offset by the second to last
-   coded motion vector.*/
-# define OC_MODE_INTER_MV_LAST2 (4)
-/*Encoded difference from the same macro block in the previous golden
-   frame.*/
-# define OC_MODE_GOLDEN_NOMV    (5)
-/*Encoded difference from the previous golden frame offset by the given motion
-   vector.*/
-# define OC_MODE_GOLDEN_MV      (6)
-/*Encoded difference from the previous frame offset by the individual motion
-   vectors given for each block.*/
-# define OC_MODE_INTER_MV_FOUR  (7)
-/*The number of (coded) modes.*/
-# define OC_NMODES              (8)
-
-/*Determines the reference frame used for a given MB mode.*/
-# define OC_FRAME_FOR_MODE(_x) \
- OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
-  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
-
-/*Constants for the packet state machine common between encoder and decoder.*/
-
-/*Next packet to emit/read: Codec info header.*/
-# define OC_PACKET_INFO_HDR    (-3)
-/*Next packet to emit/read: Comment header.*/
-# define OC_PACKET_COMMENT_HDR (-2)
-/*Next packet to emit/read: Codec setup header.*/
-# define OC_PACKET_SETUP_HDR   (-1)
-/*No more packets to emit/read.*/
-# define OC_PACKET_DONE        (INT_MAX)
-
-
-
-#define OC_MV(_x,_y)         ((oc_mv)((_x)&0xFF|(_y)<<8))
-#define OC_MV_X(_mv)         ((signed char)(_mv))
-#define OC_MV_Y(_mv)         ((_mv)>>8)
-#define OC_MV_ADD(_mv1,_mv2) \
-  OC_MV(OC_MV_X(_mv1)+OC_MV_X(_mv2), \
-   OC_MV_Y(_mv1)+OC_MV_Y(_mv2))
-#define OC_MV_SUB(_mv1,_mv2) \
-  OC_MV(OC_MV_X(_mv1)-OC_MV_X(_mv2), \
-   OC_MV_Y(_mv1)-OC_MV_Y(_mv2))
-
-
-
-/*Super blocks are 32x32 segments of pixels in a single color plane indexed
-   in image order.
-  Internally, super blocks are broken up into four quadrants, each of which
-   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
-  Quadrants, and the blocks within them, are indexed in a special order called
-   a "Hilbert curve" within the super block.
-
-  In order to differentiate between the Hilbert-curve indexing strategy and
-   the regular image order indexing strategy, blocks indexed in image order
-   are called "fragments".
-  Fragments are indexed in image order, left to right, then bottom to top,
-   from Y' plane to Cb plane to Cr plane.
-
-  The co-located fragments in all image planes corresponding to the location
-   of a single quadrant of a luma plane super block form a macro block.
-  Thus there is only a single set of macro blocks for all planes, each of which
-   contains between 6 and 12 fragments, depending on the pixel format.
-  Therefore macro block information is kept in a separate set of arrays from
-   super blocks to avoid unused space in the other planes.
-  The lists are indexed in super block order.
-  That is, the macro block corresponding to the macro block mbi in (luma plane)
-   super block sbi is at index (sbi<<2|mbi).
-  Thus the number of macro blocks in each dimension is always twice the number
-   of super blocks, even when only an odd number fall inside the coded frame.
-  These "extra" macro blocks are just an artifact of our internal data layout,
-   and not part of the coded stream; they are flagged with a negative MB mode.*/
-
-
-
-/*Super block information.*/
-struct oc_sb_flags{
-  unsigned char coded_fully:1;
-  unsigned char coded_partially:1;
-  unsigned char quad_valid:4;
-};
-
-
-
-/*Information about a fragment which intersects the border of the displayable
-   region.
-  This marks which pixels belong to the displayable region.*/
-struct oc_border_info{
-  /*A bit mask marking which pixels are in the displayable region.
-    Pixel (x,y) corresponds to bit (y<<3|x).*/
-  ogg_int64_t mask;
-  /*The number of pixels in the displayable region.
-    This is always positive, and always less than 64.*/
-  int         npixels;
-};
-
-
-
-/*Fragment information.*/
-struct oc_fragment{
-  /*A flag indicating whether or not this fragment is coded.*/
-  unsigned   coded:1;
-  /*A flag indicating that this entire fragment lies outside the displayable
-     region of the frame.
-    Note the contrast with an invalid macro block, which is outside the coded
-     frame, not just the displayable one.
-    There are no fragments outside the coded frame by construction.*/
-  unsigned   invalid:1;
-  /*The index of the quality index used for this fragment's AC coefficients.*/
-  unsigned   qii:4;
-  /*The index of the reference frame this fragment is predicted from.*/
-  unsigned   refi:2;
-  /*The mode of the macroblock this fragment belongs to.*/
-  unsigned   mb_mode:3;
-  /*The index of the associated border information for fragments which lie
-     partially outside the displayable region.
-    For fragments completely inside or outside this region, this is -1.
-    Note that the C standard requires an explicit signed keyword for bitfield
-     types, since some compilers may treat them as unsigned without it.*/
-  signed int borderi:5;
-  /*The prediction-corrected DC component.
-    Note that the C standard requires an explicit signed keyword for bitfield
-     types, since some compilers may treat them as unsigned without it.*/
-  signed int dc:16;
-};
-
-
-
-/*A description of each fragment plane.*/
-struct oc_fragment_plane{
-  /*The number of fragments in the horizontal direction.*/
-  int       nhfrags;
-  /*The number of fragments in the vertical direction.*/
-  int       nvfrags;
-  /*The offset of the first fragment in the plane.*/
-  ptrdiff_t froffset;
-  /*The total number of fragments in the plane.*/
-  ptrdiff_t nfrags;
-  /*The number of super blocks in the horizontal direction.*/
-  unsigned  nhsbs;
-  /*The number of super blocks in the vertical direction.*/
-  unsigned  nvsbs;
-  /*The offset of the first super block in the plane.*/
-  unsigned  sboffset;
-  /*The total number of super blocks in the plane.*/
-  unsigned  nsbs;
-};
-
-
-typedef void (*oc_state_loop_filter_frag_rows_func)(
- const oc_theora_state *_state,signed char _bv[256],int _refi,int _pli,
- int _fragy0,int _fragy_end);
-
-/*The shared (encoder and decoder) functions that have accelerated variants.*/
-struct oc_base_opt_vtable{
-  void (*frag_copy)(unsigned char *_dst,
-   const unsigned char *_src,int _ystride);
-  void (*frag_copy_list)(unsigned char *_dst_frame,
-   const unsigned char *_src_frame,int _ystride,
-   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
-   const ogg_int16_t _residue[64]);
-  void (*frag_recon_inter)(unsigned char *_dst,
-   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
-   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
-  void (*idct8x8)(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
-   int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-  void (*loop_filter_init)(signed char _bv[256],int _flimit);
-  oc_state_loop_filter_frag_rows_func state_loop_filter_frag_rows;
-  void (*restore_fpu)(void);
-};
-
-/*The shared (encoder and decoder) tables that vary according to which variants
-   of the above functions are used.*/
-struct oc_base_opt_data{
-  const unsigned char *dct_fzig_zag;
-};
-
-
-/*State information common to both the encoder and decoder.*/
-struct oc_theora_state{
-  /*The stream information.*/
-  th_info             info;
-# if defined(OC_STATE_USE_VTABLE)
-  /*Table for shared accelerated functions.*/
-  oc_base_opt_vtable  opt_vtable;
-# endif
-  /*Table for shared data used by accelerated functions.*/
-  oc_base_opt_data    opt_data;
-  /*CPU flags to detect the presence of extended instruction sets.*/
-  ogg_uint32_t        cpu_flags;
-  /*The fragment plane descriptions.*/
-  oc_fragment_plane   fplanes[3];
-  /*The list of fragments, indexed in image order.*/
-  oc_fragment        *frags;
-  /*The the offset into the reference frame buffer to the upper-left pixel of
-     each fragment.*/
-  ptrdiff_t          *frag_buf_offs;
-  /*The motion vector for each fragment.*/
-  oc_mv              *frag_mvs;
-  /*The total number of fragments in a single frame.*/
-  ptrdiff_t           nfrags;
-  /*The list of super block maps, indexed in image order.*/
-  oc_sb_map          *sb_maps;
-  /*The list of super block flags, indexed in image order.*/
-  oc_sb_flags        *sb_flags;
-  /*The total number of super blocks in a single frame.*/
-  unsigned            nsbs;
-  /*The fragments from each color plane that belong to each macro block.
-    Fragments are stored in image order (left to right then top to bottom).
-    When chroma components are decimated, the extra fragments have an index of
-     -1.*/
-  oc_mb_map          *mb_maps;
-  /*The list of macro block modes.
-    A negative number indicates the macro block lies entirely outside the
-     coded frame.*/
-  signed char        *mb_modes;
-  /*The number of macro blocks in the X direction.*/
-  unsigned            nhmbs;
-  /*The number of macro blocks in the Y direction.*/
-  unsigned            nvmbs;
-  /*The total number of macro blocks.*/
-  size_t              nmbs;
-  /*The list of coded fragments, in coded order.
-    Uncoded fragments are stored in reverse order from the end of the list.*/
-  ptrdiff_t          *coded_fragis;
-  /*The number of coded fragments in each plane.*/
-  ptrdiff_t           ncoded_fragis[3];
-  /*The total number of coded fragments.*/
-  ptrdiff_t           ntotal_coded_fragis;
-  /*The actual buffers used for the reference frames.*/
-  th_ycbcr_buffer     ref_frame_bufs[6];
-  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
-  int                 ref_frame_idx[6];
-  /*The storage for the reference frame buffers.
-    This is just ref_frame_bufs[ref_frame_idx[i]][0].data, but is cached here
-     for faster look-up.*/
-  unsigned char      *ref_frame_data[6];
-  /*The handle used to allocate the reference frame buffers.*/
-  unsigned char      *ref_frame_handle;
-  /*The strides for each plane in the reference frames.*/
-  int                 ref_ystride[3];
-  /*The number of unique border patterns.*/
-  int                 nborders;
-  /*The unique border patterns for all border fragments.
-    The borderi field of fragments which straddle the border indexes this
-     list.*/
-  oc_border_info      borders[16];
-  /*The frame number of the last keyframe.*/
-  ogg_int64_t         keyframe_num;
-  /*The frame number of the current frame.*/
-  ogg_int64_t         curframe_num;
-  /*The granpos of the current frame.*/
-  ogg_int64_t         granpos;
-  /*The type of the current frame.*/
-  signed char         frame_type;
-  /*The bias to add to the frame count when computing granule positions.*/
-  unsigned char       granpos_bias;
-  /*The number of quality indices used in the current frame.*/
-  unsigned char       nqis;
-  /*The quality indices of the current frame.*/
-  unsigned char       qis[3];
-  /*The dequantization tables, stored in zig-zag order, and indexed by
-     qi, pli, qti, and zzi.*/
-  ogg_uint16_t       *dequant_tables[64][3][2];
-  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
-  /*Loop filter strength parameters.*/
-  unsigned char       loop_filter_limits[64];
-};
-
-
-
-/*The function type used to fill in the chroma plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lmbmv: The luma macro-block level motion vector to fill in for use in
-           prediction.
-  _lbmvs: The luma block-level motion vectors.*/
-typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
-
-
-
-/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.*/
-extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
-
-
-
-int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
-void oc_state_clear(oc_theora_state *_state);
-void oc_state_accel_init_c(oc_theora_state *_state);
-void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
- int _y0,int _yend);
-void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
-void oc_state_borders_fill(oc_theora_state *_state,int _refi);
-void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
- th_ycbcr_buffer _img);
-int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
-int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
- int _pli,oc_mv _mv);
-
-void oc_loop_filter_init_c(signed char _bv[256],int _flimit);
-void oc_state_loop_filter(oc_theora_state *_state,int _frame);
-# if defined(OC_DUMP_IMAGES)
-int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
- const char *_suf);
-# endif
-
-/*Default pure-C implementations of shared accelerated functions.*/
-void oc_frag_copy_c(unsigned char *_dst,
- const unsigned char *_src,int _src_ystride);
-void oc_frag_copy_list_c(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter_c(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
-void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_restore_fpu_c(void);
-
-/*We need a way to call a few encoder functions without introducing a link-time
-   dependency into the decoder, while still allowing the old alpha API which
-   does not distinguish between encoder and decoder objects to be used.
-  We do this by placing a function table at the start of the encoder object
-   which can dispatch into the encoder library.
-  We do a similar thing for the decoder in case we ever decide to split off a
-   common base library.*/
-typedef void (*oc_state_clear_func)(theora_state *_th);
-typedef int (*oc_state_control_func)(theora_state *th,int _req,
- void *_buf,size_t _buf_sz);
-typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-typedef double (*oc_state_granule_time_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-
-
-struct oc_state_dispatch_vtable{
-  oc_state_clear_func         clear;
-  oc_state_control_func       control;
-  oc_state_granule_frame_func granule_frame;
-  oc_state_granule_time_func  granule_time;
-};
-
-#endif
diff --git a/media/libtheora/lib/x86/mmxfrag.c b/media/libtheora/lib/x86/mmxfrag.c
deleted file mode 100644
index b7df1c1ec..000000000
--- a/media/libtheora/lib/x86/mmxfrag.c
+++ /dev/null
@@ -1,368 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxfrag.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of fragment reconstruction for motion compensation.
-  Originally written by Rudolf Marek.
-  Additional optimization by Nils Pipenbrinck.
-  Note: Loops are unrolled for best performance.
-  The iteration each instruction belongs to is marked in the comments as #i.*/
-#include <stddef.h>
-#include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
-   between rows.*/
-# define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
-  do{ \
-    const unsigned char *src; \
-    unsigned char       *dst; \
-    ptrdiff_t            ystride3; \
-    src=(_src); \
-    dst=(_dst); \
-    __asm__ __volatile__( \
-      /*src+0*ystride*/ \
-      "movq (%[src]),%%mm0\n\t" \
-      /*src+1*ystride*/ \
-      "movq (%[src],%[ystride]),%%mm1\n\t" \
-      /*ystride3=ystride*3*/ \
-      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
-      /*src+2*ystride*/ \
-      "movq (%[src],%[ystride],2),%%mm2\n\t" \
-      /*src+3*ystride*/ \
-      "movq (%[src],%[ystride3]),%%mm3\n\t" \
-      /*dst+0*ystride*/ \
-      "movq %%mm0,(%[dst])\n\t" \
-      /*dst+1*ystride*/ \
-      "movq %%mm1,(%[dst],%[ystride])\n\t" \
-      /*Pointer to next 4.*/ \
-      "lea (%[src],%[ystride],4),%[src]\n\t" \
-      /*dst+2*ystride*/ \
-      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
-      /*dst+3*ystride*/ \
-      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
-      /*Pointer to next 4.*/ \
-      "lea (%[dst],%[ystride],4),%[dst]\n\t" \
-      /*src+0*ystride*/ \
-      "movq (%[src]),%%mm0\n\t" \
-      /*src+1*ystride*/ \
-      "movq (%[src],%[ystride]),%%mm1\n\t" \
-      /*src+2*ystride*/ \
-      "movq (%[src],%[ystride],2),%%mm2\n\t" \
-      /*src+3*ystride*/ \
-      "movq (%[src],%[ystride3]),%%mm3\n\t" \
-      /*dst+0*ystride*/ \
-      "movq %%mm0,(%[dst])\n\t" \
-      /*dst+1*ystride*/ \
-      "movq %%mm1,(%[dst],%[ystride])\n\t" \
-      /*dst+2*ystride*/ \
-      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
-      /*dst+3*ystride*/ \
-      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
-      :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \
-      :[ystride]"r"((ptrdiff_t)(_ystride)) \
-      :"memory" \
-    ); \
-  } \
-  while(0)
-
-/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
-   between rows.*/
-void oc_frag_copy_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride){
-  OC_FRAG_COPY_MMX(_dst,_src,_ystride);
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
-   frame to another.
-  _dst_frame:     The reference frame to copy to.
-  _src_frame:     The reference frame to copy from.
-  _ystride:       The row stride of the reference frames.
-  _fragis:        A pointer to a list of fragment indices.
-  _nfragis:       The number of fragment indices to copy.
-  _frag_buf_offs: The offsets of fragments in the reference frames.*/
-void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs){
-  ptrdiff_t fragii;
-  for(fragii=0;fragii<_nfragis;fragii++){
-    ptrdiff_t frag_buf_off;
-    frag_buf_off=_frag_buf_offs[_fragis[fragii]];
-    OC_FRAG_COPY_MMX(_dst_frame+frag_buf_off,
-     _src_frame+frag_buf_off,_ystride);
-  }
-}
-
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue){
-  __asm__ __volatile__(
-    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
-    "pcmpeqw %%mm0,%%mm0\n\t"
-    /*#0 Load low residue.*/
-    "movq 0*8(%[residue]),%%mm1\n\t"
-    /*#0 Load high residue.*/
-    "movq 1*8(%[residue]),%%mm2\n\t"
-    /*Set mm0 to 0x8000800080008000.*/
-    "psllw $15,%%mm0\n\t"
-    /*#1 Load low residue.*/
-    "movq 2*8(%[residue]),%%mm3\n\t"
-    /*#1 Load high residue.*/
-    "movq 3*8(%[residue]),%%mm4\n\t"
-    /*Set mm0 to 0x0080008000800080.*/
-    "psrlw $8,%%mm0\n\t"
-    /*#2 Load low residue.*/
-    "movq 4*8(%[residue]),%%mm5\n\t"
-    /*#2 Load high residue.*/
-    "movq 5*8(%[residue]),%%mm6\n\t"
-    /*#0 Bias low  residue.*/
-    "paddsw %%mm0,%%mm1\n\t"
-    /*#0 Bias high residue.*/
-    "paddsw %%mm0,%%mm2\n\t"
-    /*#0 Pack to byte.*/
-    "packuswb %%mm2,%%mm1\n\t"
-    /*#1 Bias low  residue.*/
-    "paddsw %%mm0,%%mm3\n\t"
-    /*#1 Bias high residue.*/
-    "paddsw %%mm0,%%mm4\n\t"
-    /*#1 Pack to byte.*/
-    "packuswb %%mm4,%%mm3\n\t"
-    /*#2 Bias low  residue.*/
-    "paddsw %%mm0,%%mm5\n\t"
-    /*#2 Bias high residue.*/
-    "paddsw %%mm0,%%mm6\n\t"
-    /*#2 Pack to byte.*/
-    "packuswb %%mm6,%%mm5\n\t"
-    /*#0 Write row.*/
-    "movq %%mm1,(%[dst])\n\t"
-    /*#1 Write row.*/
-    "movq %%mm3,(%[dst],%[ystride])\n\t"
-    /*#2 Write row.*/
-    "movq %%mm5,(%[dst],%[ystride],2)\n\t"
-    /*#3 Load low residue.*/
-    "movq 6*8(%[residue]),%%mm1\n\t"
-    /*#3 Load high residue.*/
-    "movq 7*8(%[residue]),%%mm2\n\t"
-    /*#4 Load high residue.*/
-    "movq 8*8(%[residue]),%%mm3\n\t"
-    /*#4 Load high residue.*/
-    "movq 9*8(%[residue]),%%mm4\n\t"
-    /*#5 Load high residue.*/
-    "movq 10*8(%[residue]),%%mm5\n\t"
-    /*#5 Load high residue.*/
-    "movq 11*8(%[residue]),%%mm6\n\t"
-    /*#3 Bias low  residue.*/
-    "paddsw %%mm0,%%mm1\n\t"
-    /*#3 Bias high residue.*/
-    "paddsw %%mm0,%%mm2\n\t"
-    /*#3 Pack to byte.*/
-    "packuswb %%mm2,%%mm1\n\t"
-    /*#4 Bias low  residue.*/
-    "paddsw %%mm0,%%mm3\n\t"
-    /*#4 Bias high residue.*/
-    "paddsw %%mm0,%%mm4\n\t"
-    /*#4 Pack to byte.*/
-    "packuswb %%mm4,%%mm3\n\t"
-    /*#5 Bias low  residue.*/
-    "paddsw %%mm0,%%mm5\n\t"
-    /*#5 Bias high residue.*/
-    "paddsw %%mm0,%%mm6\n\t"
-    /*#5 Pack to byte.*/
-    "packuswb %%mm6,%%mm5\n\t"
-    /*#3 Write row.*/
-    "movq %%mm1,(%[dst],%[ystride3])\n\t"
-    /*#4 Write row.*/
-    "movq %%mm3,(%[dst4])\n\t"
-    /*#5 Write row.*/
-    "movq %%mm5,(%[dst4],%[ystride])\n\t"
-    /*#6 Load low residue.*/
-    "movq 12*8(%[residue]),%%mm1\n\t"
-    /*#6 Load high residue.*/
-    "movq 13*8(%[residue]),%%mm2\n\t"
-    /*#7 Load low residue.*/
-    "movq 14*8(%[residue]),%%mm3\n\t"
-    /*#7 Load high residue.*/
-    "movq 15*8(%[residue]),%%mm4\n\t"
-    /*#6 Bias low  residue.*/
-    "paddsw %%mm0,%%mm1\n\t"
-    /*#6 Bias high residue.*/
-    "paddsw %%mm0,%%mm2\n\t"
-    /*#6 Pack to byte.*/
-    "packuswb %%mm2,%%mm1\n\t"
-    /*#7 Bias low  residue.*/
-    "paddsw %%mm0,%%mm3\n\t"
-    /*#7 Bias high residue.*/
-    "paddsw %%mm0,%%mm4\n\t"
-    /*#7 Pack to byte.*/
-    "packuswb %%mm4,%%mm3\n\t"
-    /*#6 Write row.*/
-    "movq %%mm1,(%[dst4],%[ystride],2)\n\t"
-    /*#7 Write row.*/
-    "movq %%mm3,(%[dst4],%[ystride3])\n\t"
-    :
-    :[residue]"r"(_residue),
-     [dst]"r"(_dst),
-     [dst4]"r"(_dst+(_ystride<<2)),
-     [ystride]"r"((ptrdiff_t)_ystride),
-     [ystride3]"r"((ptrdiff_t)_ystride*3)
-    :"memory"
-  );
-}
-
-void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
- int _ystride,const ogg_int16_t *_residue){
-  int i;
-  /*Zero mm0.*/
-  __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
-  for(i=4;i-->0;){
-    __asm__ __volatile__(
-      /*#0 Load source.*/
-      "movq (%[src]),%%mm3\n\t"
-      /*#1 Load source.*/
-      "movq (%[src],%[ystride]),%%mm7\n\t"
-      /*#0 Get copy of src.*/
-      "movq %%mm3,%%mm4\n\t"
-      /*#0 Expand high source.*/
-      "punpckhbw %%mm0,%%mm4\n\t"
-      /*#0 Expand low  source.*/
-      "punpcklbw %%mm0,%%mm3\n\t"
-      /*#0 Add residue high.*/
-      "paddsw 8(%[residue]),%%mm4\n\t"
-      /*#1 Get copy of src.*/
-      "movq %%mm7,%%mm2\n\t"
-      /*#0 Add residue low.*/
-      "paddsw (%[residue]), %%mm3\n\t"
-      /*#1 Expand high source.*/
-      "punpckhbw %%mm0,%%mm2\n\t"
-      /*#0 Pack final row pixels.*/
-      "packuswb %%mm4,%%mm3\n\t"
-      /*#1 Expand low  source.*/
-      "punpcklbw %%mm0,%%mm7\n\t"
-      /*#1 Add residue low.*/
-      "paddsw 16(%[residue]),%%mm7\n\t"
-      /*#1 Add residue high.*/
-      "paddsw 24(%[residue]),%%mm2\n\t"
-      /*Advance residue.*/
-      "lea 32(%[residue]),%[residue]\n\t"
-      /*#1 Pack final row pixels.*/
-      "packuswb %%mm2,%%mm7\n\t"
-      /*Advance src.*/
-      "lea (%[src],%[ystride],2),%[src]\n\t"
-      /*#0 Write row.*/
-      "movq %%mm3,(%[dst])\n\t"
-      /*#1 Write row.*/
-      "movq %%mm7,(%[dst],%[ystride])\n\t"
-      /*Advance dst.*/
-      "lea (%[dst],%[ystride],2),%[dst]\n\t"
-      :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
-      :[ystride]"r"((ptrdiff_t)_ystride)
-      :"memory"
-    );
-  }
-}
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
-  int i;
-  /*Zero mm7.*/
-  __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
-  for(i=4;i-->0;){
-    __asm__ __volatile__(
-      /*#0 Load src1.*/
-      "movq (%[src1]),%%mm0\n\t"
-      /*#0 Load src2.*/
-      "movq (%[src2]),%%mm2\n\t"
-      /*#0 Copy src1.*/
-      "movq %%mm0,%%mm1\n\t"
-      /*#0 Copy src2.*/
-      "movq %%mm2,%%mm3\n\t"
-      /*#1 Load src1.*/
-      "movq (%[src1],%[ystride]),%%mm4\n\t"
-      /*#0 Unpack lower src1.*/
-      "punpcklbw %%mm7,%%mm0\n\t"
-      /*#1 Load src2.*/
-      "movq (%[src2],%[ystride]),%%mm5\n\t"
-      /*#0 Unpack higher src1.*/
-      "punpckhbw %%mm7,%%mm1\n\t"
-      /*#0 Unpack lower src2.*/
-      "punpcklbw %%mm7,%%mm2\n\t"
-      /*#0 Unpack higher src2.*/
-      "punpckhbw %%mm7,%%mm3\n\t"
-      /*Advance src1 ptr.*/
-      "lea (%[src1],%[ystride],2),%[src1]\n\t"
-      /*Advance src2 ptr.*/
-      "lea (%[src2],%[ystride],2),%[src2]\n\t"
-      /*#0 Lower src1+src2.*/
-      "paddsw %%mm2,%%mm0\n\t"
-      /*#0 Higher src1+src2.*/
-      "paddsw %%mm3,%%mm1\n\t"
-      /*#1 Copy src1.*/
-      "movq %%mm4,%%mm2\n\t"
-      /*#0 Build lo average.*/
-      "psraw $1,%%mm0\n\t"
-      /*#1 Copy src2.*/
-      "movq %%mm5,%%mm3\n\t"
-      /*#1 Unpack lower src1.*/
-      "punpcklbw %%mm7,%%mm4\n\t"
-      /*#0 Build hi average.*/
-      "psraw $1,%%mm1\n\t"
-      /*#1 Unpack higher src1.*/
-      "punpckhbw %%mm7,%%mm2\n\t"
-      /*#0 low+=residue.*/
-      "paddsw (%[residue]),%%mm0\n\t"
-      /*#1 Unpack lower src2.*/
-      "punpcklbw %%mm7,%%mm5\n\t"
-      /*#0 high+=residue.*/
-      "paddsw 8(%[residue]),%%mm1\n\t"
-      /*#1 Unpack higher src2.*/
-      "punpckhbw %%mm7,%%mm3\n\t"
-      /*#1 Lower src1+src2.*/
-      "paddsw %%mm4,%%mm5\n\t"
-      /*#0 Pack and saturate.*/
-      "packuswb %%mm1,%%mm0\n\t"
-      /*#1 Higher src1+src2.*/
-      "paddsw %%mm2,%%mm3\n\t"
-      /*#0 Write row.*/
-      "movq %%mm0,(%[dst])\n\t"
-      /*#1 Build lo average.*/
-      "psraw $1,%%mm5\n\t"
-      /*#1 Build hi average.*/
-      "psraw $1,%%mm3\n\t"
-      /*#1 low+=residue.*/
-      "paddsw 16(%[residue]),%%mm5\n\t"
-      /*#1 high+=residue.*/
-      "paddsw 24(%[residue]),%%mm3\n\t"
-      /*#1 Pack and saturate.*/
-      "packuswb  %%mm3,%%mm5\n\t"
-      /*#1 Write row ptr.*/
-      "movq %%mm5,(%[dst],%[ystride])\n\t"
-      /*Advance residue ptr.*/
-      "add $32,%[residue]\n\t"
-      /*Advance dest ptr.*/
-      "lea (%[dst],%[ystride],2),%[dst]\n\t"
-     :[dst]"+r"(_dst),[residue]"+r"(_residue),
-      [src1]"+%r"(_src1),[src2]"+r"(_src2)
-     :[ystride]"r"((ptrdiff_t)_ystride)
-     :"memory"
-    );
-  }
-}
-
-void oc_restore_fpu_mmx(void){
-  __asm__ __volatile__("emms\n\t");
-}
-#endif
diff --git a/media/libtheora/lib/x86/mmxidct.c b/media/libtheora/lib/x86/mmxidct.c
deleted file mode 100644
index 8d61bdfb1..000000000
--- a/media/libtheora/lib/x86/mmxidct.c
+++ /dev/null
@@ -1,562 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxidct.c 17446 2010-09-23 20:06:20Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of Theora's iDCT.
-  Originally written by Rudolf Marek, based on code from On2's VP3.*/
-#include "x86int.h"
-#include "../dct.h"
-
-#if defined(OC_X86_ASM)
-
-/*These are offsets into the table of constants below.*/
-/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
-#define OC_COSINE_OFFSET (0)
-/*A row of 8's.*/
-#define OC_EIGHT_OFFSET  (56)
-
-
-
-/*38 cycles*/
-#define OC_IDCT_BEGIN(_y,_x) \
-  "#OC_IDCT_BEGIN\n\t" \
-  "movq "OC_I(3,_x)",%%mm2\n\t" \
-  "movq "OC_MEM_OFFS(0x30,c)",%%mm6\n\t" \
-  "movq %%mm2,%%mm4\n\t" \
-  "movq "OC_J(5,_x)",%%mm7\n\t" \
-  "pmulhw %%mm6,%%mm4\n\t" \
-  "movq "OC_MEM_OFFS(0x50,c)",%%mm1\n\t" \
-  "pmulhw %%mm7,%%mm6\n\t" \
-  "movq %%mm1,%%mm5\n\t" \
-  "pmulhw %%mm2,%%mm1\n\t" \
-  "movq "OC_I(1,_x)",%%mm3\n\t" \
-  "pmulhw %%mm7,%%mm5\n\t" \
-  "movq "OC_MEM_OFFS(0x10,c)",%%mm0\n\t" \
-  "paddw %%mm2,%%mm4\n\t" \
-  "paddw %%mm7,%%mm6\n\t" \
-  "paddw %%mm1,%%mm2\n\t" \
-  "movq "OC_J(7,_x)",%%mm1\n\t" \
-  "paddw %%mm5,%%mm7\n\t" \
-  "movq %%mm0,%%mm5\n\t" \
-  "pmulhw %%mm3,%%mm0\n\t" \
-  "paddw %%mm7,%%mm4\n\t" \
-  "pmulhw %%mm1,%%mm5\n\t" \
-  "movq "OC_MEM_OFFS(0x70,c)",%%mm7\n\t" \
-  "psubw %%mm2,%%mm6\n\t" \
-  "paddw %%mm3,%%mm0\n\t" \
-  "pmulhw %%mm7,%%mm3\n\t" \
-  "movq "OC_I(2,_x)",%%mm2\n\t" \
-  "pmulhw %%mm1,%%mm7\n\t" \
-  "paddw %%mm1,%%mm5\n\t" \
-  "movq %%mm2,%%mm1\n\t" \
-  "pmulhw "OC_MEM_OFFS(0x20,c)",%%mm2\n\t" \
-  "psubw %%mm5,%%mm3\n\t" \
-  "movq "OC_J(6,_x)",%%mm5\n\t" \
-  "paddw %%mm7,%%mm0\n\t" \
-  "movq %%mm5,%%mm7\n\t" \
-  "psubw %%mm4,%%mm0\n\t" \
-  "pmulhw "OC_MEM_OFFS(0x20,c)",%%mm5\n\t" \
-  "paddw %%mm1,%%mm2\n\t" \
-  "pmulhw "OC_MEM_OFFS(0x60,c)",%%mm1\n\t" \
-  "paddw %%mm4,%%mm4\n\t" \
-  "paddw %%mm0,%%mm4\n\t" \
-  "psubw %%mm6,%%mm3\n\t" \
-  "paddw %%mm7,%%mm5\n\t" \
-  "paddw %%mm6,%%mm6\n\t" \
-  "pmulhw "OC_MEM_OFFS(0x60,c)",%%mm7\n\t" \
-  "paddw %%mm3,%%mm6\n\t" \
-  "movq %%mm4,"OC_I(1,_y)"\n\t" \
-  "psubw %%mm5,%%mm1\n\t" \
-  "movq "OC_MEM_OFFS(0x40,c)",%%mm4\n\t" \
-  "movq %%mm3,%%mm5\n\t" \
-  "pmulhw %%mm4,%%mm3\n\t" \
-  "paddw %%mm2,%%mm7\n\t" \
-  "movq %%mm6,"OC_I(2,_y)"\n\t" \
-  "movq %%mm0,%%mm2\n\t" \
-  "movq "OC_I(0,_x)",%%mm6\n\t" \
-  "pmulhw %%mm4,%%mm0\n\t" \
-  "paddw %%mm3,%%mm5\n\t" \
-  "movq "OC_J(4,_x)",%%mm3\n\t" \
-  "psubw %%mm1,%%mm5\n\t" \
-  "paddw %%mm0,%%mm2\n\t" \
-  "psubw %%mm3,%%mm6\n\t" \
-  "movq %%mm6,%%mm0\n\t" \
-  "pmulhw %%mm4,%%mm6\n\t" \
-  "paddw %%mm3,%%mm3\n\t" \
-  "paddw %%mm1,%%mm1\n\t" \
-  "paddw %%mm0,%%mm3\n\t" \
-  "paddw %%mm5,%%mm1\n\t" \
-  "pmulhw %%mm3,%%mm4\n\t" \
-  "paddw %%mm0,%%mm6\n\t" \
-  "psubw %%mm2,%%mm6\n\t" \
-  "paddw %%mm2,%%mm2\n\t" \
-  "movq "OC_I(1,_y)",%%mm0\n\t" \
-  "paddw %%mm6,%%mm2\n\t" \
-  "paddw %%mm3,%%mm4\n\t" \
-  "psubw %%mm1,%%mm2\n\t" \
-  "#end OC_IDCT_BEGIN\n\t" \
-
-/*38+8=46 cycles.*/
-#define OC_ROW_IDCT(_y,_x) \
-  "#OC_ROW_IDCT\n" \
-  OC_IDCT_BEGIN(_y,_x) \
-  /*r3=D'*/ \
-  "movq "OC_I(2,_y)",%%mm3\n\t" \
-  /*r4=E'=E-G*/ \
-  "psubw %%mm7,%%mm4\n\t" \
-  /*r1=H'+H'*/ \
-  "paddw %%mm1,%%mm1\n\t" \
-  /*r7=G+G*/ \
-  "paddw %%mm7,%%mm7\n\t" \
-  /*r1=R1=A''+H'*/ \
-  "paddw %%mm2,%%mm1\n\t" \
-  /*r7=G'=E+G*/ \
-  "paddw %%mm4,%%mm7\n\t" \
-  /*r4=R4=E'-D'*/ \
-  "psubw %%mm3,%%mm4\n\t" \
-  "paddw %%mm3,%%mm3\n\t" \
-  /*r6=R6=F'-B''*/ \
-  "psubw %%mm5,%%mm6\n\t" \
-  "paddw %%mm5,%%mm5\n\t" \
-  /*r3=R3=E'+D'*/ \
-  "paddw %%mm4,%%mm3\n\t" \
-  /*r5=R5=F'+B''*/ \
-  "paddw %%mm6,%%mm5\n\t" \
-  /*r7=R7=G'-C'*/ \
-  "psubw %%mm0,%%mm7\n\t" \
-  "paddw %%mm0,%%mm0\n\t" \
-  /*Save R1.*/ \
-  "movq %%mm1,"OC_I(1,_y)"\n\t" \
-  /*r0=R0=G.+C.*/ \
-  "paddw %%mm7,%%mm0\n\t" \
-  "#end OC_ROW_IDCT\n\t" \
-
-/*The following macro does two 4x4 transposes in place.
-  At entry, we assume:
-    r0 = a3 a2 a1 a0
-  I(1) = b3 b2 b1 b0
-    r2 = c3 c2 c1 c0
-    r3 = d3 d2 d1 d0
-
-    r4 = e3 e2 e1 e0
-    r5 = f3 f2 f1 f0
-    r6 = g3 g2 g1 g0
-    r7 = h3 h2 h1 h0
-
-  At exit, we have:
-  I(0) = d0 c0 b0 a0
-  I(1) = d1 c1 b1 a1
-  I(2) = d2 c2 b2 a2
-  I(3) = d3 c3 b3 a3
-
-  J(4) = h0 g0 f0 e0
-  J(5) = h1 g1 f1 e1
-  J(6) = h2 g2 f2 e2
-  J(7) = h3 g3 f3 e3
-
-  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
-  J(4) J(5) J(6) J(7) is the transpose of r4  r5  r6 r7.
-
-  Since r1 is free at entry, we calculate the Js first.*/
-/*19 cycles.*/
-#define OC_TRANSPOSE(_y) \
-  "#OC_TRANSPOSE\n\t" \
-  "movq %%mm4,%%mm1\n\t" \
-  "punpcklwd %%mm5,%%mm4\n\t" \
-  "movq %%mm0,"OC_I(0,_y)"\n\t" \
-  "punpckhwd %%mm5,%%mm1\n\t" \
-  "movq %%mm6,%%mm0\n\t" \
-  "punpcklwd %%mm7,%%mm6\n\t" \
-  "movq %%mm4,%%mm5\n\t" \
-  "punpckldq %%mm6,%%mm4\n\t" \
-  "punpckhdq %%mm6,%%mm5\n\t" \
-  "movq %%mm1,%%mm6\n\t" \
-  "movq %%mm4,"OC_J(4,_y)"\n\t" \
-  "punpckhwd %%mm7,%%mm0\n\t" \
-  "movq %%mm5,"OC_J(5,_y)"\n\t" \
-  "punpckhdq %%mm0,%%mm6\n\t" \
-  "movq "OC_I(0,_y)",%%mm4\n\t" \
-  "punpckldq %%mm0,%%mm1\n\t" \
-  "movq "OC_I(1,_y)",%%mm5\n\t" \
-  "movq %%mm4,%%mm0\n\t" \
-  "movq %%mm6,"OC_J(7,_y)"\n\t" \
-  "punpcklwd %%mm5,%%mm0\n\t" \
-  "movq %%mm1,"OC_J(6,_y)"\n\t" \
-  "punpckhwd %%mm5,%%mm4\n\t" \
-  "movq %%mm2,%%mm5\n\t" \
-  "punpcklwd %%mm3,%%mm2\n\t" \
-  "movq %%mm0,%%mm1\n\t" \
-  "punpckldq %%mm2,%%mm0\n\t" \
-  "punpckhdq %%mm2,%%mm1\n\t" \
-  "movq %%mm4,%%mm2\n\t" \
-  "movq %%mm0,"OC_I(0,_y)"\n\t" \
-  "punpckhwd %%mm3,%%mm5\n\t" \
-  "movq %%mm1,"OC_I(1,_y)"\n\t" \
-  "punpckhdq %%mm5,%%mm4\n\t" \
-  "punpckldq %%mm5,%%mm2\n\t" \
-  "movq %%mm4,"OC_I(3,_y)"\n\t" \
-  "movq %%mm2,"OC_I(2,_y)"\n\t" \
-  "#end OC_TRANSPOSE\n\t" \
-
-/*38+19=57 cycles.*/
-#define OC_COLUMN_IDCT(_y) \
-  "#OC_COLUMN_IDCT\n" \
-  OC_IDCT_BEGIN(_y,_y) \
-  "paddw "OC_MEM_OFFS(0x00,c)",%%mm2\n\t" \
-  /*r1=H'+H'*/ \
-  "paddw %%mm1,%%mm1\n\t" \
-  /*r1=R1=A''+H'*/ \
-  "paddw %%mm2,%%mm1\n\t" \
-  /*r2=NR2*/ \
-  "psraw $4,%%mm2\n\t" \
-  /*r4=E'=E-G*/ \
-  "psubw %%mm7,%%mm4\n\t" \
-  /*r1=NR1*/ \
-  "psraw $4,%%mm1\n\t" \
-  /*r3=D'*/ \
-  "movq "OC_I(2,_y)",%%mm3\n\t" \
-  /*r7=G+G*/ \
-  "paddw %%mm7,%%mm7\n\t" \
-  /*Store NR2 at I(2).*/ \
-  "movq %%mm2,"OC_I(2,_y)"\n\t" \
-  /*r7=G'=E+G*/ \
-  "paddw %%mm4,%%mm7\n\t" \
-  /*Store NR1 at I(1).*/ \
-  "movq %%mm1,"OC_I(1,_y)"\n\t" \
-  /*r4=R4=E'-D'*/ \
-  "psubw %%mm3,%%mm4\n\t" \
-  "paddw "OC_MEM_OFFS(0x00,c)",%%mm4\n\t" \
-  /*r3=D'+D'*/ \
-  "paddw %%mm3,%%mm3\n\t" \
-  /*r3=R3=E'+D'*/ \
-  "paddw %%mm4,%%mm3\n\t" \
-  /*r4=NR4*/ \
-  "psraw $4,%%mm4\n\t" \
-  /*r6=R6=F'-B''*/ \
-  "psubw %%mm5,%%mm6\n\t" \
-  /*r3=NR3*/ \
-  "psraw $4,%%mm3\n\t" \
-  "paddw "OC_MEM_OFFS(0x00,c)",%%mm6\n\t" \
-  /*r5=B''+B''*/ \
-  "paddw %%mm5,%%mm5\n\t" \
-  /*r5=R5=F'+B''*/ \
-  "paddw %%mm6,%%mm5\n\t" \
-  /*r6=NR6*/ \
-  "psraw $4,%%mm6\n\t" \
-  /*Store NR4 at J(4).*/ \
-  "movq %%mm4,"OC_J(4,_y)"\n\t" \
-  /*r5=NR5*/ \
-  "psraw $4,%%mm5\n\t" \
-  /*Store NR3 at I(3).*/ \
-  "movq %%mm3,"OC_I(3,_y)"\n\t" \
-  /*r7=R7=G'-C'*/ \
-  "psubw %%mm0,%%mm7\n\t" \
-  "paddw "OC_MEM_OFFS(0x00,c)",%%mm7\n\t" \
-  /*r0=C'+C'*/ \
-  "paddw %%mm0,%%mm0\n\t" \
-  /*r0=R0=G'+C'*/ \
-  "paddw %%mm7,%%mm0\n\t" \
-  /*r7=NR7*/ \
-  "psraw $4,%%mm7\n\t" \
-  /*Store NR6 at J(6).*/ \
-  "movq %%mm6,"OC_J(6,_y)"\n\t" \
-  /*r0=NR0*/ \
-  "psraw $4,%%mm0\n\t" \
-  /*Store NR5 at J(5).*/ \
-  "movq %%mm5,"OC_J(5,_y)"\n\t" \
-  /*Store NR7 at J(7).*/ \
-  "movq %%mm7,"OC_J(7,_y)"\n\t" \
-  /*Store NR0 at I(0).*/ \
-  "movq %%mm0,"OC_I(0,_y)"\n\t" \
-  "#end OC_COLUMN_IDCT\n\t" \
-
-static void oc_idct8x8_slow_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  /*This routine accepts an 8x8 matrix, but in partially transposed form.
-    Every 4x4 block is transposed.*/
-  __asm__ __volatile__(
-#define OC_I(_k,_y)   OC_MEM_OFFS((_k)*16,_y)
-#define OC_J(_k,_y)   OC_MEM_OFFS(((_k)-4)*16+8,_y)
-    OC_ROW_IDCT(y,x)
-    OC_TRANSPOSE(y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   OC_MEM_OFFS((_k)*16+64,_y)
-#define OC_J(_k,_y)   OC_MEM_OFFS(((_k)-4)*16+72,_y)
-    OC_ROW_IDCT(y,x)
-    OC_TRANSPOSE(y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   OC_MEM_OFFS((_k)*16,_y)
-#define OC_J(_k,_y)   OC_I(_k,_y)
-    OC_COLUMN_IDCT(y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   OC_MEM_OFFS((_k)*16+8,_y)
-#define OC_J(_k,_y)   OC_I(_k,_y)
-    OC_COLUMN_IDCT(y)
-#undef  OC_I
-#undef  OC_J
-    :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_y,64)
-    :[x]"m"OC_CONST_ARRAY_OPERAND(ogg_int16_t,_x,64),
-     [c]"m"OC_CONST_ARRAY_OPERAND(ogg_int16_t,OC_IDCT_CONSTS,128)
-  );
-  if(_x!=_y){
-    int i;
-    __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
-    for(i=0;i<4;i++){
-      __asm__ __volatile__(
-        "movq %%mm0,"OC_MEM_OFFS(0x00,x)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x08,x)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x10,x)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x18,x)"\n\t"
-        :[x]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_x+16*i,16)
-      );
-    }
-  }
-}
-
-/*25 cycles.*/
-#define OC_IDCT_BEGIN_10(_y,_x) \
- "#OC_IDCT_BEGIN_10\n\t" \
- "movq "OC_I(3,_x)",%%mm2\n\t" \
- "nop\n\t" \
- "movq "OC_MEM_OFFS(0x30,c)",%%mm6\n\t" \
- "movq %%mm2,%%mm4\n\t" \
- "movq "OC_MEM_OFFS(0x50,c)",%%mm1\n\t" \
- "pmulhw %%mm6,%%mm4\n\t" \
- "movq "OC_I(1,_x)",%%mm3\n\t" \
- "pmulhw %%mm2,%%mm1\n\t" \
- "movq "OC_MEM_OFFS(0x10,c)",%%mm0\n\t" \
- "paddw %%mm2,%%mm4\n\t" \
- "pxor %%mm6,%%mm6\n\t" \
- "paddw %%mm1,%%mm2\n\t" \
- "movq "OC_I(2,_x)",%%mm5\n\t" \
- "pmulhw %%mm3,%%mm0\n\t" \
- "movq %%mm5,%%mm1\n\t" \
- "paddw %%mm3,%%mm0\n\t" \
- "pmulhw "OC_MEM_OFFS(0x70,c)",%%mm3\n\t" \
- "psubw %%mm2,%%mm6\n\t" \
- "pmulhw "OC_MEM_OFFS(0x20,c)",%%mm5\n\t" \
- "psubw %%mm4,%%mm0\n\t" \
- "movq "OC_I(2,_x)",%%mm7\n\t" \
- "paddw %%mm4,%%mm4\n\t" \
- "paddw %%mm5,%%mm7\n\t" \
- "paddw %%mm0,%%mm4\n\t" \
- "pmulhw "OC_MEM_OFFS(0x60,c)",%%mm1\n\t" \
- "psubw %%mm6,%%mm3\n\t" \
- "movq %%mm4,"OC_I(1,_y)"\n\t" \
- "paddw %%mm6,%%mm6\n\t" \
- "movq "OC_MEM_OFFS(0x40,c)",%%mm4\n\t" \
- "paddw %%mm3,%%mm6\n\t" \
- "movq %%mm3,%%mm5\n\t" \
- "pmulhw %%mm4,%%mm3\n\t" \
- "movq %%mm6,"OC_I(2,_y)"\n\t" \
- "movq %%mm0,%%mm2\n\t" \
- "movq "OC_I(0,_x)",%%mm6\n\t" \
- "pmulhw %%mm4,%%mm0\n\t" \
- "paddw %%mm3,%%mm5\n\t" \
- "paddw %%mm0,%%mm2\n\t" \
- "psubw %%mm1,%%mm5\n\t" \
- "pmulhw %%mm4,%%mm6\n\t" \
- "paddw "OC_I(0,_x)",%%mm6\n\t" \
- "paddw %%mm1,%%mm1\n\t" \
- "movq %%mm6,%%mm4\n\t" \
- "paddw %%mm5,%%mm1\n\t" \
- "psubw %%mm2,%%mm6\n\t" \
- "paddw %%mm2,%%mm2\n\t" \
- "movq "OC_I(1,_y)",%%mm0\n\t" \
- "paddw %%mm6,%%mm2\n\t" \
- "psubw %%mm1,%%mm2\n\t" \
- "nop\n\t" \
- "#end OC_IDCT_BEGIN_10\n\t" \
-
-/*25+8=33 cycles.*/
-#define OC_ROW_IDCT_10(_y,_x) \
- "#OC_ROW_IDCT_10\n\t" \
- OC_IDCT_BEGIN_10(_y,_x) \
- /*r3=D'*/ \
- "movq "OC_I(2,_y)",%%mm3\n\t" \
- /*r4=E'=E-G*/ \
- "psubw %%mm7,%%mm4\n\t" \
- /*r1=H'+H'*/ \
- "paddw %%mm1,%%mm1\n\t" \
- /*r7=G+G*/ \
- "paddw %%mm7,%%mm7\n\t" \
- /*r1=R1=A''+H'*/ \
- "paddw %%mm2,%%mm1\n\t" \
- /*r7=G'=E+G*/ \
- "paddw %%mm4,%%mm7\n\t" \
- /*r4=R4=E'-D'*/ \
- "psubw %%mm3,%%mm4\n\t" \
- "paddw %%mm3,%%mm3\n\t" \
- /*r6=R6=F'-B''*/ \
- "psubw %%mm5,%%mm6\n\t" \
- "paddw %%mm5,%%mm5\n\t" \
- /*r3=R3=E'+D'*/ \
- "paddw %%mm4,%%mm3\n\t" \
- /*r5=R5=F'+B''*/ \
- "paddw %%mm6,%%mm5\n\t" \
- /*r7=R7=G'-C'*/ \
- "psubw %%mm0,%%mm7\n\t" \
- "paddw %%mm0,%%mm0\n\t" \
- /*Save R1.*/ \
- "movq %%mm1,"OC_I(1,_y)"\n\t" \
- /*r0=R0=G'+C'*/ \
- "paddw %%mm7,%%mm0\n\t" \
- "#end OC_ROW_IDCT_10\n\t" \
-
-/*25+19=44 cycles'*/
-#define OC_COLUMN_IDCT_10(_y) \
- "#OC_COLUMN_IDCT_10\n\t" \
- OC_IDCT_BEGIN_10(_y,_y) \
- "paddw "OC_MEM_OFFS(0x00,c)",%%mm2\n\t" \
- /*r1=H'+H'*/ \
- "paddw %%mm1,%%mm1\n\t" \
- /*r1=R1=A''+H'*/ \
- "paddw %%mm2,%%mm1\n\t" \
- /*r2=NR2*/ \
- "psraw $4,%%mm2\n\t" \
- /*r4=E'=E-G*/ \
- "psubw %%mm7,%%mm4\n\t" \
- /*r1=NR1*/ \
- "psraw $4,%%mm1\n\t" \
- /*r3=D'*/ \
- "movq "OC_I(2,_y)",%%mm3\n\t" \
- /*r7=G+G*/ \
- "paddw %%mm7,%%mm7\n\t" \
- /*Store NR2 at I(2).*/ \
- "movq %%mm2,"OC_I(2,_y)"\n\t" \
- /*r7=G'=E+G*/ \
- "paddw %%mm4,%%mm7\n\t" \
- /*Store NR1 at I(1).*/ \
- "movq %%mm1,"OC_I(1,_y)"\n\t" \
- /*r4=R4=E'-D'*/ \
- "psubw %%mm3,%%mm4\n\t" \
- "paddw "OC_MEM_OFFS(0x00,c)",%%mm4\n\t" \
- /*r3=D'+D'*/ \
- "paddw %%mm3,%%mm3\n\t" \
- /*r3=R3=E'+D'*/ \
- "paddw %%mm4,%%mm3\n\t" \
- /*r4=NR4*/ \
- "psraw $4,%%mm4\n\t" \
- /*r6=R6=F'-B''*/ \
- "psubw %%mm5,%%mm6\n\t" \
- /*r3=NR3*/ \
- "psraw $4,%%mm3\n\t" \
- "paddw "OC_MEM_OFFS(0x00,c)",%%mm6\n\t" \
- /*r5=B''+B''*/ \
- "paddw %%mm5,%%mm5\n\t" \
- /*r5=R5=F'+B''*/ \
- "paddw %%mm6,%%mm5\n\t" \
- /*r6=NR6*/ \
- "psraw $4,%%mm6\n\t" \
- /*Store NR4 at J(4).*/ \
- "movq %%mm4,"OC_J(4,_y)"\n\t" \
- /*r5=NR5*/ \
- "psraw $4,%%mm5\n\t" \
- /*Store NR3 at I(3).*/ \
- "movq %%mm3,"OC_I(3,_y)"\n\t" \
- /*r7=R7=G'-C'*/ \
- "psubw %%mm0,%%mm7\n\t" \
- "paddw "OC_MEM_OFFS(0x00,c)",%%mm7\n\t" \
- /*r0=C'+C'*/ \
- "paddw %%mm0,%%mm0\n\t" \
- /*r0=R0=G'+C'*/ \
- "paddw %%mm7,%%mm0\n\t" \
- /*r7=NR7*/ \
- "psraw $4,%%mm7\n\t" \
- /*Store NR6 at J(6).*/ \
- "movq %%mm6,"OC_J(6,_y)"\n\t" \
- /*r0=NR0*/ \
- "psraw $4,%%mm0\n\t" \
- /*Store NR5 at J(5).*/ \
- "movq %%mm5,"OC_J(5,_y)"\n\t" \
- /*Store NR7 at J(7).*/ \
- "movq %%mm7,"OC_J(7,_y)"\n\t" \
- /*Store NR0 at I(0).*/ \
- "movq %%mm0,"OC_I(0,_y)"\n\t" \
- "#end OC_COLUMN_IDCT_10\n\t" \
-
-static void oc_idct8x8_10_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  __asm__ __volatile__(
-#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
-#define OC_J(_k,_y) OC_MEM_OFFS(((_k)-4)*16+8,_y)
-    /*Done with dequant, descramble, and partial transpose.
-      Now do the iDCT itself.*/
-    OC_ROW_IDCT_10(y,x)
-    OC_TRANSPOSE(y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
-#define OC_J(_k,_y) OC_I(_k,_y)
-    OC_COLUMN_IDCT_10(y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16+8,_y)
-#define OC_J(_k,_y) OC_I(_k,_y)
-    OC_COLUMN_IDCT_10(y)
-#undef  OC_I
-#undef  OC_J
-    :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_y,64)
-    :[x]"m"OC_CONST_ARRAY_OPERAND(ogg_int16_t,_x,64),
-     [c]"m"OC_CONST_ARRAY_OPERAND(ogg_int16_t,OC_IDCT_CONSTS,128)
-  );
-  if(_x!=_y){
-    __asm__ __volatile__(
-      "pxor %%mm0,%%mm0\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x00,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x10,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x20,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x30,x)"\n\t"
-      :[x]"+m"OC_ARRAY_OPERAND(ogg_int16_t,_x,28)
-    );
-  }
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.*/
-void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
-  /*_last_zzi is subtly different from an actual count of the number of
-     coefficients we decoded for this block.
-    It contains the value of zzi BEFORE the final token in the block was
-     decoded.
-    In most cases this is an EOB token (the continuation of an EOB run from a
-     previous block counts), and so this is the same as the coefficient count.
-    However, in the case that the last token was NOT an EOB token, but filled
-     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
-    Provided the last token was not a pure zero run, the minimum value it can
-     be is 46, and so that doesn't affect any of the cases in this routine.
-    However, if the last token WAS a pure zero run of length 63, then _last_zzi
-     will be 1 while the number of coefficients decoded is 64.
-    Thus, we will trigger the following special case, where the real
-     coefficient count would not.
-    Note also that a zero run of length 64 will give _last_zzi a value of 0,
-     but we still process the DC coefficient, which might have a non-zero value
-     due to DC prediction.
-    Although convoluted, this is arguably the correct behavior: it allows us to
-     use a smaller transform when the block ends with a long zero run instead
-     of a normal EOB token.
-    It could be smarter... multiple separate zero runs at the end of a block
-     will fool it, but an encoder that generates these really deserves what it
-     gets.
-    Needless to say we inherited this approach from VP3.*/
-  /*Then perform the iDCT.*/
-  if(_last_zzi<=10)oc_idct8x8_10_mmx(_y,_x);
-  else oc_idct8x8_slow_mmx(_y,_x);
-}
-
-#endif
diff --git a/media/libtheora/lib/x86/mmxloop.h b/media/libtheora/lib/x86/mmxloop.h
deleted file mode 100644
index 1f6090b56..000000000
--- a/media/libtheora/lib/x86/mmxloop.h
+++ /dev/null
@@ -1,318 +0,0 @@
-#if !defined(_x86_mmxloop_H)
-# define _x86_mmxloop_H (1)
-# include <stddef.h>
-# include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
-  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
-   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/
-#define OC_LOOP_FILTER8_MMX \
-  "#OC_LOOP_FILTER8_MMX\n\t" \
-  /*mm7=0*/ \
-  "pxor %%mm7,%%mm7\n\t" \
-  /*mm6:mm0={a0,...,a7}*/ \
-  "movq %%mm0,%%mm6\n\t" \
-  "punpcklbw %%mm7,%%mm0\n\t" \
-  "punpckhbw %%mm7,%%mm6\n\t" \
-  /*mm3:mm5={d0,...,d7}*/ \
-  "movq %%mm3,%%mm5\n\t" \
-  "punpcklbw %%mm7,%%mm3\n\t" \
-  "punpckhbw %%mm7,%%mm5\n\t" \
-  /*mm6:mm0={a0-d0,...,a7-d7}*/ \
-  "psubw %%mm3,%%mm0\n\t" \
-  "psubw %%mm5,%%mm6\n\t" \
-  /*mm3:mm1={b0,...,b7}*/ \
-  "movq %%mm1,%%mm3\n\t" \
-  "punpcklbw %%mm7,%%mm1\n\t" \
-  "movq %%mm2,%%mm4\n\t" \
-  "punpckhbw %%mm7,%%mm3\n\t" \
-  /*mm5:mm4={c0,...,c7}*/ \
-  "movq %%mm2,%%mm5\n\t" \
-  "punpcklbw %%mm7,%%mm4\n\t" \
-  "punpckhbw %%mm7,%%mm5\n\t" \
-  /*mm7={3}x4 \
-    mm5:mm4={c0-b0,...,c7-b7}*/ \
-  "pcmpeqw %%mm7,%%mm7\n\t" \
-  "psubw %%mm1,%%mm4\n\t" \
-  "psrlw $14,%%mm7\n\t" \
-  "psubw %%mm3,%%mm5\n\t" \
-  /*Scale by 3.*/ \
-  "pmullw %%mm7,%%mm4\n\t" \
-  "pmullw %%mm7,%%mm5\n\t" \
-  /*mm7={4}x4 \
-    mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
-  "psrlw $1,%%mm7\n\t" \
-  "paddw %%mm0,%%mm4\n\t" \
-  "psllw $2,%%mm7\n\t" \
-  "movq (%[ll]),%%mm0\n\t" \
-  "paddw %%mm6,%%mm5\n\t" \
-  /*R_i has the range [-127,128], so we compute -R_i instead. \
-    mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
-  "psubw %%mm7,%%mm4\n\t" \
-  "psubw %%mm7,%%mm5\n\t" \
-  "psraw $3,%%mm4\n\t" \
-  "psraw $3,%%mm5\n\t" \
-  "pcmpeqb %%mm7,%%mm7\n\t" \
-  "packsswb %%mm5,%%mm4\n\t" \
-  "pxor %%mm6,%%mm6\n\t" \
-  "pxor %%mm7,%%mm4\n\t" \
-  "packuswb %%mm3,%%mm1\n\t" \
-  /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \
-  /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
-     we have to split things by sign (the other option is to work in 16 bits, \
-     but working in 8 bits gives much better parallelism). \
-    We compute abs(R_i), but save a mask of which terms were negative in mm6. \
-    Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
-    Finally, we split mm4 into positive and negative pieces using the mask in \
-     mm6, and add and subtract them as appropriate.*/ \
-  /*mm4=abs(-R_i)*/ \
-  /*mm7=255-2*L*/ \
-  "pcmpgtb %%mm4,%%mm6\n\t" \
-  "psubb %%mm0,%%mm7\n\t" \
-  "pxor %%mm6,%%mm4\n\t" \
-  "psubb %%mm0,%%mm7\n\t" \
-  "psubb %%mm6,%%mm4\n\t" \
-  /*mm7=255-max(2*L-abs(R_i),0)*/ \
-  "paddusb %%mm4,%%mm7\n\t" \
-  /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
-  "paddusb %%mm7,%%mm4\n\t" \
-  "psubusb %%mm7,%%mm4\n\t" \
-  /*Now split mm4 by the original sign of -R_i.*/ \
-  "movq %%mm4,%%mm5\n\t" \
-  "pand %%mm6,%%mm4\n\t" \
-  "pandn %%mm5,%%mm6\n\t" \
-  /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
-  /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
-  "paddusb %%mm4,%%mm1\n\t" \
-  "psubusb %%mm4,%%mm2\n\t" \
-  "psubusb %%mm6,%%mm1\n\t" \
-  "paddusb %%mm6,%%mm2\n\t" \
-
-/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
-  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
-   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}.
-  All other MMX registers are clobbered.*/
-#define OC_LOOP_FILTER8_MMXEXT \
-  "#OC_LOOP_FILTER8_MMXEXT\n\t" \
-  /*R_i=(a_i-3*b_i+3*c_i-d_i+4>>3) has the range [-127,128], so we compute \
-     -R_i=(-a_i+3*b_i-3*c_i+d_i+3>>3) instead.*/ \
-  /*This first part is based on the transformation \
-      f = -(3*(c-b)+a-d+4>>3) \
-        = -(3*(c+255-b)+(a+255-d)+4-1020>>3) \
-        = -(3*(c+~b)+(a+~d)-1016>>3) \
-        = 127-(3*(c+~b)+(a+~d)>>3) \
-        = 128+~(3*(c+~b)+(a+~d)>>3) (mod 256). \
-    Although pavgb(a,b) = (a+b+1>>1) (biased up), we rely heavily on the \
-     fact that ~pavgb(~a,~b) = (a+b>>1) (biased down). \
-    Using this, the last expression above can be computed in 8 bits of working \
-     precision via: \
-      u = ~pavgb(~b,c); \
-      v = pavgb(b,~c); \
-      This mask is 0 or 0xFF, and controls whether t is biased up or down: \
-      m = u-v; \
-      t = m^pavgb(m^~a,m^d); \
-      f = 128+pavgb(pavgb(t,u),v); \
-    This required some careful analysis to ensure that carries are propagated \
-     correctly in all cases, but has been checked exhaustively.*/ \
-  /*input (a, b, c, d, ., ., ., .)*/ \
-  /*ff=0xFF; \
-    u=b; \
-    v=c; \
-    ll=255-2*L;*/ \
-  "pcmpeqb %%mm7,%%mm7\n\t" \
-  "movq %%mm1,%%mm4\n\t" \
-  "movq %%mm2,%%mm5\n\t" \
-  "movq (%[ll]),%%mm6\n\t" \
-  /*allocated u, v, ll, ff: (a, b, c, d, u, v, ll, ff)*/ \
-  /*u^=ff; \
-    v^=ff;*/ \
-  "pxor %%mm7,%%mm4\n\t" \
-  "pxor %%mm7,%%mm5\n\t" \
-  /*allocated ll: (a, b, c, d, u, v, ll, ff)*/ \
-  /*u=pavgb(u,c); \
-    v=pavgb(v,b);*/ \
-  "pavgb %%mm2,%%mm4\n\t" \
-  "pavgb %%mm1,%%mm5\n\t" \
-  /*u^=ff; \
-    a^=ff;*/ \
-  "pxor %%mm7,%%mm4\n\t" \
-  "pxor %%mm7,%%mm0\n\t" \
-  /*m=u-v;*/ \
-  "psubb %%mm5,%%mm4\n\t" \
-  /*freed u, allocated m: (a, b, c, d, m, v, ll, ff)*/ \
-  /*a^=m; \
-    d^=m;*/ \
-  "pxor %%mm4,%%mm0\n\t" \
-  "pxor %%mm4,%%mm3\n\t" \
-  /*t=pavgb(a,d);*/ \
-  "pavgb %%mm3,%%mm0\n\t" \
-  "psllw $7,%%mm7\n\t" \
-  /*freed a, d, ff, allocated t, of: (t, b, c, ., m, v, ll, of)*/ \
-  /*t^=m; \
-    u=m+v;*/ \
-  "pxor %%mm4,%%mm0\n\t" \
-  "paddb %%mm5,%%mm4\n\t" \
-  /*freed t, m, allocated f, u: (f, b, c, ., u, v, ll, of)*/ \
-  /*f=pavgb(f,u); \
-    of=128;*/ \
-  "pavgb %%mm4,%%mm0\n\t" \
-  "packsswb %%mm7,%%mm7\n\t" \
-  /*freed u, ff, allocated ll: (f, b, c, ., ll, v, ll, of)*/ \
-  /*f=pavgb(f,v);*/ \
-  "pavgb %%mm5,%%mm0\n\t" \
-  "movq %%mm7,%%mm3\n\t" \
-  "movq %%mm6,%%mm4\n\t" \
-  /*freed v, allocated of: (f, b, c, of, ll, ., ll, of)*/ \
-  /*Now compute lflim of R_i=-(128+mm0) cf. Section 7.10 of the sepc.*/ \
-  /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
-     we have to split things by sign (the other option is to work in 16 bits, \
-     but staying in 8 bits gives much better parallelism).*/ \
-  /*Instead of adding the offset of 128 in mm3, we use it to split mm0. \
-    This is the same number of instructions as computing a mask and splitting \
-     after the lflim computation, but has shorter dependency chains.*/ \
-  /*mm0=R_i<0?-R_i:0 (denoted abs(R_i<0))\
-    mm3=R_i>0?R_i:0* (denoted abs(R_i>0))*/ \
-  "psubusb %%mm0,%%mm3\n\t" \
-  "psubusb %%mm7,%%mm0\n\t" \
-  /*mm6=255-max(2*L-abs(R_i<0),0) \
-    mm4=255-max(2*L-abs(R_i>0),0)*/ \
-  "paddusb %%mm3,%%mm4\n\t" \
-  "paddusb %%mm0,%%mm6\n\t" \
-  /*mm0=min(abs(R_i<0),max(2*L-abs(R_i<0),0)) \
-    mm3=min(abs(R_i>0),max(2*L-abs(R_i>0),0))*/ \
-  "paddusb %%mm4,%%mm3\n\t" \
-  "paddusb %%mm6,%%mm0\n\t" \
-  "psubusb %%mm4,%%mm3\n\t" \
-  "psubusb %%mm6,%%mm0\n\t" \
-  /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
-  /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
-  "paddusb %%mm3,%%mm1\n\t" \
-  "psubusb %%mm3,%%mm2\n\t" \
-  "psubusb %%mm0,%%mm1\n\t" \
-  "paddusb %%mm0,%%mm2\n\t" \
-
-#define OC_LOOP_FILTER_V(_filter,_pix,_ystride,_ll) \
-  do{ \
-    ptrdiff_t ystride3__; \
-    __asm__ __volatile__( \
-      /*mm0={a0,...,a7}*/ \
-      "movq (%[pix]),%%mm0\n\t" \
-      /*ystride3=_ystride*3*/ \
-      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
-      /*mm3={d0,...,d7}*/ \
-      "movq (%[pix],%[ystride3]),%%mm3\n\t" \
-      /*mm1={b0,...,b7}*/ \
-      "movq (%[pix],%[ystride]),%%mm1\n\t" \
-      /*mm2={c0,...,c7}*/ \
-      "movq (%[pix],%[ystride],2),%%mm2\n\t" \
-      _filter \
-      /*Write it back out.*/ \
-      "movq %%mm1,(%[pix],%[ystride])\n\t" \
-      "movq %%mm2,(%[pix],%[ystride],2)\n\t" \
-      :[ystride3]"=&r"(ystride3__) \
-      :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \
-       [ll]"r"(_ll) \
-      :"memory" \
-    ); \
-  } \
-  while(0)
-
-#define OC_LOOP_FILTER_H(_filter,_pix,_ystride,_ll) \
-  do{ \
-    unsigned char *pix__; \
-    ptrdiff_t      ystride3__; \
-    ptrdiff_t      d__; \
-    pix__=(_pix)-2; \
-    __asm__ __volatile__( \
-      /*x x x x d0 c0 b0 a0*/ \
-      "movd (%[pix]),%%mm0\n\t" \
-      /*x x x x d1 c1 b1 a1*/ \
-      "movd (%[pix],%[ystride]),%%mm1\n\t" \
-      /*ystride3=_ystride*3*/ \
-      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
-      /*x x x x d2 c2 b2 a2*/ \
-      "movd (%[pix],%[ystride],2),%%mm2\n\t" \
-      /*x x x x d3 c3 b3 a3*/ \
-      "lea (%[pix],%[ystride],4),%[d]\n\t" \
-      "movd (%[pix],%[ystride3]),%%mm3\n\t" \
-      /*x x x x d4 c4 b4 a4*/ \
-      "movd (%[d]),%%mm4\n\t" \
-      /*x x x x d5 c5 b5 a5*/ \
-      "movd (%[d],%[ystride]),%%mm5\n\t" \
-      /*x x x x d6 c6 b6 a6*/ \
-      "movd (%[d],%[ystride],2),%%mm6\n\t" \
-      /*x x x x d7 c7 b7 a7*/ \
-      "movd (%[d],%[ystride3]),%%mm7\n\t" \
-      /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
-      "punpcklbw %%mm1,%%mm0\n\t" \
-      /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
-      "punpcklbw %%mm3,%%mm2\n\t" \
-      /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
-      "movq %%mm0,%%mm3\n\t" \
-      /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
-      "punpcklwd %%mm2,%%mm0\n\t" \
-      /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
-      "punpckhwd %%mm2,%%mm3\n\t" \
-      /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
-      "movq %%mm0,%%mm1\n\t" \
-      /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
-      "punpcklbw %%mm5,%%mm4\n\t" \
-      /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
-      "punpcklbw %%mm7,%%mm6\n\t" \
-      /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
-      "movq %%mm4,%%mm5\n\t" \
-      /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
-      "punpcklwd %%mm6,%%mm4\n\t" \
-      /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
-      "punpckhwd %%mm6,%%mm5\n\t" \
-      /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
-      "movq %%mm3,%%mm2\n\t" \
-      /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
-      "punpckldq %%mm4,%%mm0\n\t" \
-      /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
-      "punpckhdq %%mm4,%%mm1\n\t" \
-      /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
-      "punpckldq %%mm5,%%mm2\n\t" \
-      /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
-      "punpckhdq %%mm5,%%mm3\n\t" \
-      _filter \
-      /*mm2={b0+R_0'',...,b7+R_7''}*/ \
-      "movq %%mm1,%%mm0\n\t" \
-      /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
-      "punpcklbw %%mm2,%%mm1\n\t" \
-      /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
-      "punpckhbw %%mm2,%%mm0\n\t" \
-      /*[d]=c1 b1 c0 b0*/ \
-      "movd %%mm1,%[d]\n\t" \
-      "movw %w[d],1(%[pix])\n\t" \
-      "psrlq $32,%%mm1\n\t" \
-      "shr $16,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride])\n\t" \
-      /*[d]=c3 b3 c2 b2*/ \
-      "movd %%mm1,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
-      "shr $16,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
-      "lea (%[pix],%[ystride],4),%[pix]\n\t" \
-      /*[d]=c5 b5 c4 b4*/ \
-      "movd %%mm0,%[d]\n\t" \
-      "movw %w[d],1(%[pix])\n\t" \
-      "psrlq $32,%%mm0\n\t" \
-      "shr $16,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride])\n\t" \
-      /*[d]=c7 b7 c6 b6*/ \
-      "movd %%mm0,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
-      "shr $16,%[d]\n\t" \
-      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
-      :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \
-      :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \
-      :"memory" \
-    ); \
-  } \
-  while(0)
-
-# endif
-#endif
diff --git a/media/libtheora/lib/x86/mmxstate.c b/media/libtheora/lib/x86/mmxstate.c
deleted file mode 100644
index 0b9586f94..000000000
--- a/media/libtheora/lib/x86/mmxstate.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of complete fragment reconstruction algorithm.
-  Originally written by Rudolf Marek.*/
-#include <string.h>
-#include "x86int.h"
-#include "mmxloop.h"
-
-#if defined(OC_X86_ASM)
-
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    /*Note that this value must be unsigned, to keep the __asm__ block from
-       sign-extending it when it puts it in a register.*/
-    ogg_uint16_t p;
-    int          i;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    /*Fill _dct_coeffs with p.*/
-    __asm__ __volatile__(
-      /*mm0=0000 0000 0000 AAAA*/
-      "movd %[p],%%mm0\n\t"
-      /*mm0=0000 0000 AAAA AAAA*/
-      "punpcklwd %%mm0,%%mm0\n\t"
-      /*mm0=AAAA AAAA AAAA AAAA*/
-      "punpckldq %%mm0,%%mm0\n\t"
-      :
-      :[p]"r"((unsigned)p)
-    );
-    for(i=0;i<4;i++){
-      __asm__ __volatile__(
-        "movq %%mm0,"OC_MEM_OFFS(0x00,y)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x08,y)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x10,y)"\n\t"
-        "movq %%mm0,"OC_MEM_OFFS(0x18,y)"\n\t"
-        :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_dct_coeffs+64+16*i,16)
-      );
-    }
-  }
-  else{
-    /*Dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
-       _dct_coeffs+64);
-    }
-    else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-  }
-}
-
-/*We copy these entire function to inline the actual MMX routines so that we
-   use only a single indirect call.*/
-
-void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
-  memset(_bv,_flimit,8);
-}
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
-  OC_ALIGN8(unsigned char   ll[8]);
-  const oc_fragment_plane *fplane;
-  const oc_fragment       *frags;
-  const ptrdiff_t         *frag_buf_offs;
-  unsigned char           *ref_frame_data;
-  ptrdiff_t                fragi_top;
-  ptrdiff_t                fragi_bot;
-  ptrdiff_t                fragi0;
-  ptrdiff_t                fragi0_end;
-  int                      ystride;
-  int                      nhfrags;
-  memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
-  fplane=_state->fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  fragi_top=fplane->froffset;
-  fragi_bot=fragi_top+fplane->nfrags;
-  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
-  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
-  ystride=_state->ref_ystride[_pli];
-  frags=_state->frags;
-  frag_buf_offs=_state->frag_buf_offs;
-  ref_frame_data=_state->ref_frame_data[_refi];
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.*/
-  while(fragi0<fragi0_end){
-    ptrdiff_t fragi;
-    ptrdiff_t fragi_end;
-    fragi=fragi0;
-    fragi_end=fragi+nhfrags;
-    while(fragi<fragi_end){
-      if(frags[fragi].coded){
-        unsigned char *ref;
-        ref=ref_frame_data+frag_buf_offs[fragi];
-        if(fragi>fragi0){
-          OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
-        }
-        if(fragi0>fragi_top){
-          OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
-        }
-        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
-          OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref+8,ystride,ll);
-        }
-        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
-          OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref+(ystride<<3),ystride,ll);
-        }
-      }
-      fragi++;
-    }
-    fragi0+=nhfrags;
-  }
-}
-
-void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit){
-  memset(_bv,~(_flimit<<1),8);
-}
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
-  const oc_fragment_plane *fplane;
-  const oc_fragment       *frags;
-  const ptrdiff_t         *frag_buf_offs;
-  unsigned char           *ref_frame_data;
-  ptrdiff_t                fragi_top;
-  ptrdiff_t                fragi_bot;
-  ptrdiff_t                fragi0;
-  ptrdiff_t                fragi0_end;
-  int                      ystride;
-  int                      nhfrags;
-  fplane=_state->fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  fragi_top=fplane->froffset;
-  fragi_bot=fragi_top+fplane->nfrags;
-  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
-  fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
-  ystride=_state->ref_ystride[_pli];
-  frags=_state->frags;
-  frag_buf_offs=_state->frag_buf_offs;
-  ref_frame_data=_state->ref_frame_data[_refi];
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.*/
-  while(fragi0<fragi0_end){
-    ptrdiff_t fragi;
-    ptrdiff_t fragi_end;
-    fragi=fragi0;
-    fragi_end=fragi+nhfrags;
-    while(fragi<fragi_end){
-      if(frags[fragi].coded){
-        unsigned char *ref;
-        ref=ref_frame_data+frag_buf_offs[fragi];
-        if(fragi>fragi0){
-          OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
-        }
-        if(fragi0>fragi_top){
-          OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
-        }
-        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
-          OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref+8,ystride,_bv);
-        }
-        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
-          OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref+(ystride<<3),ystride,_bv);
-        }
-      }
-      fragi++;
-    }
-    fragi0+=nhfrags;
-  }
-}
-
-#endif
diff --git a/media/libtheora/lib/x86/sse2idct.c b/media/libtheora/lib/x86/sse2idct.c
deleted file mode 100644
index 5f8523fa5..000000000
--- a/media/libtheora/lib/x86/sse2idct.c
+++ /dev/null
@@ -1,460 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $
-
- ********************************************************************/
-
-/*SSE2 acceleration of Theora's iDCT.*/
-#include "x86int.h"
-#include "sse2trans.h"
-#include "../dct.h"
-
-#if defined(OC_X86_ASM)
-
-/*A table of constants used by the MMX routines.*/
-const unsigned short __attribute__((aligned(16),used)) OC_IDCT_CONSTS[64]={
-        8,      8,      8,      8,      8,      8,      8,      8,
-  OC_C1S7,OC_C1S7,OC_C1S7,OC_C1S7,OC_C1S7,OC_C1S7,OC_C1S7,OC_C1S7,
-  OC_C2S6,OC_C2S6,OC_C2S6,OC_C2S6,OC_C2S6,OC_C2S6,OC_C2S6,OC_C2S6,
-  OC_C3S5,OC_C3S5,OC_C3S5,OC_C3S5,OC_C3S5,OC_C3S5,OC_C3S5,OC_C3S5,
-  OC_C4S4,OC_C4S4,OC_C4S4,OC_C4S4,OC_C4S4,OC_C4S4,OC_C4S4,OC_C4S4,
-  OC_C5S3,OC_C5S3,OC_C5S3,OC_C5S3,OC_C5S3,OC_C5S3,OC_C5S3,OC_C5S3,
-  OC_C6S2,OC_C6S2,OC_C6S2,OC_C6S2,OC_C6S2,OC_C6S2,OC_C6S2,OC_C6S2,
-  OC_C7S1,OC_C7S1,OC_C7S1,OC_C7S1,OC_C7S1,OC_C7S1,OC_C7S1,OC_C7S1
-};
-
-
-/*Performs the first three stages of the iDCT.
-  xmm2, xmm6, xmm3, and xmm5 must contain the corresponding rows of the input
-   (accessed in that order).
-  The remaining rows must be in _x at their corresponding locations.
-  On output, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
-   contain rows 4 through 7.*/
-#define OC_IDCT_8x8_ABC(_x) \
-  "#OC_IDCT_8x8_ABC\n\t" \
-  /*Stage 1:*/ \
-  /*2-3 rotation by 6pi/16. \
-    xmm4=xmm7=C6, xmm0=xmm1=C2, xmm2=X2, xmm6=X6.*/ \
-  "movdqa "OC_MEM_OFFS(0x20,c)",%%xmm1\n\t" \
-  "movdqa "OC_MEM_OFFS(0x60,c)",%%xmm4\n\t" \
-  "movdqa %%xmm1,%%xmm0\n\t" \
-  "pmulhw %%xmm2,%%xmm1\n\t" \
-  "movdqa %%xmm4,%%xmm7\n\t" \
-  "pmulhw %%xmm6,%%xmm0\n\t" \
-  "pmulhw %%xmm2,%%xmm7\n\t" \
-  "pmulhw %%xmm6,%%xmm4\n\t" \
-  "paddw %%xmm6,%%xmm0\n\t" \
-  "movdqa "OC_MEM_OFFS(0x30,c)",%%xmm6\n\t" \
-  "paddw %%xmm1,%%xmm2\n\t" \
-  "psubw %%xmm0,%%xmm7\n\t" \
-  "movdqa %%xmm7,"OC_MEM_OFFS(0x00,buf)"\n\t" \
-  "paddw %%xmm4,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x50,c)",%%xmm4\n\t" \
-  "movdqa %%xmm2,"OC_MEM_OFFS(0x10,buf)"\n\t" \
-  /*5-6 rotation by 3pi/16. \
-    xmm4=xmm2=C5, xmm1=xmm6=C3, xmm3=X3, xmm5=X5.*/ \
-  "movdqa %%xmm4,%%xmm2\n\t" \
-  "movdqa %%xmm6,%%xmm1\n\t" \
-  "pmulhw %%xmm3,%%xmm4\n\t" \
-  "pmulhw %%xmm5,%%xmm1\n\t" \
-  "pmulhw %%xmm3,%%xmm6\n\t" \
-  "pmulhw %%xmm5,%%xmm2\n\t" \
-  "paddw %%xmm3,%%xmm4\n\t" \
-  "paddw %%xmm5,%%xmm3\n\t" \
-  "paddw %%xmm6,%%xmm3\n\t" \
-  "movdqa "OC_MEM_OFFS(0x70,_x)",%%xmm6\n\t" \
-  "paddw %%xmm5,%%xmm1\n\t" \
-  "movdqa "OC_MEM_OFFS(0x10,_x)",%%xmm5\n\t" \
-  "paddw %%xmm3,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x70,c)",%%xmm3\n\t" \
-  "psubw %%xmm4,%%xmm1\n\t" \
-  "movdqa "OC_MEM_OFFS(0x10,c)",%%xmm4\n\t" \
-  /*4-7 rotation by 7pi/16. \
-    xmm4=xmm7=C1, xmm3=xmm0=C7, xmm5=X1, xmm6=X7.*/ \
-  "movdqa %%xmm3,%%xmm0\n\t" \
-  "movdqa %%xmm4,%%xmm7\n\t" \
-  "pmulhw %%xmm5,%%xmm3\n\t" \
-  "pmulhw %%xmm5,%%xmm7\n\t" \
-  "pmulhw %%xmm6,%%xmm4\n\t" \
-  "pmulhw %%xmm6,%%xmm0\n\t" \
-  "paddw %%xmm6,%%xmm4\n\t" \
-  "movdqa "OC_MEM_OFFS(0x40,_x)",%%xmm6\n\t" \
-  "paddw %%xmm5,%%xmm7\n\t" \
-  "psubw %%xmm4,%%xmm3\n\t" \
-  "movdqa "OC_MEM_OFFS(0x40,c)",%%xmm4\n\t" \
-  "paddw %%xmm7,%%xmm0\n\t" \
-  "movdqa "OC_MEM_OFFS(0x00,_x)",%%xmm7\n\t" \
-  /*0-1 butterfly. \
-    xmm4=xmm5=C4, xmm7=X0, xmm6=X4.*/ \
-  "paddw %%xmm7,%%xmm6\n\t" \
-  "movdqa %%xmm4,%%xmm5\n\t" \
-  "pmulhw %%xmm6,%%xmm4\n\t" \
-  "paddw %%xmm7,%%xmm7\n\t" \
-  "psubw %%xmm6,%%xmm7\n\t" \
-  "paddw %%xmm6,%%xmm4\n\t" \
-  /*Stage 2:*/ \
-  /*4-5 butterfly: xmm3=t[4], xmm1=t[5] \
-    7-6 butterfly: xmm2=t[6], xmm0=t[7]*/ \
-  "movdqa %%xmm3,%%xmm6\n\t" \
-  "paddw %%xmm1,%%xmm3\n\t" \
-  "psubw %%xmm1,%%xmm6\n\t" \
-  "movdqa %%xmm5,%%xmm1\n\t" \
-  "pmulhw %%xmm7,%%xmm5\n\t" \
-  "paddw %%xmm7,%%xmm5\n\t" \
-  "movdqa %%xmm0,%%xmm7\n\t" \
-  "paddw %%xmm2,%%xmm0\n\t" \
-  "psubw %%xmm2,%%xmm7\n\t" \
-  "movdqa %%xmm1,%%xmm2\n\t" \
-  "pmulhw %%xmm6,%%xmm1\n\t" \
-  "pmulhw %%xmm7,%%xmm2\n\t" \
-  "paddw %%xmm6,%%xmm1\n\t" \
-  "movdqa "OC_MEM_OFFS(0x00,buf)",%%xmm6\n\t" \
-  "paddw %%xmm7,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x10,buf)",%%xmm7\n\t" \
-  /*Stage 3: \
-    6-5 butterfly: xmm1=t[5], xmm2=t[6] -> xmm1=t[6]+t[5], xmm2=t[6]-t[5] \
-    0-3 butterfly: xmm4=t[0], xmm7=t[3] -> xmm7=t[0]+t[3], xmm4=t[0]-t[3] \
-    1-2 butterfly: xmm5=t[1], xmm6=t[2] -> xmm6=t[1]+t[2], xmm5=t[1]-t[2]*/ \
-  "paddw %%xmm2,%%xmm1\n\t" \
-  "paddw %%xmm5,%%xmm6\n\t" \
-  "paddw %%xmm4,%%xmm7\n\t" \
-  "paddw %%xmm2,%%xmm2\n\t" \
-  "paddw %%xmm4,%%xmm4\n\t" \
-  "paddw %%xmm5,%%xmm5\n\t" \
-  "psubw %%xmm1,%%xmm2\n\t" \
-  "psubw %%xmm7,%%xmm4\n\t" \
-  "psubw %%xmm6,%%xmm5\n\t" \
-
-/*Performs the last stage of the iDCT.
-  On input, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
-   contain rows 4 through 7.
-  On output, xmm0 through xmm7 contain the corresponding rows.*/
-#define OC_IDCT_8x8_D \
-  "#OC_IDCT_8x8_D\n\t" \
-  /*Stage 4: \
-    0-7 butterfly: xmm7=t[0], xmm0=t[7] -> xmm0=t[0]+t[7], xmm7=t[0]-t[7] \
-    1-6 butterfly: xmm6=t[1], xmm1=t[6] -> xmm1=t[1]+t[6], xmm6=t[1]-t[6] \
-    2-5 butterfly: xmm5=t[2], xmm2=t[5] -> xmm2=t[2]+t[5], xmm5=t[2]-t[5] \
-    3-4 butterfly: xmm4=t[3], xmm3=t[4] -> xmm3=t[3]+t[4], xmm4=t[3]-t[4]*/ \
-  "psubw %%xmm0,%%xmm7\n\t" \
-  "psubw %%xmm1,%%xmm6\n\t" \
-  "psubw %%xmm2,%%xmm5\n\t" \
-  "psubw %%xmm3,%%xmm4\n\t" \
-  "paddw %%xmm0,%%xmm0\n\t" \
-  "paddw %%xmm1,%%xmm1\n\t" \
-  "paddw %%xmm2,%%xmm2\n\t" \
-  "paddw %%xmm3,%%xmm3\n\t" \
-  "paddw %%xmm7,%%xmm0\n\t" \
-  "paddw %%xmm6,%%xmm1\n\t" \
-  "paddw %%xmm5,%%xmm2\n\t" \
-  "paddw %%xmm4,%%xmm3\n\t" \
-
-/*Performs the last stage of the iDCT.
-  On input, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
-   contain rows 4 through 7.
-  On output, xmm0 through xmm7 contain the corresponding rows.*/
-#define OC_IDCT_8x8_D_STORE \
-  "#OC_IDCT_8x8_D_STORE\n\t" \
-  /*Stage 4: \
-    0-7 butterfly: xmm7=t[0], xmm0=t[7] -> xmm0=t[0]+t[7], xmm7=t[0]-t[7] \
-    1-6 butterfly: xmm6=t[1], xmm1=t[6] -> xmm1=t[1]+t[6], xmm6=t[1]-t[6] \
-    2-5 butterfly: xmm5=t[2], xmm2=t[5] -> xmm2=t[2]+t[5], xmm5=t[2]-t[5] \
-    3-4 butterfly: xmm4=t[3], xmm3=t[4] -> xmm3=t[3]+t[4], xmm4=t[3]-t[4]*/ \
-  "psubw %%xmm3,%%xmm4\n\t" \
-  "movdqa %%xmm4,"OC_MEM_OFFS(0x40,y)"\n\t" \
-  "movdqa "OC_MEM_OFFS(0x00,c)",%%xmm4\n\t" \
-  "psubw %%xmm0,%%xmm7\n\t" \
-  "psubw %%xmm1,%%xmm6\n\t" \
-  "psubw %%xmm2,%%xmm5\n\t" \
-  "paddw %%xmm4,%%xmm7\n\t" \
-  "paddw %%xmm4,%%xmm6\n\t" \
-  "paddw %%xmm4,%%xmm5\n\t" \
-  "paddw "OC_MEM_OFFS(0x40,y)",%%xmm4\n\t" \
-  "paddw %%xmm0,%%xmm0\n\t" \
-  "paddw %%xmm1,%%xmm1\n\t" \
-  "paddw %%xmm2,%%xmm2\n\t" \
-  "paddw %%xmm3,%%xmm3\n\t" \
-  "paddw %%xmm7,%%xmm0\n\t" \
-  "paddw %%xmm6,%%xmm1\n\t" \
-  "psraw $4,%%xmm0\n\t" \
-  "paddw %%xmm5,%%xmm2\n\t" \
-  "movdqa %%xmm0,"OC_MEM_OFFS(0x00,y)"\n\t" \
-  "psraw $4,%%xmm1\n\t" \
-  "paddw %%xmm4,%%xmm3\n\t" \
-  "movdqa %%xmm1,"OC_MEM_OFFS(0x10,y)"\n\t" \
-  "psraw $4,%%xmm2\n\t" \
-  "movdqa %%xmm2,"OC_MEM_OFFS(0x20,y)"\n\t" \
-  "psraw $4,%%xmm3\n\t" \
-  "movdqa %%xmm3,"OC_MEM_OFFS(0x30,y)"\n\t" \
-  "psraw $4,%%xmm4\n\t" \
-  "movdqa %%xmm4,"OC_MEM_OFFS(0x40,y)"\n\t" \
-  "psraw $4,%%xmm5\n\t" \
-  "movdqa %%xmm5,"OC_MEM_OFFS(0x50,y)"\n\t" \
-  "psraw $4,%%xmm6\n\t" \
-  "movdqa %%xmm6,"OC_MEM_OFFS(0x60,y)"\n\t" \
-  "psraw $4,%%xmm7\n\t" \
-  "movdqa %%xmm7,"OC_MEM_OFFS(0x70,y)"\n\t" \
-
-static void oc_idct8x8_slow_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  OC_ALIGN16(ogg_int16_t buf[16]);
-  /*This routine accepts an 8x8 matrix pre-transposed.*/
-  __asm__ __volatile__(
-    /*Load rows 2, 3, 5, and 6 for the first stage of the iDCT.*/
-    "movdqa "OC_MEM_OFFS(0x20,x)",%%xmm2\n\t"
-    "movdqa "OC_MEM_OFFS(0x60,x)",%%xmm6\n\t"
-    "movdqa "OC_MEM_OFFS(0x30,x)",%%xmm3\n\t"
-    "movdqa "OC_MEM_OFFS(0x50,x)",%%xmm5\n\t"
-    OC_IDCT_8x8_ABC(x)
-    OC_IDCT_8x8_D
-    OC_TRANSPOSE_8x8
-    /*Clear out rows 0, 1, 4, and 7 for the first stage of the iDCT.*/
-    "movdqa %%xmm7,"OC_MEM_OFFS(0x70,y)"\n\t"
-    "movdqa %%xmm4,"OC_MEM_OFFS(0x40,y)"\n\t"
-    "movdqa %%xmm1,"OC_MEM_OFFS(0x10,y)"\n\t"
-    "movdqa %%xmm0,"OC_MEM_OFFS(0x00,y)"\n\t"
-    OC_IDCT_8x8_ABC(y)
-    OC_IDCT_8x8_D_STORE
-    :[buf]"=m"(OC_ARRAY_OPERAND(ogg_int16_t,buf,16)),
-     [y]"=m"(OC_ARRAY_OPERAND(ogg_int16_t,_y,64))
-    :[x]"m"(OC_CONST_ARRAY_OPERAND(ogg_int16_t,_x,64)),
-     [c]"m"(OC_CONST_ARRAY_OPERAND(ogg_int16_t,OC_IDCT_CONSTS,128))
-  );
-  if(_x!=_y){
-    int i;
-    __asm__ __volatile__("pxor %%xmm0,%%xmm0\n\t"::);
-    /*Clear input data for next block (decoder only).*/
-    for(i=0;i<2;i++){
-      __asm__ __volatile__(
-        "movdqa %%xmm0,"OC_MEM_OFFS(0x00,x)"\n\t"
-        "movdqa %%xmm0,"OC_MEM_OFFS(0x10,x)"\n\t"
-        "movdqa %%xmm0,"OC_MEM_OFFS(0x20,x)"\n\t"
-        "movdqa %%xmm0,"OC_MEM_OFFS(0x30,x)"\n\t"
-        :[x]"=m"(OC_ARRAY_OPERAND(ogg_int16_t,_x+i*32,32))
-      );
-    }
-  }
-}
-
-/*For the first step of the 10-coefficient version of the 8x8 iDCT, we only
-   need to work with four columns at a time.
-  Doing this in MMX is faster on processors with a 64-bit data path.*/
-#define OC_IDCT_8x8_10_MMX \
-  "#OC_IDCT_8x8_10_MMX\n\t" \
-  /*Stage 1:*/ \
-  /*2-3 rotation by 6pi/16. \
-    mm7=C6, mm6=C2, mm2=X2, X6=0.*/ \
-  "movq "OC_MEM_OFFS(0x60,c)",%%mm7\n\t" \
-  "movq "OC_MEM_OFFS(0x20,c)",%%mm6\n\t" \
-  "pmulhw %%mm2,%%mm6\n\t" \
-  "pmulhw %%mm2,%%mm7\n\t" \
-  "movq "OC_MEM_OFFS(0x50,c)",%%mm5\n\t" \
-  "paddw %%mm6,%%mm2\n\t" \
-  "movq %%mm2,"OC_MEM_OFFS(0x10,buf)"\n\t" \
-  "movq "OC_MEM_OFFS(0x30,c)",%%mm2\n\t" \
-  "movq %%mm7,"OC_MEM_OFFS(0x00,buf)"\n\t" \
-  /*5-6 rotation by 3pi/16. \
-    mm5=C5, mm2=C3, mm3=X3, X5=0.*/ \
-  "pmulhw %%mm3,%%mm5\n\t" \
-  "pmulhw %%mm3,%%mm2\n\t" \
-  "movq "OC_MEM_OFFS(0x10,c)",%%mm7\n\t" \
-  "paddw %%mm3,%%mm5\n\t" \
-  "paddw %%mm3,%%mm2\n\t" \
-  "movq "OC_MEM_OFFS(0x70,c)",%%mm3\n\t" \
-  /*4-7 rotation by 7pi/16. \
-    mm7=C1, mm3=C7, mm1=X1, X7=0.*/ \
-  "pmulhw %%mm1,%%mm3\n\t" \
-  "pmulhw %%mm1,%%mm7\n\t" \
-  "movq "OC_MEM_OFFS(0x40,c)",%%mm4\n\t" \
-  "movq %%mm3,%%mm6\n\t" \
-  "paddw %%mm1,%%mm7\n\t" \
-  /*0-1 butterfly. \
-    mm4=C4, mm0=X0, X4=0.*/ \
-  /*Stage 2:*/ \
-  /*4-5 butterfly: mm3=t[4], mm5=t[5] \
-    7-6 butterfly: mm2=t[6], mm7=t[7]*/ \
-  "psubw %%mm5,%%mm3\n\t" \
-  "paddw %%mm5,%%mm6\n\t" \
-  "movq %%mm4,%%mm1\n\t" \
-  "pmulhw %%mm0,%%mm4\n\t" \
-  "paddw %%mm0,%%mm4\n\t" \
-  "movq %%mm7,%%mm0\n\t" \
-  "movq %%mm4,%%mm5\n\t" \
-  "paddw %%mm2,%%mm0\n\t" \
-  "psubw %%mm2,%%mm7\n\t" \
-  "movq %%mm1,%%mm2\n\t" \
-  "pmulhw %%mm6,%%mm1\n\t" \
-  "pmulhw %%mm7,%%mm2\n\t" \
-  "paddw %%mm6,%%mm1\n\t" \
-  "movq "OC_MEM_OFFS(0x00,buf)",%%mm6\n\t" \
-  "paddw %%mm7,%%mm2\n\t" \
-  "movq "OC_MEM_OFFS(0x10,buf)",%%mm7\n\t" \
-  /*Stage 3: \
-    6-5 butterfly: mm1=t[5], mm2=t[6] -> mm1=t[6]+t[5], mm2=t[6]-t[5] \
-    0-3 butterfly: mm4=t[0], mm7=t[3] -> mm7=t[0]+t[3], mm4=t[0]-t[3] \
-    1-2 butterfly: mm5=t[1], mm6=t[2] -> mm6=t[1]+t[2], mm5=t[1]-t[2]*/ \
-  "paddw %%mm2,%%mm1\n\t" \
-  "paddw %%mm5,%%mm6\n\t" \
-  "paddw %%mm4,%%mm7\n\t" \
-  "paddw %%mm2,%%mm2\n\t" \
-  "paddw %%mm4,%%mm4\n\t" \
-  "paddw %%mm5,%%mm5\n\t" \
-  "psubw %%mm1,%%mm2\n\t" \
-  "psubw %%mm7,%%mm4\n\t" \
-  "psubw %%mm6,%%mm5\n\t" \
-  /*Stage 4: \
-    0-7 butterfly: mm7=t[0], mm0=t[7] -> mm0=t[0]+t[7], mm7=t[0]-t[7] \
-    1-6 butterfly: mm6=t[1], mm1=t[6] -> mm1=t[1]+t[6], mm6=t[1]-t[6] \
-    2-5 butterfly: mm5=t[2], mm2=t[5] -> mm2=t[2]+t[5], mm5=t[2]-t[5] \
-    3-4 butterfly: mm4=t[3], mm3=t[4] -> mm3=t[3]+t[4], mm4=t[3]-t[4]*/ \
-  "psubw %%mm0,%%mm7\n\t" \
-  "psubw %%mm1,%%mm6\n\t" \
-  "psubw %%mm2,%%mm5\n\t" \
-  "psubw %%mm3,%%mm4\n\t" \
-  "paddw %%mm0,%%mm0\n\t" \
-  "paddw %%mm1,%%mm1\n\t" \
-  "paddw %%mm2,%%mm2\n\t" \
-  "paddw %%mm3,%%mm3\n\t" \
-  "paddw %%mm7,%%mm0\n\t" \
-  "paddw %%mm6,%%mm1\n\t" \
-  "paddw %%mm5,%%mm2\n\t" \
-  "paddw %%mm4,%%mm3\n\t" \
-
-#define OC_IDCT_8x8_10_ABC \
-  "#OC_IDCT_8x8_10_ABC\n\t" \
-  /*Stage 1:*/ \
-  /*2-3 rotation by 6pi/16. \
-    xmm7=C6, xmm6=C2, xmm2=X2, X6=0.*/ \
-  "movdqa "OC_MEM_OFFS(0x60,c)",%%xmm7\n\t" \
-  "movdqa "OC_MEM_OFFS(0x20,c)",%%xmm6\n\t" \
-  "pmulhw %%xmm2,%%xmm6\n\t" \
-  "pmulhw %%xmm2,%%xmm7\n\t" \
-  "movdqa "OC_MEM_OFFS(0x50,c)",%%xmm5\n\t" \
-  "paddw %%xmm6,%%xmm2\n\t" \
-  "movdqa %%xmm2,"OC_MEM_OFFS(0x10,buf)"\n\t" \
-  "movdqa "OC_MEM_OFFS(0x30,c)",%%xmm2\n\t" \
-  "movdqa %%xmm7,"OC_MEM_OFFS(0x00,buf)"\n\t" \
-  /*5-6 rotation by 3pi/16. \
-    xmm5=C5, xmm2=C3, xmm3=X3, X5=0.*/ \
-  "pmulhw %%xmm3,%%xmm5\n\t" \
-  "pmulhw %%xmm3,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x10,c)",%%xmm7\n\t" \
-  "paddw %%xmm3,%%xmm5\n\t" \
-  "paddw %%xmm3,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x70,c)",%%xmm3\n\t" \
-  /*4-7 rotation by 7pi/16. \
-    xmm7=C1, xmm3=C7, xmm1=X1, X7=0.*/ \
-  "pmulhw %%xmm1,%%xmm3\n\t" \
-  "pmulhw %%xmm1,%%xmm7\n\t" \
-  "movdqa "OC_MEM_OFFS(0x40,c)",%%xmm4\n\t" \
-  "movdqa %%xmm3,%%xmm6\n\t" \
-  "paddw %%xmm1,%%xmm7\n\t" \
-  /*0-1 butterfly. \
-    xmm4=C4, xmm0=X0, X4=0.*/ \
-  /*Stage 2:*/ \
-  /*4-5 butterfly: xmm3=t[4], xmm5=t[5] \
-    7-6 butterfly: xmm2=t[6], xmm7=t[7]*/ \
-  "psubw %%xmm5,%%xmm3\n\t" \
-  "paddw %%xmm5,%%xmm6\n\t" \
-  "movdqa %%xmm4,%%xmm1\n\t" \
-  "pmulhw %%xmm0,%%xmm4\n\t" \
-  "paddw %%xmm0,%%xmm4\n\t" \
-  "movdqa %%xmm7,%%xmm0\n\t" \
-  "movdqa %%xmm4,%%xmm5\n\t" \
-  "paddw %%xmm2,%%xmm0\n\t" \
-  "psubw %%xmm2,%%xmm7\n\t" \
-  "movdqa %%xmm1,%%xmm2\n\t" \
-  "pmulhw %%xmm6,%%xmm1\n\t" \
-  "pmulhw %%xmm7,%%xmm2\n\t" \
-  "paddw %%xmm6,%%xmm1\n\t" \
-  "movdqa "OC_MEM_OFFS(0x00,buf)",%%xmm6\n\t" \
-  "paddw %%xmm7,%%xmm2\n\t" \
-  "movdqa "OC_MEM_OFFS(0x10,buf)",%%xmm7\n\t" \
-  /*Stage 3: \
-    6-5 butterfly: xmm1=t[5], xmm2=t[6] -> xmm1=t[6]+t[5], xmm2=t[6]-t[5] \
-    0-3 butterfly: xmm4=t[0], xmm7=t[3] -> xmm7=t[0]+t[3], xmm4=t[0]-t[3] \
-    1-2 butterfly: xmm5=t[1], xmm6=t[2] -> xmm6=t[1]+t[2], xmm5=t[1]-t[2]*/ \
-  "paddw %%xmm2,%%xmm1\n\t" \
-  "paddw %%xmm5,%%xmm6\n\t" \
-  "paddw %%xmm4,%%xmm7\n\t" \
-  "paddw %%xmm2,%%xmm2\n\t" \
-  "paddw %%xmm4,%%xmm4\n\t" \
-  "paddw %%xmm5,%%xmm5\n\t" \
-  "psubw %%xmm1,%%xmm2\n\t" \
-  "psubw %%xmm7,%%xmm4\n\t" \
-  "psubw %%xmm6,%%xmm5\n\t" \
-
-static void oc_idct8x8_10_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  OC_ALIGN16(ogg_int16_t buf[16]);
-  /*This routine accepts an 8x8 matrix pre-transposed.*/
-  __asm__ __volatile__(
-    "movq "OC_MEM_OFFS(0x20,x)",%%mm2\n\t"
-    "movq "OC_MEM_OFFS(0x30,x)",%%mm3\n\t"
-    "movq "OC_MEM_OFFS(0x10,x)",%%mm1\n\t"
-    "movq "OC_MEM_OFFS(0x00,x)",%%mm0\n\t"
-    OC_IDCT_8x8_10_MMX
-    OC_TRANSPOSE_8x4_MMX2SSE
-    OC_IDCT_8x8_10_ABC
-    OC_IDCT_8x8_D_STORE
-    :[buf]"=m"(OC_ARRAY_OPERAND(short,buf,16)),
-     [y]"=m"(OC_ARRAY_OPERAND(ogg_int16_t,_y,64))
-    :[x]"m"OC_CONST_ARRAY_OPERAND(ogg_int16_t,_x,64),
-     [c]"m"(OC_CONST_ARRAY_OPERAND(ogg_int16_t,OC_IDCT_CONSTS,128))
-  );
-  if(_x!=_y){
-    /*Clear input data for next block (decoder only).*/
-    __asm__ __volatile__(
-      "pxor %%mm0,%%mm0\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x00,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x10,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x20,x)"\n\t"
-      "movq %%mm0,"OC_MEM_OFFS(0x30,x)"\n\t"
-      :[x]"+m"(OC_ARRAY_OPERAND(ogg_int16_t,_x,28))
-    );
-  }
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to orthonormal
-   version of the transform.*/
-void oc_idct8x8_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
-  /*_last_zzi is subtly different from an actual count of the number of
-     coefficients we decoded for this block.
-    It contains the value of zzi BEFORE the final token in the block was
-     decoded.
-    In most cases this is an EOB token (the continuation of an EOB run from a
-     previous block counts), and so this is the same as the coefficient count.
-    However, in the case that the last token was NOT an EOB token, but filled
-     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
-    Provided the last token was not a pure zero run, the minimum value it can
-     be is 46, and so that doesn't affect any of the cases in this routine.
-    However, if the last token WAS a pure zero run of length 63, then _last_zzi
-     will be 1 while the number of coefficients decoded is 64.
-    Thus, we will trigger the following special case, where the real
-     coefficient count would not.
-    Note also that a zero run of length 64 will give _last_zzi a value of 0,
-     but we still process the DC coefficient, which might have a non-zero value
-     due to DC prediction.
-    Although convoluted, this is arguably the correct behavior: it allows us to
-     use a smaller transform when the block ends with a long zero run instead
-     of a normal EOB token.
-    It could be smarter... multiple separate zero runs at the end of a block
-     will fool it, but an encoder that generates these really deserves what it
-     gets.
-    Needless to say we inherited this approach from VP3.*/
-  /*Then perform the iDCT.*/
-  if(_last_zzi<=10)oc_idct8x8_10_sse2(_y,_x);
-  else oc_idct8x8_slow_sse2(_y,_x);
-}
-
-#endif
diff --git a/media/libtheora/lib/x86/sse2trans.h b/media/libtheora/lib/x86/sse2trans.h
deleted file mode 100644
index e76da5140..000000000
--- a/media/libtheora/lib/x86/sse2trans.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: sse2trans.h 15675 2009-02-06 09:43:27Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_sse2trans_H)
-# define _x86_sse2trans_H (1)
-# include "x86int.h"
-
-# if defined(OC_X86_64_ASM)
-/*On x86-64 we can transpose in-place without spilling registers.
-  By clever choices of the order to apply the butterflies and the order of
-   their outputs, we can take the rows in order and output the columns in order
-   without any extra operations and using just one temporary register.*/
-#  define OC_TRANSPOSE_8x8 \
- "#OC_TRANSPOSE_8x8\n\t" \
- "movdqa %%xmm4,%%xmm8\n\t" \
- /*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \
- "punpcklwd %%xmm5,%%xmm4\n\t" \
- /*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \
- "punpckhwd %%xmm5,%%xmm8\n\t" \
- /*xmm5 is free.*/ \
- "movdqa %%xmm0,%%xmm5\n\t" \
- /*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \
- "punpcklwd %%xmm1,%%xmm0\n\t" \
- /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \
- "punpckhwd %%xmm1,%%xmm5\n\t" \
- /*xmm1 is free.*/ \
- "movdqa %%xmm6,%%xmm1\n\t" \
- /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \
- "punpcklwd %%xmm7,%%xmm6\n\t" \
- /*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \
- "punpckhwd %%xmm7,%%xmm1\n\t" \
- /*xmm7 is free.*/ \
- "movdqa %%xmm2,%%xmm7\n\t" \
- /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \
- "punpckhwd %%xmm3,%%xmm2\n\t" \
- /*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \
- "punpcklwd %%xmm3,%%xmm7\n\t" \
- /*xmm3 is free.*/ \
- "movdqa %%xmm0,%%xmm3\n\t" \
- /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \
- "punpckldq %%xmm7,%%xmm0\n\t" \
- /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \
- "punpckhdq %%xmm7,%%xmm3\n\t" \
- /*xmm7 is free.*/ \
- "movdqa %%xmm5,%%xmm7\n\t" \
- /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \
- "punpckldq %%xmm2,%%xmm5\n\t" \
- /*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \
- "punpckhdq %%xmm2,%%xmm7\n\t" \
- /*xmm2 is free.*/ \
- "movdqa %%xmm4,%%xmm2\n\t" \
- /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \
- "punpckhdq %%xmm6,%%xmm4\n\t" \
- /*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \
- "punpckldq %%xmm6,%%xmm2\n\t" \
- /*xmm6 is free.*/ \
- "movdqa %%xmm8,%%xmm6\n\t" \
- /*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \
- "punpckldq %%xmm1,%%xmm6\n\t" \
- /*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \
- "punpckhdq %%xmm1,%%xmm8\n\t" \
- /*xmm1 is free.*/ \
- "movdqa %%xmm0,%%xmm1\n\t" \
- /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
- "punpcklqdq %%xmm2,%%xmm0\n\t" \
- /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \
- "punpckhqdq %%xmm2,%%xmm1\n\t" \
- /*xmm2 is free.*/ \
- "movdqa %%xmm3,%%xmm2\n\t" \
- /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \
- "punpckhqdq %%xmm4,%%xmm3\n\t" \
- /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \
- "punpcklqdq %%xmm4,%%xmm2\n\t" \
- /*xmm4 is free.*/ \
- "movdqa %%xmm5,%%xmm4\n\t" \
- /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \
- "punpckhqdq %%xmm6,%%xmm5\n\t" \
- /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \
- "punpcklqdq %%xmm6,%%xmm4\n\t" \
- /*xmm6 is free.*/ \
- "movdqa %%xmm7,%%xmm6\n\t" \
- /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \
- "punpckhqdq %%xmm8,%%xmm7\n\t" \
- /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \
- "punpcklqdq %%xmm8,%%xmm6\n\t" \
- /*xmm8 is free.*/ \
-
-# else
-/*Otherwise, we need to spill some values to %[buf] temporarily.
-  Again, the butterflies are carefully arranged to get the columns to come out
-   in order, minimizing register spills and maximizing the delay between a load
-   and when the value loaded is actually used.*/
-#  define OC_TRANSPOSE_8x8 \
- "#OC_TRANSPOSE_8x8\n\t" \
- /*buf[0] = a7 a6 a5 a4 a3 a2 a1 a0*/ \
- "movdqa %%xmm0,"OC_MEM_OFFS(0x00,buf)"\n\t" \
- /*xmm0 is free.*/ \
- "movdqa %%xmm2,%%xmm0\n\t" \
- /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \
- "punpckhwd %%xmm3,%%xmm2\n\t" \
- /*xmm0 = d3 c3 d2 c2 d1 c1 d0 c0*/ \
- "punpcklwd %%xmm3,%%xmm0\n\t" \
- /*xmm3 = a7 a6 a5 a4 a3 a2 a1 a0*/ \
- "movdqa "OC_MEM_OFFS(0x00,buf)",%%xmm3\n\t" \
- /*buf[1] = d7 c7 d6 c6 d5 c5 d4 c4*/ \
- "movdqa %%xmm2,"OC_MEM_OFFS(0x10,buf)"\n\t" \
- /*xmm2 is free.*/ \
- "movdqa %%xmm6,%%xmm2\n\t" \
- /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \
- "punpcklwd %%xmm7,%%xmm6\n\t" \
- /*xmm2 = h7 g7 h6 g6 h5 g5 h4 g4*/ \
- "punpckhwd %%xmm7,%%xmm2\n\t" \
- /*xmm7 is free.*/ \
- "movdqa %%xmm4,%%xmm7\n\t" \
- /*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \
- "punpcklwd %%xmm5,%%xmm4\n\t" \
- /*xmm7 = f7 e7 f6 e6 f5 e5 f4 e4*/ \
- "punpckhwd %%xmm5,%%xmm7\n\t" \
- /*xmm5 is free.*/ \
- "movdqa %%xmm3,%%xmm5\n\t" \
- /*xmm3 = b3 a3 b2 a2 b1 a1 b0 a0*/ \
- "punpcklwd %%xmm1,%%xmm3\n\t" \
- /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \
- "punpckhwd %%xmm1,%%xmm5\n\t" \
- /*xmm1 is free.*/ \
- "movdqa %%xmm7,%%xmm1\n\t" \
- /*xmm7 = h5 g5 f5 e5 h4 g4 f4 e4*/ \
- "punpckldq %%xmm2,%%xmm7\n\t" \
- /*xmm1 = h7 g7 f7 e7 h6 g6 f6 e6*/ \
- "punpckhdq %%xmm2,%%xmm1\n\t" \
- /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \
- "movdqa "OC_MEM_OFFS(0x10,buf)",%%xmm2\n\t" \
- /*buf[0] = h7 g7 f7 e7 h6 g6 f6 e6*/ \
- "movdqa %%xmm1,"OC_MEM_OFFS(0x00,buf)"\n\t" \
- /*xmm1 is free.*/ \
- "movdqa %%xmm3,%%xmm1\n\t" \
- /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \
- "punpckhdq %%xmm0,%%xmm3\n\t" \
- /*xmm1 = d1 c1 b1 a1 d0 c0 b0 a0*/ \
- "punpckldq %%xmm0,%%xmm1\n\t" \
- /*xmm0 is free.*/ \
- "movdqa %%xmm4,%%xmm0\n\t" \
- /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \
- "punpckhdq %%xmm6,%%xmm4\n\t" \
- /*xmm0 = h1 g1 f1 e1 h0 g0 f0 e0*/ \
- "punpckldq %%xmm6,%%xmm0\n\t" \
- /*xmm6 is free.*/ \
- "movdqa %%xmm5,%%xmm6\n\t" \
- /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \
- "punpckldq %%xmm2,%%xmm5\n\t" \
- /*xmm6 = d7 c7 b7 a7 d6 c6 b6 a6*/ \
- "punpckhdq %%xmm2,%%xmm6\n\t" \
- /*xmm2 is free.*/ \
- "movdqa %%xmm1,%%xmm2\n\t" \
- /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \
- "punpckhqdq %%xmm0,%%xmm1\n\t" \
- /*xmm2 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
- "punpcklqdq %%xmm0,%%xmm2\n\t" \
- /*xmm0 = h7 g7 f7 e7 h6 g6 f6 e6*/ \
- "movdqa "OC_MEM_OFFS(0x00,buf)",%%xmm0\n\t" \
- /*buf[1] = h0 g0 f0 e0 d0 c0 b0 a0*/ \
- "movdqa %%xmm2,"OC_MEM_OFFS(0x10,buf)"\n\t" \
- /*xmm2 is free.*/ \
- "movdqa %%xmm3,%%xmm2\n\t" \
- /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \
- "punpckhqdq %%xmm4,%%xmm3\n\t" \
- /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \
- "punpcklqdq %%xmm4,%%xmm2\n\t" \
- /*xmm4 is free.*/ \
- "movdqa %%xmm5,%%xmm4\n\t" \
- /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \
- "punpckhqdq %%xmm7,%%xmm5\n\t" \
- /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \
- "punpcklqdq %%xmm7,%%xmm4\n\t" \
- /*xmm7 is free.*/ \
- "movdqa %%xmm6,%%xmm7\n\t" \
- /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \
- "punpcklqdq %%xmm0,%%xmm6\n\t" \
- /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \
- "punpckhqdq %%xmm0,%%xmm7\n\t" \
- /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
- "movdqa "OC_MEM_OFFS(0x10,buf)",%%xmm0\n\t" \
-
-# endif
-
-/*Transpose 4 values in each of 8 MMX registers into 8 values in the first
-   four SSE registers.
-  No need to be clever here; we have plenty of room.*/
-#  define OC_TRANSPOSE_8x4_MMX2SSE \
- "#OC_TRANSPOSE_8x4_MMX2SSE\n\t" \
- "movq2dq %%mm0,%%xmm0\n\t" \
- "movq2dq %%mm1,%%xmm1\n\t" \
- /*xmmA = b3 a3 b2 a2 b1 a1 b0 a0*/ \
- "punpcklwd %%xmm1,%%xmm0\n\t" \
- "movq2dq %%mm2,%%xmm3\n\t" \
- "movq2dq %%mm3,%%xmm2\n\t" \
- /*xmmC = d3 c3 d2 c2 d1 c1 d0 c0*/ \
- "punpcklwd %%xmm2,%%xmm3\n\t" \
- "movq2dq %%mm4,%%xmm4\n\t" \
- "movq2dq %%mm5,%%xmm5\n\t" \
- /*xmmE = f3 e3 f2 e2 f1 e1 f0 e0*/ \
- "punpcklwd %%xmm5,%%xmm4\n\t" \
- "movq2dq %%mm6,%%xmm7\n\t" \
- "movq2dq %%mm7,%%xmm6\n\t" \
- /*xmmG = h3 g3 h2 g2 h1 g1 h0 g0*/ \
- "punpcklwd %%xmm6,%%xmm7\n\t" \
- "movdqa %%xmm0,%%xmm2\n\t" \
- /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \
- "punpckldq %%xmm3,%%xmm0\n\t" \
- /*xmm2 = d3 c3 b3 a3 d2 c2 b2 a2*/ \
- "punpckhdq %%xmm3,%%xmm2\n\t" \
- "movdqa %%xmm4,%%xmm5\n\t" \
- /*xmm4 = h1 g1 f1 e1 h0 g0 f0 e0*/ \
- "punpckldq %%xmm7,%%xmm4\n\t" \
- /*xmm3 = h3 g3 f3 e3 h2 g2 f2 e2*/ \
- "punpckhdq %%xmm7,%%xmm5\n\t" \
- "movdqa %%xmm0,%%xmm1\n\t" \
- /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
- "punpcklqdq %%xmm4,%%xmm0\n\t" \
- /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \
- "punpckhqdq %%xmm4,%%xmm1\n\t" \
- "movdqa %%xmm2,%%xmm3\n\t" \
- /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \
- "punpcklqdq %%xmm5,%%xmm2\n\t" \
- /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \
- "punpckhqdq %%xmm5,%%xmm3\n\t" \
-
-#endif
diff --git a/media/libtheora/lib/x86/x86cpu.c b/media/libtheora/lib/x86/x86cpu.c
deleted file mode 100644
index c3a20b319..000000000
--- a/media/libtheora/lib/x86/x86cpu.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
- CPU capability detection for x86 processors.
-  Originally written by Rudolf Marek.
-
- function:
-  last mod: $Id: x86cpu.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#include "x86cpu.h"
-
-#if !defined(OC_X86_ASM)
-ogg_uint32_t oc_cpu_flags_get(void){
-  return 0;
-}
-#else
-# if defined(__amd64__)||defined(__x86_64__)
-/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
-   compiling with -fPIC.*/
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  __asm__ __volatile__( \
-   "cpuid\n\t" \
-   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
-   :"a"(_op) \
-   :"cc" \
-  )
-# else
-/*On x86-32, not so much.*/
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  __asm__ __volatile__( \
-   "xchgl %%ebx,%[ebx]\n\t" \
-   "cpuid\n\t" \
-   "xchgl %%ebx,%[ebx]\n\t" \
-   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
-   :"a"(_op) \
-   :"cc" \
-  )
-# endif
-
-static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
-  ogg_uint32_t flags;
-  /*If there isn't even MMX, give up.*/
-  if(!(_edx&0x00800000))return 0;
-  flags=OC_CPU_X86_MMX;
-  if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
-  if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2;
-  if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI;
-  if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3;
-  if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1;
-  if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2;
-  return flags;
-}
-
-static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
-  ogg_uint32_t flags;
-  /*If there isn't even MMX, give up.*/
-  if(!(_edx&0x00800000))return 0;
-  flags=OC_CPU_X86_MMX;
-  if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
-  if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
-  if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
-  if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A;
-  if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5;
-  return flags;
-}
-
-ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t flags;
-  ogg_uint32_t eax;
-  ogg_uint32_t ebx;
-  ogg_uint32_t ecx;
-  ogg_uint32_t edx;
-# if !defined(__amd64__)&&!defined(__x86_64__)
-  /*Not all x86-32 chips support cpuid, so we have to check.*/
-  __asm__ __volatile__(
-   "pushfl\n\t"
-   "pushfl\n\t"
-   "popl %[a]\n\t"
-   "movl %[a],%[b]\n\t"
-   "xorl $0x200000,%[a]\n\t"
-   "pushl %[a]\n\t"
-   "popfl\n\t"
-   "pushfl\n\t"
-   "popl %[a]\n\t"
-   "popfl\n\t"
-   :[a]"=r"(eax),[b]"=r"(ebx)
-   :
-   :"cc"
-  );
-  /*No cpuid.*/
-  if(eax==ebx)return 0;
-# endif
-  cpuid(0,eax,ebx,ecx,edx);
-  /*         l e t n          I e n i          u n e G*/
-  if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
-   /*      6 8 x M          T e n i          u n e G*/
-   ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
-    int family;
-    int model;
-    /*Intel, Transmeta (tested with Crusoe TM5800):*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags=oc_parse_intel_flags(edx,ecx);
-    family=(eax>>8)&0xF;
-    model=(eax>>4)&0xF;
-    /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX
-       unit, so don't use it.*/
-    if(family==6&&(model==9||model==13||model==14)){
-      flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI);
-    }
-  }
-  /*              D M A c          i t n e          h t u A*/
-  else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
-   /*      C S N            y b   e          d o e G*/
-   ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
-    /*AMD, Geode:*/
-    cpuid(0x80000000,eax,ebx,ecx,edx);
-    if(eax<0x80000001)flags=0;
-    else{
-      cpuid(0x80000001,eax,ebx,ecx,edx);
-      flags=oc_parse_amd_flags(edx,ecx);
-    }
-    /*Also check for SSE.*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags|=oc_parse_intel_flags(edx,ecx);
-  }
-  /*Technically some VIA chips can be configured in the BIOS to return any
-     string here the user wants.
-    There is a special detection method that can be used to identify such
-     processors, but in my opinion, if the user really wants to change it, they
-     deserve what they get.*/
-  /*              s l u a          H r u a          t n e C*/
-  else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
-    /*VIA:*/
-    /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
-       chips (thanks to the engineers from Centaur Technology who provided it).
-      These chips support Intel-like cpuid info.
-      The C3-2 (Nehemiah) cores appear to, as well.*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags=oc_parse_intel_flags(edx,ecx);
-    if(eax>=0x80000001){
-      /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
-        We need to check this even if the Intel test succeeds to pick up 3DNow!
-         support on these processors.
-        Unlike actual AMD processors, we cannot _rely_ on this info, since
-         some cores (e.g., the 693 stepping of the Nehemiah) claim to support
-         this function, yet return edx=0, despite the Intel test indicating
-         MMX support.
-        Therefore the features detected here are strictly added to those
-         detected by the Intel test.*/
-      /*TODO: How about earlier chips?*/
-      cpuid(0x80000001,eax,ebx,ecx,edx);
-      /*Note: As of the C7, this function returns Intel-style extended feature
-         flags, not AMD-style.
-        Currently, this only defines bits 11, 20, and 29 (0x20100800), which
-         do not conflict with any of the AMD flags we inspect.
-        For the remaining bits, Intel tells us, "Do not count on their value",
-         but VIA assures us that they will all be zero (at least on the C7 and
-         Isaiah chips).
-        In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
-         (0xC0C00000) for something else, we will have to add code to detect
-         the model to decide when it is appropriate to inspect them.*/
-      flags|=oc_parse_amd_flags(edx,ecx);
-    }
-  }
-  else{
-    /*Implement me.*/
-    flags=0;
-  }
-  return flags;
-}
-#endif
diff --git a/media/libtheora/lib/x86/x86cpu.h b/media/libtheora/lib/x86/x86cpu.h
deleted file mode 100644
index 153a48d89..000000000
--- a/media/libtheora/lib/x86/x86cpu.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
- function:
-    last mod: $Id: x86cpu.h 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86cpu_H)
-# define _x86_x86cpu_H (1)
-#include "../internal.h"
-
-#define OC_CPU_X86_MMX      (1<<0)
-#define OC_CPU_X86_3DNOW    (1<<1)
-#define OC_CPU_X86_3DNOWEXT (1<<2)
-#define OC_CPU_X86_MMXEXT   (1<<3)
-#define OC_CPU_X86_SSE      (1<<4)
-#define OC_CPU_X86_SSE2     (1<<5)
-#define OC_CPU_X86_PNI      (1<<6)
-#define OC_CPU_X86_SSSE3    (1<<7)
-#define OC_CPU_X86_SSE4_1   (1<<8)
-#define OC_CPU_X86_SSE4_2   (1<<9)
-#define OC_CPU_X86_SSE4A    (1<<10)
-#define OC_CPU_X86_SSE5     (1<<11)
-
-ogg_uint32_t oc_cpu_flags_get(void);
-
-#endif
diff --git a/media/libtheora/lib/x86/x86int.h b/media/libtheora/lib/x86/x86int.h
deleted file mode 100644
index 35bfb0a02..000000000
--- a/media/libtheora/lib/x86/x86int.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 17578 2010-10-29 04:21:26Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_H)
-# define _x86_x86int_H (1)
-# include "../internal.h"
-
-# if defined(OC_X86_ASM)
-#  define oc_state_accel_init oc_state_accel_init_x86
-#  if defined(OC_X86_64_ASM)
-/*x86-64 guarantees SIMD support up through at least SSE2.
-  If the best routine we have available only needs SSE2 (which at the moment
-   covers all of them), then we can avoid runtime detection and the indirect
-   call.*/
-#   define oc_frag_copy(_state,_dst,_src,_ystride) \
-  oc_frag_copy_mmx(_dst,_src,_ystride)
-#   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
- _fragis,_nfragis,_frag_buf_offs) \
-  oc_frag_copy_list_mmx(_dst_frame,_src_frame,_ystride, \
-   _fragis,_nfragis,_frag_buf_offs)
-#   define oc_frag_recon_intra(_state,_dst,_ystride,_residue) \
-  oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
-#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
-  oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
-#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
-  oc_frag_recon_inter2_mmx(_dst,_src1,_src2,_ystride,_residue)
-#   define oc_idct8x8(_state,_y,_x,_last_zzi) \
-  oc_idct8x8_sse2(_y,_x,_last_zzi)
-#   define oc_state_frag_recon oc_state_frag_recon_mmx
-#   define oc_loop_filter_init(_state,_bv,_flimit) \
-  oc_loop_filter_init_mmxext(_bv,_flimit)
-#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_mmxext
-#   define oc_restore_fpu(_state) \
-  oc_restore_fpu_mmx()
-#  else
-#   define OC_STATE_USE_VTABLE (1)
-#  endif
-# endif
-
-# include "../state.h"
-# include "x86cpu.h"
-
-/*Converts the expression in the argument to a string.*/
-#define OC_M2STR(_s) #_s
-
-/*Memory operands do not always include an offset.
-  To avoid warnings, we force an offset with %H (which adds 8).*/
-# if __GNUC_PREREQ(4,0)
-#  define OC_MEM_OFFS(_offs,_name) \
-  OC_M2STR(_offs-8+%H[_name])
-# endif
-/*If your gcc version does't support %H, then you get to suffer the warnings.
-  Note that Apple's gas breaks on things like _offs+(%esp): it throws away the
-   whole offset, instead of substituting in 0 for the missing operand to +.*/
-# if !defined(OC_MEM_OFFS)
-#  define OC_MEM_OFFS(_offs,_name) \
-  OC_M2STR(_offs+%[_name])
-# endif
-
-/*Declare an array operand with an exact size.
-  This tells gcc we're going to clobber this memory region, without having to
-   clobber all of "memory" and lets us access local buffers directly using the
-   stack pointer, without allocating a separate register to point to them.*/
-#define OC_ARRAY_OPERAND(_type,_ptr,_size) \
-  (*({ \
-    struct{_type array_value__[(_size)];} *array_addr__=(void *)(_ptr); \
-    array_addr__; \
-  }))
-
-/*Declare an array operand with an exact size.
-  This tells gcc we're going to clobber this memory region, without having to
-   clobber all of "memory" and lets us access local buffers directly using the
-   stack pointer, without allocating a separate register to point to them.*/
-#define OC_CONST_ARRAY_OPERAND(_type,_ptr,_size) \
-  (*({ \
-    const struct{_type array_value__[(_size)];} *array_addr__= \
-     (const void *)(_ptr); \
-    array_addr__; \
-  }))
-
-extern const unsigned short __attribute__((aligned(16))) OC_IDCT_CONSTS[64];
-
-void oc_state_accel_init_x86(oc_theora_state *_state);
-
-void oc_frag_copy_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride);
-void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
-void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_idct8x8_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit);
-void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit);
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_restore_fpu_mmx(void);
-
-#endif
diff --git a/media/libtheora/lib/x86/x86state.c b/media/libtheora/lib/x86/x86state.c
deleted file mode 100644
index a3d37267f..000000000
--- a/media/libtheora/lib/x86/x86state.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86state.c 17421 2010-09-22 16:46:18Z giles $
-
- ********************************************************************/
-
-#include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
-   each quadrant of the destination.*/
-static const unsigned char OC_FZIG_ZAG_MMX[128]={
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3,32,11,18,25, 4,12,
-   5,26,19,40,33,34,41,48,
-  27, 6,13,20,28,21,14, 7,
-  56,49,42,35,43,50,57,36,
-  15,22,29,30,23,44,37,58,
-  51,59,38,45,52,31,60,53,
-  46,39,47,54,61,62,55,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64
-};
-
-/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
-   the destination.*/
-static const unsigned char OC_FZIG_ZAG_SSE2[128]={
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3, 4,11,18,25,32,40,
-  33,26,19,12, 5, 6,13,20,
-  27,34,41,48,56,49,42,35,
-  28,21,14, 7,15,22,29,36,
-  43,50,57,58,51,44,37,30,
-  23,31,38,45,52,59,60,53,
-  46,39,47,54,61,62,55,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64
-};
-
-void oc_state_accel_init_x86(oc_theora_state *_state){
-  oc_state_accel_init_c(_state);
-  _state->cpu_flags=oc_cpu_flags_get();
-# if defined(OC_STATE_USE_VTABLE)
-  if(_state->cpu_flags&OC_CPU_X86_MMX){
-    _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
-    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_mmx;
-    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
-    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
-    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
-    _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
-    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
-    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_mmx;
-    _state->opt_vtable.state_loop_filter_frag_rows=
-     oc_state_loop_filter_frag_rows_mmx;
-    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
-    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
-  }
-  if(_state->cpu_flags&OC_CPU_X86_MMXEXT){
-    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_mmxext;
-    _state->opt_vtable.state_loop_filter_frag_rows=
-     oc_state_loop_filter_frag_rows_mmxext;
-  }
-  if(_state->cpu_flags&OC_CPU_X86_SSE2){
-    _state->opt_vtable.idct8x8=oc_idct8x8_sse2;
-# endif
-    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_SSE2;
-# if defined(OC_STATE_USE_VTABLE)
-  }
-# endif
-}
-#endif
diff --git a/media/libtheora/lib/x86_vc/mmxfrag.c b/media/libtheora/lib/x86_vc/mmxfrag.c
deleted file mode 100644
index c16b026ff..000000000
--- a/media/libtheora/lib/x86_vc/mmxfrag.c
+++ /dev/null
@@ -1,416 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxfrag.c 17446 2010-09-23 20:06:20Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of fragment reconstruction for motion compensation.
-  Originally written by Rudolf Marek.
-  Additional optimization by Nils Pipenbrinck.
-  Note: Loops are unrolled for best performance.
-  The iteration each instruction belongs to is marked in the comments as #i.*/
-#include <stddef.h>
-#include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
-   between rows.*/
-# define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
-  do{ \
-    const unsigned char *src; \
-    unsigned char       *dst; \
-    src=(_src); \
-    dst=(_dst); \
-    __asm  mov SRC,src \
-    __asm  mov DST,dst \
-    __asm  mov YSTRIDE,_ystride \
-    /*src+0*ystride*/ \
-    __asm  movq mm0,[SRC] \
-    /*src+1*ystride*/ \
-    __asm  movq mm1,[SRC+YSTRIDE] \
-    /*ystride3=ystride*3*/ \
-    __asm  lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
-    /*src+2*ystride*/ \
-    __asm  movq mm2,[SRC+YSTRIDE*2] \
-    /*src+3*ystride*/ \
-    __asm  movq mm3,[SRC+YSTRIDE3] \
-    /*dst+0*ystride*/ \
-    __asm  movq [DST],mm0 \
-    /*dst+1*ystride*/ \
-    __asm  movq [DST+YSTRIDE],mm1 \
-    /*Pointer to next 4.*/ \
-    __asm  lea SRC,[SRC+YSTRIDE*4] \
-    /*dst+2*ystride*/ \
-    __asm  movq [DST+YSTRIDE*2],mm2 \
-    /*dst+3*ystride*/ \
-    __asm  movq [DST+YSTRIDE3],mm3 \
-    /*Pointer to next 4.*/ \
-    __asm  lea DST,[DST+YSTRIDE*4] \
-    /*src+0*ystride*/ \
-    __asm  movq mm0,[SRC] \
-    /*src+1*ystride*/ \
-    __asm  movq mm1,[SRC+YSTRIDE] \
-    /*src+2*ystride*/ \
-    __asm  movq mm2,[SRC+YSTRIDE*2] \
-    /*src+3*ystride*/ \
-    __asm  movq mm3,[SRC+YSTRIDE3] \
-    /*dst+0*ystride*/ \
-    __asm  movq [DST],mm0 \
-    /*dst+1*ystride*/ \
-    __asm  movq [DST+YSTRIDE],mm1 \
-    /*dst+2*ystride*/ \
-    __asm  movq [DST+YSTRIDE*2],mm2 \
-    /*dst+3*ystride*/ \
-    __asm  movq [DST+YSTRIDE3],mm3 \
-  } \
-  while(0)
-
-/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
-   between rows.*/
-void oc_frag_copy_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride){
-#define SRC edx
-#define DST eax
-#define YSTRIDE ecx
-#define YSTRIDE3 esi
-  OC_FRAG_COPY_MMX(_dst,_src,_ystride);
-#undef SRC
-#undef DST
-#undef YSTRIDE
-#undef YSTRIDE3
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
-   frame to another.
-  _dst_frame:     The reference frame to copy to.
-  _src_frame:     The reference frame to copy from.
-  _ystride:       The row stride of the reference frames.
-  _fragis:        A pointer to a list of fragment indices.
-  _nfragis:       The number of fragment indices to copy.
-  _frag_buf_offs: The offsets of fragments in the reference frames.*/
-void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs){
-  ptrdiff_t fragii;
-  for(fragii=0;fragii<_nfragis;fragii++){
-    ptrdiff_t frag_buf_off;
-    frag_buf_off=_frag_buf_offs[_fragis[fragii]];
-#define SRC edx
-#define DST eax
-#define YSTRIDE ecx
-#define YSTRIDE3 edi
-    OC_FRAG_COPY_MMX(_dst_frame+frag_buf_off,
-     _src_frame+frag_buf_off,_ystride);
-#undef SRC
-#undef DST
-#undef YSTRIDE
-#undef YSTRIDE3
-  }
-}
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue){
-  __asm{
-#define DST edx
-#define DST4 esi
-#define YSTRIDE eax
-#define YSTRIDE3 edi
-#define RESIDUE ecx
-    mov DST,_dst
-    mov YSTRIDE,_ystride
-    mov RESIDUE,_residue
-    lea DST4,[DST+YSTRIDE*4]
-    lea YSTRIDE3,[YSTRIDE+YSTRIDE*2]
-    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
-    pcmpeqw mm0,mm0
-    /*#0 Load low residue.*/
-    movq mm1,[0*8+RESIDUE]
-    /*#0 Load high residue.*/
-    movq mm2,[1*8+RESIDUE]
-    /*Set mm0 to 0x8000800080008000.*/
-    psllw mm0,15
-    /*#1 Load low residue.*/
-    movq mm3,[2*8+RESIDUE]
-    /*#1 Load high residue.*/
-    movq mm4,[3*8+RESIDUE]
-    /*Set mm0 to 0x0080008000800080.*/
-    psrlw mm0,8
-    /*#2 Load low residue.*/
-    movq mm5,[4*8+RESIDUE]
-    /*#2 Load high residue.*/
-    movq mm6,[5*8+RESIDUE]
-    /*#0 Bias low  residue.*/
-    paddsw mm1,mm0
-    /*#0 Bias high residue.*/
-    paddsw mm2,mm0
-    /*#0 Pack to byte.*/
-    packuswb mm1,mm2
-    /*#1 Bias low  residue.*/
-    paddsw mm3,mm0
-    /*#1 Bias high residue.*/
-    paddsw mm4,mm0
-    /*#1 Pack to byte.*/
-    packuswb mm3,mm4
-    /*#2 Bias low  residue.*/
-    paddsw mm5,mm0
-    /*#2 Bias high residue.*/
-    paddsw mm6,mm0
-    /*#2 Pack to byte.*/
-    packuswb mm5,mm6
-    /*#0 Write row.*/
-    movq [DST],mm1
-    /*#1 Write row.*/
-    movq [DST+YSTRIDE],mm3
-    /*#2 Write row.*/
-    movq [DST+YSTRIDE*2],mm5
-    /*#3 Load low residue.*/
-    movq mm1,[6*8+RESIDUE]
-    /*#3 Load high residue.*/
-    movq mm2,[7*8+RESIDUE]
-    /*#4 Load high residue.*/
-    movq mm3,[8*8+RESIDUE]
-    /*#4 Load high residue.*/
-    movq mm4,[9*8+RESIDUE]
-    /*#5 Load high residue.*/
-    movq mm5,[10*8+RESIDUE]
-    /*#5 Load high residue.*/
-    movq mm6,[11*8+RESIDUE]
-    /*#3 Bias low  residue.*/
-    paddsw mm1,mm0
-    /*#3 Bias high residue.*/
-    paddsw mm2,mm0
-    /*#3 Pack to byte.*/
-    packuswb mm1,mm2
-    /*#4 Bias low  residue.*/
-    paddsw mm3,mm0
-    /*#4 Bias high residue.*/
-    paddsw mm4,mm0
-    /*#4 Pack to byte.*/
-    packuswb mm3,mm4
-    /*#5 Bias low  residue.*/
-    paddsw mm5,mm0
-    /*#5 Bias high residue.*/
-    paddsw mm6,mm0
-    /*#5 Pack to byte.*/
-    packuswb mm5,mm6
-    /*#3 Write row.*/
-    movq [DST+YSTRIDE3],mm1
-    /*#4 Write row.*/
-    movq [DST4],mm3
-    /*#5 Write row.*/
-    movq [DST4+YSTRIDE],mm5
-    /*#6 Load low residue.*/
-    movq mm1,[12*8+RESIDUE]
-    /*#6 Load high residue.*/
-    movq mm2,[13*8+RESIDUE]
-    /*#7 Load low residue.*/
-    movq mm3,[14*8+RESIDUE]
-    /*#7 Load high residue.*/
-    movq mm4,[15*8+RESIDUE]
-    /*#6 Bias low  residue.*/
-    paddsw mm1,mm0
-    /*#6 Bias high residue.*/
-    paddsw mm2,mm0
-    /*#6 Pack to byte.*/
-    packuswb mm1,mm2
-    /*#7 Bias low  residue.*/
-    paddsw mm3,mm0
-    /*#7 Bias high residue.*/
-    paddsw mm4,mm0
-    /*#7 Pack to byte.*/
-    packuswb mm3,mm4
-    /*#6 Write row.*/
-    movq [DST4+YSTRIDE*2],mm1
-    /*#7 Write row.*/
-    movq [DST4+YSTRIDE3],mm3
-#undef DST
-#undef DST4
-#undef YSTRIDE
-#undef YSTRIDE3
-#undef RESIDUE
-  }
-}
-
-void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
- int _ystride,const ogg_int16_t *_residue){
-  int i;
-  /*Zero mm0.*/
-  __asm pxor mm0,mm0;
-  for(i=4;i-->0;){
-    __asm{
-#define DST edx
-#define SRC ecx
-#define YSTRIDE edi
-#define RESIDUE eax
-      mov DST,_dst
-      mov SRC,_src
-      mov YSTRIDE,_ystride
-      mov RESIDUE,_residue
-      /*#0 Load source.*/
-      movq mm3,[SRC]
-      /*#1 Load source.*/
-      movq mm7,[SRC+YSTRIDE]
-      /*#0 Get copy of src.*/
-      movq mm4,mm3
-      /*#0 Expand high source.*/
-      punpckhbw mm4,mm0
-      /*#0 Expand low  source.*/
-      punpcklbw mm3,mm0
-      /*#0 Add residue high.*/
-      paddsw mm4,[8+RESIDUE]
-      /*#1 Get copy of src.*/
-      movq mm2,mm7
-      /*#0 Add residue low.*/
-      paddsw  mm3,[RESIDUE]
-      /*#1 Expand high source.*/
-      punpckhbw mm2,mm0
-      /*#0 Pack final row pixels.*/
-      packuswb mm3,mm4
-      /*#1 Expand low  source.*/
-      punpcklbw mm7,mm0
-      /*#1 Add residue low.*/
-      paddsw mm7,[16+RESIDUE]
-      /*#1 Add residue high.*/
-      paddsw mm2,[24+RESIDUE]
-      /*Advance residue.*/
-      lea RESIDUE,[32+RESIDUE]
-      /*#1 Pack final row pixels.*/
-      packuswb mm7,mm2
-      /*Advance src.*/
-      lea SRC,[SRC+YSTRIDE*2]
-      /*#0 Write row.*/
-      movq [DST],mm3
-      /*#1 Write row.*/
-      movq [DST+YSTRIDE],mm7
-      /*Advance dst.*/
-      lea DST,[DST+YSTRIDE*2]
-      mov _residue,RESIDUE
-      mov _dst,DST
-      mov _src,SRC
-#undef DST
-#undef SRC
-#undef YSTRIDE
-#undef RESIDUE
-    }
-  }
-}
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
-  int i;
-  /*Zero mm7.*/
-  __asm pxor mm7,mm7;
-  for(i=4;i-->0;){
-    __asm{
-#define SRC1 ecx
-#define SRC2 edi
-#define YSTRIDE esi
-#define RESIDUE edx
-#define DST eax
-      mov YSTRIDE,_ystride
-      mov DST,_dst
-      mov RESIDUE,_residue
-      mov SRC1,_src1
-      mov SRC2,_src2
-      /*#0 Load src1.*/
-      movq mm0,[SRC1]
-      /*#0 Load src2.*/
-      movq mm2,[SRC2]
-      /*#0 Copy src1.*/
-      movq mm1,mm0
-      /*#0 Copy src2.*/
-      movq mm3,mm2
-      /*#1 Load src1.*/
-      movq mm4,[SRC1+YSTRIDE]
-      /*#0 Unpack lower src1.*/
-      punpcklbw mm0,mm7
-      /*#1 Load src2.*/
-      movq mm5,[SRC2+YSTRIDE]
-      /*#0 Unpack higher src1.*/
-      punpckhbw mm1,mm7
-      /*#0 Unpack lower src2.*/
-      punpcklbw mm2,mm7
-      /*#0 Unpack higher src2.*/
-      punpckhbw mm3,mm7
-      /*Advance src1 ptr.*/
-      lea SRC1,[SRC1+YSTRIDE*2]
-      /*Advance src2 ptr.*/
-      lea SRC2,[SRC2+YSTRIDE*2]
-      /*#0 Lower src1+src2.*/
-      paddsw mm0,mm2
-      /*#0 Higher src1+src2.*/
-      paddsw mm1,mm3
-      /*#1 Copy src1.*/
-      movq mm2,mm4
-      /*#0 Build lo average.*/
-      psraw mm0,1
-      /*#1 Copy src2.*/
-      movq mm3,mm5
-      /*#1 Unpack lower src1.*/
-      punpcklbw mm4,mm7
-      /*#0 Build hi average.*/
-      psraw mm1,1
-      /*#1 Unpack higher src1.*/
-      punpckhbw mm2,mm7
-      /*#0 low+=residue.*/
-      paddsw mm0,[RESIDUE]
-      /*#1 Unpack lower src2.*/
-      punpcklbw mm5,mm7
-      /*#0 high+=residue.*/
-      paddsw mm1,[8+RESIDUE]
-      /*#1 Unpack higher src2.*/
-      punpckhbw mm3,mm7
-      /*#1 Lower src1+src2.*/
-      paddsw mm5,mm4
-      /*#0 Pack and saturate.*/
-      packuswb mm0,mm1
-      /*#1 Higher src1+src2.*/
-      paddsw mm3,mm2
-      /*#0 Write row.*/
-      movq [DST],mm0
-      /*#1 Build lo average.*/
-      psraw mm5,1
-      /*#1 Build hi average.*/
-      psraw mm3,1
-      /*#1 low+=residue.*/
-      paddsw mm5,[16+RESIDUE]
-      /*#1 high+=residue.*/
-      paddsw mm3,[24+RESIDUE]
-      /*#1 Pack and saturate.*/
-      packuswb  mm5,mm3
-      /*#1 Write row ptr.*/
-      movq [DST+YSTRIDE],mm5
-      /*Advance residue ptr.*/
-      add RESIDUE,32
-      /*Advance dest ptr.*/
-      lea DST,[DST+YSTRIDE*2]
-      mov _dst,DST
-      mov _residue,RESIDUE
-      mov _src1,SRC1
-      mov _src2,SRC2
-#undef SRC1
-#undef SRC2
-#undef YSTRIDE
-#undef RESIDUE
-#undef DST
-    }
-  }
-}
-
-void oc_restore_fpu_mmx(void){
-  __asm emms;
-}
-
-#endif
diff --git a/media/libtheora/lib/x86_vc/mmxidct.c b/media/libtheora/lib/x86_vc/mmxidct.c
deleted file mode 100644
index 53a9ac7f3..000000000
--- a/media/libtheora/lib/x86_vc/mmxidct.c
+++ /dev/null
@@ -1,597 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxidct.c 17446 2010-09-23 20:06:20Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of Theora's iDCT.
-  Originally written by Rudolf Marek, based on code from On2's VP3.*/
-#include "x86int.h"
-#include "../dct.h"
-
-#if defined(OC_X86_ASM)
-
-/*These are offsets into the table of constants below.*/
-/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
-#define OC_COSINE_OFFSET (8)
-/*A row of 8's.*/
-#define OC_EIGHT_OFFSET  (0)
-
-
-
-/*A table of constants used by the MMX routines.*/
-static const OC_ALIGN16(ogg_uint16_t) OC_IDCT_CONSTS[(1+7)*4]={
-      8,    8,    8,    8,
-  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
-  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
-  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
-  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
-  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
-  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
-  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
-  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
-  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
-  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
-  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
-  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
-  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
-  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1
-};
-
-/*38 cycles*/
-#define OC_IDCT_BEGIN(_y,_x) __asm{ \
-  __asm movq mm2,OC_I(3,_x) \
-  __asm movq mm6,OC_C(3) \
-  __asm movq mm4,mm2 \
-  __asm movq mm7,OC_J(5,_x) \
-  __asm pmulhw mm4,mm6 \
-  __asm movq mm1,OC_C(5) \
-  __asm pmulhw mm6,mm7 \
-  __asm movq mm5,mm1 \
-  __asm pmulhw mm1,mm2 \
-  __asm movq mm3,OC_I(1,_x) \
-  __asm pmulhw mm5,mm7 \
-  __asm movq mm0,OC_C(1) \
-  __asm paddw mm4,mm2 \
-  __asm paddw mm6,mm7 \
-  __asm paddw mm2,mm1 \
-  __asm movq mm1,OC_J(7,_x) \
-  __asm paddw mm7,mm5 \
-  __asm movq mm5,mm0 \
-  __asm pmulhw mm0,mm3 \
-  __asm paddw mm4,mm7 \
-  __asm pmulhw mm5,mm1 \
-  __asm movq mm7,OC_C(7) \
-  __asm psubw mm6,mm2 \
-  __asm paddw mm0,mm3 \
-  __asm pmulhw mm3,mm7 \
-  __asm movq mm2,OC_I(2,_x) \
-  __asm pmulhw mm7,mm1 \
-  __asm paddw mm5,mm1 \
-  __asm movq mm1,mm2 \
-  __asm pmulhw mm2,OC_C(2) \
-  __asm psubw mm3,mm5 \
-  __asm movq mm5,OC_J(6,_x) \
-  __asm paddw mm0,mm7 \
-  __asm movq mm7,mm5 \
-  __asm psubw mm0,mm4 \
-  __asm pmulhw mm5,OC_C(2) \
-  __asm paddw mm2,mm1 \
-  __asm pmulhw mm1,OC_C(6) \
-  __asm paddw mm4,mm4 \
-  __asm paddw mm4,mm0 \
-  __asm psubw mm3,mm6 \
-  __asm paddw mm5,mm7 \
-  __asm paddw mm6,mm6 \
-  __asm pmulhw mm7,OC_C(6) \
-  __asm paddw mm6,mm3 \
-  __asm movq OC_I(1,_y),mm4 \
-  __asm psubw mm1,mm5 \
-  __asm movq mm4,OC_C(4) \
-  __asm movq mm5,mm3 \
-  __asm pmulhw mm3,mm4 \
-  __asm paddw mm7,mm2 \
-  __asm movq OC_I(2,_y),mm6 \
-  __asm movq mm2,mm0 \
-  __asm movq mm6,OC_I(0,_x) \
-  __asm pmulhw mm0,mm4 \
-  __asm paddw mm5,mm3 \
-  __asm movq mm3,OC_J(4,_x) \
-  __asm psubw mm5,mm1 \
-  __asm paddw mm2,mm0 \
-  __asm psubw mm6,mm3 \
-  __asm movq mm0,mm6 \
-  __asm pmulhw mm6,mm4 \
-  __asm paddw mm3,mm3 \
-  __asm paddw mm1,mm1 \
-  __asm paddw mm3,mm0 \
-  __asm paddw mm1,mm5 \
-  __asm pmulhw mm4,mm3 \
-  __asm paddw mm6,mm0 \
-  __asm psubw mm6,mm2 \
-  __asm paddw mm2,mm2 \
-  __asm movq mm0,OC_I(1,_y) \
-  __asm paddw mm2,mm6 \
-  __asm paddw mm4,mm3 \
-  __asm psubw mm2,mm1 \
-}
-
-/*38+8=46 cycles.*/
-#define OC_ROW_IDCT(_y,_x) __asm{ \
-  OC_IDCT_BEGIN(_y,_x) \
-  /*r3=D'*/ \
-  __asm  movq mm3,OC_I(2,_y) \
-  /*r4=E'=E-G*/ \
-  __asm  psubw mm4,mm7 \
-  /*r1=H'+H'*/ \
-  __asm  paddw mm1,mm1 \
-  /*r7=G+G*/ \
-  __asm  paddw mm7,mm7 \
-  /*r1=R1=A''+H'*/ \
-  __asm  paddw mm1,mm2 \
-  /*r7=G'=E+G*/ \
-  __asm  paddw mm7,mm4 \
-  /*r4=R4=E'-D'*/ \
-  __asm  psubw mm4,mm3 \
-  __asm  paddw mm3,mm3 \
-  /*r6=R6=F'-B''*/ \
-  __asm  psubw mm6,mm5 \
-  __asm  paddw mm5,mm5 \
-  /*r3=R3=E'+D'*/ \
-  __asm  paddw mm3,mm4 \
-  /*r5=R5=F'+B''*/ \
-  __asm  paddw mm5,mm6 \
-  /*r7=R7=G'-C'*/ \
-  __asm  psubw mm7,mm0 \
-  __asm  paddw mm0,mm0 \
-  /*Save R1.*/ \
-  __asm  movq OC_I(1,_y),mm1 \
-  /*r0=R0=G.+C.*/ \
-  __asm  paddw mm0,mm7 \
-}
-
-/*The following macro does two 4x4 transposes in place.
-  At entry, we assume:
-    r0 = a3 a2 a1 a0
-  I(1) = b3 b2 b1 b0
-    r2 = c3 c2 c1 c0
-    r3 = d3 d2 d1 d0
-
-    r4 = e3 e2 e1 e0
-    r5 = f3 f2 f1 f0
-    r6 = g3 g2 g1 g0
-    r7 = h3 h2 h1 h0
-
-  At exit, we have:
-  I(0) = d0 c0 b0 a0
-  I(1) = d1 c1 b1 a1
-  I(2) = d2 c2 b2 a2
-  I(3) = d3 c3 b3 a3
-
-  J(4) = h0 g0 f0 e0
-  J(5) = h1 g1 f1 e1
-  J(6) = h2 g2 f2 e2
-  J(7) = h3 g3 f3 e3
-
-  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
-  J(4) J(5) J(6) J(7) is the transpose of r4  r5  r6 r7.
-
-  Since r1 is free at entry, we calculate the Js first.*/
-/*19 cycles.*/
-#define OC_TRANSPOSE(_y) __asm{ \
-  __asm movq mm1,mm4 \
-  __asm punpcklwd mm4,mm5 \
-  __asm movq OC_I(0,_y),mm0 \
-  __asm punpckhwd mm1,mm5 \
-  __asm movq mm0,mm6 \
-  __asm punpcklwd mm6,mm7 \
-  __asm movq mm5,mm4 \
-  __asm punpckldq mm4,mm6 \
-  __asm punpckhdq mm5,mm6 \
-  __asm movq mm6,mm1 \
-  __asm movq OC_J(4,_y),mm4 \
-  __asm punpckhwd mm0,mm7 \
-  __asm movq OC_J(5,_y),mm5 \
-  __asm punpckhdq mm6,mm0 \
-  __asm movq mm4,OC_I(0,_y) \
-  __asm punpckldq mm1,mm0 \
-  __asm movq mm5,OC_I(1,_y) \
-  __asm movq mm0,mm4 \
-  __asm movq OC_J(7,_y),mm6 \
-  __asm punpcklwd mm0,mm5 \
-  __asm movq OC_J(6,_y),mm1 \
-  __asm punpckhwd mm4,mm5 \
-  __asm movq mm5,mm2 \
-  __asm punpcklwd mm2,mm3 \
-  __asm movq mm1,mm0 \
-  __asm punpckldq mm0,mm2 \
-  __asm punpckhdq mm1,mm2 \
-  __asm movq mm2,mm4 \
-  __asm movq OC_I(0,_y),mm0 \
-  __asm punpckhwd mm5,mm3 \
-  __asm movq OC_I(1,_y),mm1 \
-  __asm punpckhdq mm4,mm5 \
-  __asm punpckldq mm2,mm5 \
-  __asm movq OC_I(3,_y),mm4 \
-  __asm movq OC_I(2,_y),mm2 \
-}
-
-/*38+19=57 cycles.*/
-#define OC_COLUMN_IDCT(_y) __asm{ \
-  OC_IDCT_BEGIN(_y,_y) \
-  __asm paddw mm2,OC_8 \
-  /*r1=H'+H'*/ \
-  __asm paddw mm1,mm1 \
-  /*r1=R1=A''+H'*/ \
-  __asm paddw mm1,mm2 \
-  /*r2=NR2*/ \
-  __asm psraw mm2,4 \
-  /*r4=E'=E-G*/ \
-  __asm psubw mm4,mm7 \
-  /*r1=NR1*/ \
-  __asm psraw mm1,4 \
-  /*r3=D'*/ \
-  __asm movq mm3,OC_I(2,_y) \
-  /*r7=G+G*/ \
-  __asm paddw mm7,mm7 \
-  /*Store NR2 at I(2).*/ \
-  __asm movq OC_I(2,_y),mm2 \
-  /*r7=G'=E+G*/ \
-  __asm paddw mm7,mm4 \
-  /*Store NR1 at I(1).*/ \
-  __asm movq OC_I(1,_y),mm1 \
-  /*r4=R4=E'-D'*/ \
-  __asm psubw mm4,mm3 \
-  __asm paddw mm4,OC_8 \
-  /*r3=D'+D'*/ \
-  __asm paddw mm3,mm3 \
-  /*r3=R3=E'+D'*/ \
-  __asm paddw mm3,mm4 \
-  /*r4=NR4*/ \
-  __asm psraw mm4,4 \
-  /*r6=R6=F'-B''*/ \
-  __asm psubw mm6,mm5 \
-  /*r3=NR3*/ \
-  __asm psraw mm3,4 \
-  __asm paddw mm6,OC_8 \
-  /*r5=B''+B''*/ \
-  __asm paddw mm5,mm5 \
-  /*r5=R5=F'+B''*/ \
-  __asm paddw mm5,mm6 \
-  /*r6=NR6*/ \
-  __asm psraw mm6,4 \
-  /*Store NR4 at J(4).*/ \
-  __asm movq OC_J(4,_y),mm4 \
-  /*r5=NR5*/ \
-  __asm psraw mm5,4 \
-  /*Store NR3 at I(3).*/ \
-  __asm movq OC_I(3,_y),mm3 \
-  /*r7=R7=G'-C'*/ \
-  __asm psubw mm7,mm0 \
-  __asm paddw mm7,OC_8 \
-  /*r0=C'+C'*/ \
-  __asm paddw mm0,mm0 \
-  /*r0=R0=G'+C'*/ \
-  __asm paddw mm0,mm7 \
-  /*r7=NR7*/ \
-  __asm psraw mm7,4 \
-  /*Store NR6 at J(6).*/ \
-  __asm movq OC_J(6,_y),mm6 \
-  /*r0=NR0*/ \
-  __asm psraw mm0,4 \
-  /*Store NR5 at J(5).*/ \
-  __asm movq OC_J(5,_y),mm5 \
-  /*Store NR7 at J(7).*/ \
-  __asm movq OC_J(7,_y),mm7 \
-  /*Store NR0 at I(0).*/ \
-  __asm movq OC_I(0,_y),mm0 \
-}
-
-#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8]
-#define OC_C(_i)      OC_MID(OC_COSINE_OFFSET,_i-1)
-#define OC_8          OC_MID(OC_EIGHT_OFFSET,0)
-
-/*Performs the full inverse 8x8 DCT using MMX.
-  _y: The buffer to store the result in.
-  _x: The input coefficients; zeroed on return unless it is the same buffer
-       as _y.*/
-static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  /*This routine accepts an 8x8 matrix, but in partially transposed form.
-    Every 4x4 block is transposed.*/
-  __asm{
-#define CONSTS eax
-#define Y edx
-#define X ecx
-    mov CONSTS,offset OC_IDCT_CONSTS
-    mov Y,_y
-    mov X,_x
-#define OC_I(_k,_y)   [(_y)+(_k)*16]
-#define OC_J(_k,_y)   [(_y)+((_k)-4)*16+8]
-    OC_ROW_IDCT(Y,X)
-    OC_TRANSPOSE(Y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   [(_y)+(_k)*16+64]
-#define OC_J(_k,_y)   [(_y)+((_k)-4)*16+72]
-    OC_ROW_IDCT(Y,X)
-    OC_TRANSPOSE(Y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   [(_y)+(_k)*16]
-#define OC_J(_k,_y)   OC_I(_k,_y)
-    OC_COLUMN_IDCT(Y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y)   [(_y)+(_k)*16+8]
-#define OC_J(_k,_y)   OC_I(_k,_y)
-    OC_COLUMN_IDCT(Y)
-#undef  OC_I
-#undef  OC_J
-#undef  CONSTS
-#undef  Y
-#undef  X
-  }
-  if(_x!=_y){
-    int i;
-    /*Clear the input buffer: each of the 4 iterations zeroes 32 bytes, which
-       together cover all 64 coefficients.*/
-    __asm pxor mm0,mm0;
-    for(i=0;i<4;i++){
-      ogg_int16_t *x;
-      x=_x+16*i;
-#define X ecx
-      __asm{
-        mov X,x
-        movq [X+0x00],mm0
-        movq [X+0x08],mm0
-        movq [X+0x10],mm0
-        movq [X+0x18],mm0
-      }
-#undef  X
-    }
-  }
-}
-
-/*25 cycles.
-  Shared first stage of the reduced iDCT used by oc_idct8x8_10: it reads only
-   OC_I(0,_x) through OC_I(3,_x).
-  Intermediate values are stored to OC_I(1,_y) and OC_I(2,_y); everything
-   else is left in mm0 through mm7 for OC_ROW_IDCT_10 or OC_COLUMN_IDCT_10,
-   both of which expand this macro first.*/
-#define OC_IDCT_BEGIN_10(_y,_x) __asm{ \
-  __asm movq mm2,OC_I(3,_x) \
-  __asm nop \
-  __asm movq mm6,OC_C(3) \
-  __asm movq mm4,mm2 \
-  __asm movq mm1,OC_C(5) \
-  __asm pmulhw mm4,mm6 \
-  __asm movq mm3,OC_I(1,_x) \
-  __asm pmulhw mm1,mm2 \
-  __asm movq mm0,OC_C(1) \
-  __asm paddw mm4,mm2 \
-  __asm pxor mm6,mm6 \
-  __asm paddw mm2,mm1 \
-  __asm movq mm5,OC_I(2,_x) \
-  __asm pmulhw mm0,mm3 \
-  __asm movq mm1,mm5 \
-  __asm paddw mm0,mm3 \
-  __asm pmulhw mm3,OC_C(7) \
-  __asm psubw mm6,mm2 \
-  __asm pmulhw mm5,OC_C(2) \
-  __asm psubw mm0,mm4 \
-  __asm movq mm7,OC_I(2,_x) \
-  __asm paddw mm4,mm4 \
-  __asm paddw mm7,mm5 \
-  __asm paddw mm4,mm0 \
-  __asm pmulhw mm1,OC_C(6) \
-  __asm psubw mm3,mm6 \
-  __asm movq OC_I(1,_y),mm4 \
-  __asm paddw mm6,mm6 \
-  __asm movq mm4,OC_C(4) \
-  __asm paddw mm6,mm3 \
-  __asm movq mm5,mm3 \
-  __asm pmulhw mm3,mm4 \
-  __asm movq OC_I(2,_y),mm6 \
-  __asm movq mm2,mm0 \
-  __asm movq mm6,OC_I(0,_x) \
-  __asm pmulhw mm0,mm4 \
-  __asm paddw mm5,mm3 \
-  __asm paddw mm2,mm0 \
-  __asm psubw mm5,mm1 \
-  __asm pmulhw mm6,mm4 \
-  __asm paddw mm6,OC_I(0,_x) \
-  __asm paddw mm1,mm1 \
-  __asm movq mm4,mm6 \
-  __asm paddw mm1,mm5 \
-  __asm psubw mm6,mm2 \
-  __asm paddw mm2,mm2 \
-  __asm movq mm0,OC_I(1,_y) \
-  __asm paddw mm2,mm6 \
-  __asm psubw mm2,mm1 \
-  __asm nop \
-}
-
-/*25+8=33 cycles.
-  Row pass of the reduced iDCT: expands OC_IDCT_BEGIN_10, then finishes the
-   butterflies without any rounding or shifting.
-  Only R1 is written to memory (at OC_I(1,_y)); the other results stay in
-   registers for the OC_TRANSPOSE that follows this macro in oc_idct8x8_10.*/
-#define OC_ROW_IDCT_10(_y,_x) __asm{ \
-  OC_IDCT_BEGIN_10(_y,_x) \
-  /*r3=D'*/ \
-   __asm movq mm3,OC_I(2,_y) \
-  /*r4=E'=E-G*/ \
-   __asm psubw mm4,mm7 \
-  /*r1=H'+H'*/ \
-   __asm paddw mm1,mm1 \
-  /*r7=G+G*/ \
-   __asm paddw mm7,mm7 \
-  /*r1=R1=A''+H'*/ \
-   __asm paddw mm1,mm2 \
-  /*r7=G'=E+G*/ \
-   __asm paddw mm7,mm4 \
-  /*r4=R4=E'-D'*/ \
-   __asm psubw mm4,mm3 \
-   __asm paddw mm3,mm3 \
-  /*r6=R6=F'-B''*/ \
-   __asm psubw mm6,mm5 \
-   __asm paddw mm5,mm5 \
-  /*r3=R3=E'+D'*/ \
-   __asm paddw mm3,mm4 \
-  /*r5=R5=F'+B''*/ \
-   __asm paddw mm5,mm6 \
-  /*r7=R7=G'-C'*/ \
-   __asm psubw mm7,mm0 \
-   __asm paddw mm0,mm0 \
-  /*Save R1.*/ \
-   __asm movq OC_I(1,_y),mm1 \
-  /*r0=R0=G'+C'*/ \
-   __asm paddw mm0,mm7 \
-}
-
-/*25+19=44 cycles.
-  Column pass of the reduced iDCT: expands OC_IDCT_BEGIN_10 in place on _y,
-   then adds the OC_8 constant to every result and shifts it right by 4
-   (arithmetically) to form the NRk values.
-  All eight results are stored: NR0 through NR3 at OC_I(0.._3,_y) and NR4
-   through NR7 at OC_J(4..7,_y).*/
-#define OC_COLUMN_IDCT_10(_y) __asm{ \
-  OC_IDCT_BEGIN_10(_y,_y) \
-  __asm paddw mm2,OC_8 \
-  /*r1=H'+H'*/ \
-  __asm paddw mm1,mm1 \
-  /*r1=R1=A''+H'*/ \
-  __asm paddw mm1,mm2 \
-  /*r2=NR2*/ \
-  __asm psraw mm2,4 \
-  /*r4=E'=E-G*/ \
-  __asm psubw mm4,mm7 \
-  /*r1=NR1*/ \
-  __asm psraw mm1,4 \
-  /*r3=D'*/ \
-  __asm movq mm3,OC_I(2,_y) \
-  /*r7=G+G*/ \
-  __asm paddw mm7,mm7 \
-  /*Store NR2 at I(2).*/ \
-  __asm movq OC_I(2,_y),mm2 \
-  /*r7=G'=E+G*/ \
-  __asm paddw mm7,mm4 \
-  /*Store NR1 at I(1).*/ \
-  __asm movq OC_I(1,_y),mm1 \
-  /*r4=R4=E'-D'*/ \
-  __asm psubw mm4,mm3 \
-  __asm paddw mm4,OC_8 \
-  /*r3=D'+D'*/ \
-  __asm paddw mm3,mm3 \
-  /*r3=R3=E'+D'*/ \
-  __asm paddw mm3,mm4 \
-  /*r4=NR4*/ \
-  __asm psraw mm4,4 \
-  /*r6=R6=F'-B''*/ \
-  __asm psubw mm6,mm5 \
-  /*r3=NR3*/ \
-  __asm psraw mm3,4 \
-  __asm paddw mm6,OC_8 \
-  /*r5=B''+B''*/ \
-  __asm paddw mm5,mm5 \
-  /*r5=R5=F'+B''*/ \
-  __asm paddw mm5,mm6 \
-  /*r6=NR6*/ \
-  __asm psraw mm6,4 \
-  /*Store NR4 at J(4).*/ \
-  __asm movq OC_J(4,_y),mm4 \
-  /*r5=NR5*/ \
-  __asm psraw mm5,4 \
-  /*Store NR3 at I(3).*/ \
-  __asm movq OC_I(3,_y),mm3 \
-  /*r7=R7=G'-C'*/ \
-  __asm psubw mm7,mm0 \
-  __asm paddw mm7,OC_8 \
-  /*r0=C'+C'*/ \
-  __asm paddw mm0,mm0 \
-  /*r0=R0=G'+C'*/ \
-  __asm paddw mm0,mm7 \
-  /*r7=NR7*/ \
-  __asm psraw mm7,4 \
-  /*Store NR6 at J(6).*/ \
-  __asm movq OC_J(6,_y),mm6 \
-  /*r0=NR0*/ \
-  __asm psraw mm0,4 \
-  /*Store NR5 at J(5).*/ \
-  __asm movq OC_J(5,_y),mm5 \
-  /*Store NR7 at J(7).*/ \
-  __asm movq OC_J(7,_y),mm7 \
-  /*Store NR0 at I(0).*/ \
-  __asm movq OC_I(0,_y),mm0 \
-}
-/*Performs the reduced inverse 8x8 DCT that oc_idct8x8_mmx selects when
-   _last_zzi<=10.
-  _y: The buffer to store the result in.
-  _x: The input coefficients.
-      Unless it is the same buffer as _y, the first 8 bytes (4 coefficients)
-       of each of its first four 16-byte rows are zeroed on return.*/
-static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
-  __asm{
-#define CONSTS eax
-#define Y edx
-#define X ecx
-    mov CONSTS,offset OC_IDCT_CONSTS
-    mov Y,_y
-    mov X,_x
-#define OC_I(_k,_y) [(_y)+(_k)*16]
-#define OC_J(_k,_y) [(_y)+((_k)-4)*16+8]
-    /*Done with dequant, descramble, and partial transpose.
-      Now do the iDCT itself.*/
-    OC_ROW_IDCT_10(Y,X)
-    OC_TRANSPOSE(Y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y) [(_y)+(_k)*16]
-#define OC_J(_k,_y) OC_I(_k,_y)
-    OC_COLUMN_IDCT_10(Y)
-#undef  OC_I
-#undef  OC_J
-#define OC_I(_k,_y) [(_y)+(_k)*16+8]
-#define OC_J(_k,_y) OC_I(_k,_y)
-    OC_COLUMN_IDCT_10(Y)
-#undef  OC_I
-#undef  OC_J
-#undef  CONSTS
-#undef  Y
-#undef  X
-  }
-  if(_x!=_y){
-#define X ecx
-    __asm{
-      pxor mm0,mm0;
-      mov X,_x
-      movq [X+0x00],mm0
-      movq [X+0x10],mm0
-      movq [X+0x20],mm0
-      movq [X+0x30],mm0
-    }
-#undef  X
-  }
-}
-
-/*Performs an inverse 8x8 Type-II DCT transform.
-  The input is assumed to be scaled by a factor of 4 relative to the
-   orthonormal version of the transform.
-  _y:        The buffer to store the result in.
-  _x:        The input coefficients.
-  _last_zzi: The value of zzi before the final token of the block was
-              decoded (see below).*/
-void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
-  /*_last_zzi is not quite a count of the coefficients decoded for this
-     block: it holds the value of zzi BEFORE the block's final token was
-     decoded.
-    Usually that token is an EOB (continuing an EOB run from an earlier block
-     counts too), in which case the two are the same.
-    When the final token is NOT an EOB but fills the block to exactly 64
-     coefficients, _last_zzi is less than 64.
-    If that token was not a pure zero run, _last_zzi is at least 46, which
-     does not change which transform is chosen below.
-    If it WAS a pure zero run of length 63, _last_zzi is 1 even though 64
-     coefficients were decoded, so the reduced transform is selected where a
-     true coefficient count would not select it.
-    A zero run of length 64 gives a _last_zzi of 0, but the DC coefficient is
-     still processed, since DC prediction may have made it non-zero.
-    Convoluted as it is, this is arguably the right behavior: a block ending
-     in a long zero run gets the smaller transform just as one ending in a
-     normal EOB token would.
-    It could be smarter: several separate zero runs at the end of a block
-     will fool it, but an encoder that produces those deserves what it gets.
-    This approach was inherited from VP3.*/
-  if(_last_zzi>10){
-    /*Too many coefficients for the reduced transform.*/
-    oc_idct8x8_slow(_y,_x);
-    return;
-  }
-  oc_idct8x8_10(_y,_x);
-}
-
-#endif
diff --git a/media/libtheora/lib/x86_vc/mmxloop.h b/media/libtheora/lib/x86_vc/mmxloop.h
deleted file mode 100644
index 2561fca2a..000000000
--- a/media/libtheora/lib/x86_vc/mmxloop.h
+++ /dev/null
@@ -1,219 +0,0 @@
-#if !defined(_x86_vc_mmxloop_H)
-# define _x86_vc_mmxloop_H (1)
-# include <stddef.h>
-# include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
-  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
-   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.
-  mm4 through mm7 are used as scratch registers.
-  LL must be #defined to a register pointing at 8 bytes that each hold the
-   limit L: they are loaded into mm0 and subtracted twice from 255 to form
-   255-2*L.*/
-#define OC_LOOP_FILTER8_MMX __asm{ \
-  /*mm7=0*/ \
-  __asm pxor mm7,mm7 \
-  /*mm6:mm0={a0,...,a7}*/ \
-  __asm movq mm6,mm0 \
-  __asm punpcklbw mm0,mm7 \
-  __asm punpckhbw mm6,mm7 \
-  /*mm3:mm5={d0,...,d7}*/ \
-  __asm movq mm5,mm3 \
-  __asm punpcklbw mm3,mm7 \
-  __asm punpckhbw mm5,mm7 \
-  /*mm6:mm0={a0-d0,...,a7-d7}*/ \
-  __asm psubw mm0,mm3 \
-  __asm psubw mm6,mm5 \
-  /*mm3:mm1={b0,...,b7}*/ \
-  __asm movq mm3,mm1 \
-  __asm punpcklbw mm1,mm7 \
-  __asm movq mm4,mm2 \
-  __asm punpckhbw mm3,mm7 \
-  /*mm5:mm4={c0,...,c7}*/ \
-  __asm movq mm5,mm2 \
-  __asm punpcklbw mm4,mm7 \
-  __asm punpckhbw mm5,mm7 \
-  /*mm7={3}x4 \
-    mm5:mm4={c0-b0,...,c7-b7}*/ \
-  __asm pcmpeqw mm7,mm7 \
-  __asm psubw mm4,mm1 \
-  __asm psrlw mm7,14 \
-  __asm psubw mm5,mm3 \
-  /*Scale by 3.*/ \
-  __asm pmullw mm4,mm7 \
-  __asm pmullw mm5,mm7 \
-  /*mm7={4}x4 \
-    mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
-  __asm psrlw mm7,1 \
-  __asm paddw mm4,mm0 \
-  __asm psllw mm7,2 \
-  __asm movq mm0,[LL] \
-  __asm paddw mm5,mm6 \
-  /*R_i has the range [-127,128], so we compute -R_i instead. \
-    mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
-  __asm psubw mm4,mm7 \
-  __asm psubw mm5,mm7 \
-  __asm psraw mm4,3 \
-  __asm psraw mm5,3 \
-  __asm pcmpeqb mm7,mm7 \
-  __asm packsswb mm4,mm5 \
-  __asm pxor mm6,mm6 \
-  __asm pxor mm4,mm7 \
-  __asm packuswb mm1,mm3 \
-  /*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \
-  /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
-     we have to split things by sign (the other option is to work in 16 bits, \
-     but working in 8 bits gives much better parallelism). \
-    We compute abs(R_i), but save a mask of which terms were negative in mm6. \
-    Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
-    Finally, we split mm4 into positive and negative pieces using the mask in \
-     mm6, and add and subtract them as appropriate.*/ \
-  /*mm4=abs(-R_i)*/ \
-  /*mm7=255-2*L*/ \
-  __asm pcmpgtb mm6,mm4 \
-  __asm psubb mm7,mm0 \
-  __asm pxor mm4,mm6 \
-  __asm psubb mm7,mm0 \
-  __asm psubb mm4,mm6 \
-  /*mm7=255-max(2*L-abs(R_i),0)*/ \
-  __asm paddusb mm7,mm4 \
-  /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
-  __asm paddusb mm4,mm7 \
-  __asm psubusb mm4,mm7 \
-  /*Now split mm4 by the original sign of -R_i.*/ \
-  __asm movq mm5,mm4 \
-  __asm pand mm4,mm6 \
-  __asm pandn mm6,mm5 \
-  /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
-  /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
-  __asm paddusb mm1,mm4 \
-  __asm psubusb mm2,mm4 \
-  __asm psubusb mm1,mm6 \
-  __asm paddusb mm2,mm6 \
-}
-/*Filters the 8 pixel wide edge between the row at _pix-_ystride and the row
-   at _pix: the four rows from _pix-2*_ystride through _pix+_ystride are
-   loaded as a, b, c, and d, and the filtered b and c rows are written back.
-  _ll must point at the limit bytes read by OC_LOOP_FILTER8_MMX.
-  PIX, YSTRIDE, YSTRIDE3, and LL must be #defined to register names wherever
-   this is expanded.*/
-#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \
-  do{ \
-    /*Used local variable pix__ in order to fix compilation errors like: \
-       "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \
-    unsigned char *pix__; \
-    unsigned char *ll__; \
-    ll__=(_ll); \
-    pix__=(_pix); \
-    __asm mov YSTRIDE,_ystride \
-    __asm mov LL,ll__ \
-    __asm mov PIX,pix__ \
-    __asm sub PIX,YSTRIDE \
-    __asm sub PIX,YSTRIDE \
-    /*mm0={a0,...,a7}*/ \
-    __asm movq mm0,[PIX] \
-    /*ystride3=_ystride*3*/ \
-    __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
-    /*mm3={d0,...,d7}*/ \
-    __asm movq mm3,[PIX+YSTRIDE3] \
-    /*mm1={b0,...,b7}*/ \
-    __asm movq mm1,[PIX+YSTRIDE] \
-    /*mm2={c0,...,c7}*/ \
-    __asm movq mm2,[PIX+YSTRIDE*2] \
-    OC_LOOP_FILTER8_MMX \
-    /*Write it back out.*/ \
-    __asm movq [PIX+YSTRIDE],mm1 \
-    __asm movq [PIX+YSTRIDE*2],mm2 \
-  } \
-  while(0)
-/*Filters the 8 pixel tall edge between the column at _pix-1 and the column
-   at _pix: 4 bytes starting at _pix-2 are loaded from each of 8 rows and
-   transposed into a, b, c, and d, and the filtered b and c bytes are written
-   back to _pix-1 and _pix on each row.
-  _ll must point at the limit bytes read by OC_LOOP_FILTER8_MMX.
-  PIX, YSTRIDE, YSTRIDE3, LL, D, and D_WORD must be #defined to register
-   names wherever this is expanded, with D_WORD naming the low 16 bits of D.
-  PIX is advanced by 4*_ystride part way through the write-back.*/
-#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \
-  do{ \
-    /*Used local variable ll__ in order to fix compilation errors like: \
-       "error C2443: operand size conflict".*/ \
-    unsigned char *ll__; \
-    unsigned char *pix__; \
-    ll__=(_ll); \
-    pix__=(_pix)-2; \
-    __asm mov PIX,pix__ \
-    __asm mov YSTRIDE,_ystride \
-    __asm mov LL,ll__ \
-    /*x x x x d0 c0 b0 a0*/ \
-    __asm movd mm0,[PIX] \
-    /*x x x x d1 c1 b1 a1*/ \
-    __asm movd mm1,[PIX+YSTRIDE] \
-    /*ystride3=_ystride*3*/ \
-    __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
-    /*x x x x d2 c2 b2 a2*/ \
-    __asm movd mm2,[PIX+YSTRIDE*2] \
-    /*x x x x d3 c3 b3 a3*/ \
-    __asm lea D,[PIX+YSTRIDE*4] \
-    __asm movd mm3,[PIX+YSTRIDE3] \
-    /*x x x x d4 c4 b4 a4*/ \
-    __asm movd mm4,[D] \
-    /*x x x x d5 c5 b5 a5*/ \
-    __asm movd mm5,[D+YSTRIDE] \
-    /*x x x x d6 c6 b6 a6*/ \
-    __asm movd mm6,[D+YSTRIDE*2] \
-    /*x x x x d7 c7 b7 a7*/ \
-    __asm movd mm7,[D+YSTRIDE3] \
-    /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
-    __asm punpcklbw mm0,mm1 \
-    /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
-    __asm punpcklbw mm2,mm3 \
-    /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
-    __asm movq mm3,mm0 \
-    /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
-    __asm punpcklwd mm0,mm2 \
-    /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
-    __asm punpckhwd mm3,mm2 \
-    /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
-    __asm movq mm1,mm0 \
-    /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
-    __asm punpcklbw mm4,mm5 \
-    /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
-    __asm punpcklbw mm6,mm7 \
-    /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
-    __asm movq mm5,mm4 \
-    /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
-    __asm punpcklwd mm4,mm6 \
-    /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
-    __asm punpckhwd mm5,mm6 \
-    /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
-    __asm movq mm2,mm3 \
-    /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
-    __asm punpckldq mm0,mm4 \
-    /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
-    __asm punpckhdq mm1,mm4 \
-    /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
-    __asm punpckldq mm2,mm5 \
-    /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
-    __asm punpckhdq mm3,mm5 \
-    OC_LOOP_FILTER8_MMX \
-    /*mm0={b0+R_0'',...,b7+R_7''}*/ \
-    __asm movq mm0,mm1 \
-    /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
-    __asm punpcklbw mm1,mm2 \
-    /*mm0={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
-    __asm punpckhbw mm0,mm2 \
-    /*[d]=c1 b1 c0 b0*/ \
-    __asm movd D,mm1 \
-    __asm mov [PIX+1],D_WORD \
-    __asm psrlq mm1,32 \
-    __asm shr D,16 \
-    __asm mov [PIX+YSTRIDE+1],D_WORD \
-    /*[d]=c3 b3 c2 b2*/ \
-    __asm movd D,mm1 \
-    __asm mov [PIX+YSTRIDE*2+1],D_WORD \
-    __asm shr D,16 \
-    __asm mov [PIX+YSTRIDE3+1],D_WORD \
-    __asm lea PIX,[PIX+YSTRIDE*4] \
-    /*[d]=c5 b5 c4 b4*/ \
-    __asm movd D,mm0 \
-    __asm mov [PIX+1],D_WORD \
-    __asm psrlq mm0,32 \
-    __asm shr D,16 \
-    __asm mov [PIX+YSTRIDE+1],D_WORD \
-    /*[d]=c7 b7 c6 b6*/ \
-    __asm movd D,mm0 \
-    __asm mov [PIX+YSTRIDE*2+1],D_WORD \
-    __asm shr D,16 \
-    __asm mov [PIX+YSTRIDE3+1],D_WORD \
-  } \
-  while(0)
-
-# endif
-#endif
diff --git a/media/libtheora/lib/x86_vc/mmxstate.c b/media/libtheora/lib/x86_vc/mmxstate.c
deleted file mode 100644
index d3d468d5f..000000000
--- a/media/libtheora/lib/x86_vc/mmxstate.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of complete fragment reconstruction algorithm.
-  Originally written by Rudolf Marek.*/
-#include <string.h>
-#include "x86int.h"
-#include "mmxloop.h"
-
-#if defined(OC_X86_ASM)
-/*Applies the inverse transform to one fragment and reconstructs it into the
-   OC_FRAME_SELF reference frame.
-  _state:      Supplies the fragment offsets, reference indices, motion
-                vectors, strides, and frame data.
-  _fragi:      The index of the fragment to reconstruct.
-  _pli:        The color plane the fragment belongs to.
-  _dct_coeffs: The first 64 entries hold the input coefficients; the residue
-                is written to the second 64 entries.
-  _last_zzi:   Selects the transform: values below 2 take the DC-only path,
-                anything else is passed on to oc_idct8x8_mmx().
-  _dc_quant:   The multiplier used to dequantize the DC coefficient.*/
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
-  unsigned char *dst;
-  ptrdiff_t      frag_buf_off;
-  int            ystride;
-  int            refi;
-  /*Apply the inverse transform.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    /*Note that this value must be unsigned, to keep the __asm__ block from
-       sign-extending it when it puts it in a register.*/
-    ogg_uint16_t p;
-    /*We round this dequant product (and not any of the others) because there's
-       no iDCT rounding.*/
-    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
-    /*Fill the 64 entries starting at _dct_coeffs+64 (byte offset 128) with p:
-       16 stores of 8 bytes each.*/
-    __asm{
-#define Y eax
-#define P ecx
-      mov Y,_dct_coeffs
-      movzx P,p
-      lea Y,[Y+128]
-      /*mm0=0000 0000 0000 AAAA*/
-      movd mm0,P
-      /*mm0=0000 0000 AAAA AAAA*/
-      punpcklwd mm0,mm0
-      /*mm0=AAAA AAAA AAAA AAAA*/
-      punpckldq mm0,mm0
-      movq [Y],mm0
-      movq [8+Y],mm0
-      movq [16+Y],mm0
-      movq [24+Y],mm0
-      movq [32+Y],mm0
-      movq [40+Y],mm0
-      movq [48+Y],mm0
-      movq [56+Y],mm0
-      movq [64+Y],mm0
-      movq [72+Y],mm0
-      movq [80+Y],mm0
-      movq [88+Y],mm0
-      movq [96+Y],mm0
-      movq [104+Y],mm0
-      movq [112+Y],mm0
-      movq [120+Y],mm0
-#undef Y
-#undef P
-    }
-  }
-  else{
-    /*Dequantize the DC coefficient.*/
-    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
-    oc_idct8x8_mmx(_dct_coeffs+64,_dct_coeffs,_last_zzi);
-  }
-  /*Fill in the target buffer.
-    Fragments that reference OC_FRAME_SELF are reconstructed as intra;
-     otherwise the predictor is built from one or two offsets into the
-     reference frame, depending on how many oc_state_get_mv_offsets()
-     reports.*/
-  frag_buf_off=_state->frag_buf_offs[_fragi];
-  refi=_state->frags[_fragi].refi;
-  ystride=_state->ref_ystride[_pli];
-  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
-  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
-  else{
-    const unsigned char *ref;
-    int                  mvoffsets[2];
-    ref=_state->ref_frame_data[refi]+frag_buf_off;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
-     _state->frag_mvs[_fragi])>1){
-      oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
-       _dct_coeffs+64);
-    }
-    else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
-  }
-}
-
-/*We copy this entire function in order to inline the actual MMX routines, so
-   that we use only a single indirect call.*/
-
-/*Initializes the limit bytes read by the MMX loop filter.
-  OC_LOOP_FILTER8_MMX loads 8 bytes from here and subtracts them twice from
-   255 to form 255-2*L, so each byte must hold the limit L itself, not a
-   precomputed 255-2*L.
-  _bv:     The bounding values array; only its first 8 bytes are written.
-  _flimit: The loop filter limit, L.*/
-void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
-  memset(_bv,_flimit,8);
-}
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _state:     Supplies the fragment plane layout, the coded flags, the
-               fragment buffer offsets, and the reference frame data.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
-  const oc_fragment_plane *fplane;
-  const oc_fragment       *frags;
-  const ptrdiff_t         *frag_buf_offs;
-  unsigned char           *ref_frame_data;
-  ptrdiff_t                fragi_top;
-  ptrdiff_t                fragi_bot;
-  ptrdiff_t                fragi0;
-  ptrdiff_t                fragi0_end;
-  int                      ystride;
-  int                      nhfrags;
-  fplane=_state->fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  fragi_top=fplane->froffset;
-  fragi_bot=fragi_top+fplane->nfrags;
-  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
-  fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
-  ystride=_state->ref_ystride[_pli];
-  frags=_state->frags;
-  frag_buf_offs=_state->frag_buf_offs;
-  ref_frame_data=_state->ref_frame_data[_refi];
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.
-    For each coded fragment, in this order: its left edge is filtered unless
-     it is the first fragment in its row, its top edge is filtered unless it
-     is in the top row of the plane, its right edge is filtered only if there
-     is an uncoded fragment to its right, and its bottom edge is filtered only
-     if there is an uncoded fragment below it.*/
-  while(fragi0<fragi0_end){
-    ptrdiff_t fragi;
-    ptrdiff_t fragi_end;
-    fragi=fragi0;
-    fragi_end=fragi+nhfrags;
-    while(fragi<fragi_end){
-      if(frags[fragi].coded){
-        unsigned char *ref;
-        ref=ref_frame_data+frag_buf_offs[fragi];
-#define PIX eax
-#define YSTRIDE3 edi
-#define YSTRIDE ecx
-#define LL edx
-#define D esi
-#define D_WORD si
-        if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,_bv);
-        if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,_bv);
-        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
-          OC_LOOP_FILTER_H_MMX(ref+8,ystride,_bv);
-        }
-        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
-          OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,_bv);
-        }
-#undef PIX
-#undef YSTRIDE3
-#undef YSTRIDE
-#undef LL
-#undef D
-#undef D_WORD
-      }
-      fragi++;
-    }
-    fragi0+=nhfrags;
-  }
-}
-
-#endif
diff --git a/media/libtheora/lib/x86_vc/x86cpu.c b/media/libtheora/lib/x86_vc/x86cpu.c
deleted file mode 100644
index 41f4bcba9..000000000
--- a/media/libtheora/lib/x86_vc/x86cpu.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
- CPU capability detection for x86 processors.
-  Originally written by Rudolf Marek.
-
- function:
-  last mod: $Id: x86cpu.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#include "x86cpu.h"
-
-#if !defined(OC_X86_ASM)
-ogg_uint32_t oc_cpu_flags_get(void){
-  return 0; /*OC_X86_ASM is not defined, so no CPU features are reported.*/
-}
-#else
-/*Why does MSVC need this complicated rigamarole?
-  At this point I honestly do not care.*/
-
-/*Visual C cpuid helper function.
-  For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
-   for VS2003 users, so we do it in inline assembler.
-  _cpu_info: Receives the resulting eax, ebx, ecx, and edx values, in that
-              order.
-  _op:       The cpuid function to execute (loaded into eax).*/
-static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
-  _asm{
-    mov eax,[_op]
-    mov esi,_cpu_info
-    cpuid
-    mov [esi+0],eax
-    mov [esi+4],ebx
-    mov [esi+8],ecx
-    mov [esi+12],edx
-  }
-}
-/*Executes cpuid function _op via oc_cpuid_helper() and assigns the resulting
-   register values to the lvalues _eax, _ebx, _ecx, and _edx.*/
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  do{ \
-    ogg_uint32_t cpu_info[4]; \
-    oc_cpuid_helper(cpu_info,_op); \
-    (_eax)=cpu_info[0]; \
-    (_ebx)=cpu_info[1]; \
-    (_ecx)=cpu_info[2]; \
-    (_edx)=cpu_info[3]; \
-  }while(0)
-/*Tests whether bit 21 (0x200000) of EFLAGS can be toggled, restoring the
-   original EFLAGS before returning.
-  _eax: Receives EFLAGS as read back after attempting to flip bit 21.
-  _ebx: Receives the original EFLAGS.
-  oc_cpu_flags_get() treats equal values as meaning cpuid is unavailable.*/
-static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
-  _asm{
-    pushfd
-    pushfd
-    pop eax
-    mov ebx,eax
-    xor eax,200000h
-    push eax
-    popfd
-    pushfd
-    pop eax
-    popfd
-    mov ecx,_eax
-    mov [ecx],eax
-    mov ecx,_ebx
-    mov [ecx],ebx
-  }
-}
-
-/*Translates the standard feature flags from cpuid function 1 into
-   OC_CPU_X86_* flags.
-  _edx: The value of edx returned by cpuid function 1.
-  _ecx: The value of ecx returned by cpuid function 1.
-  Return: The detected feature flags, or 0 if MMX is not supported.*/
-static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
-  ogg_uint32_t flags;
-  /*If there isn't even MMX, give up.*/
-  if(!(_edx&0x00800000))return 0;
-  flags=OC_CPU_X86_MMX;
-  if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
-  if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2;
-  if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI;
-  /*SSSE3 is reported in bit 9 of ecx; bit 8 is Thermal Monitor 2.*/
-  if(_ecx&0x00000200)flags|=OC_CPU_X86_SSSE3;
-  if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1;
-  if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2;
-  return flags;
-}
-
-/*Translates the extended feature flags from cpuid function 0x80000001 into
-   OC_CPU_X86_* flags.
-  _edx: The value of edx returned by cpuid function 0x80000001.
-  _ecx: The value of ecx returned by cpuid function 0x80000001.
-  Return: The detected feature flags, or 0 if MMX is not supported.*/
-static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
-  ogg_uint32_t features;
-  features=0;
-  /*Nothing else is reported unless MMX is present.*/
-  if(_edx&0x00800000){
-    features=OC_CPU_X86_MMX;
-    if(_edx&0x00400000)features|=OC_CPU_X86_MMXEXT;
-    if(_edx&0x80000000)features|=OC_CPU_X86_3DNOW;
-    if(_edx&0x40000000)features|=OC_CPU_X86_3DNOWEXT;
-    if(_ecx&0x00000040)features|=OC_CPU_X86_SSE4A;
-    if(_ecx&0x00000800)features|=OC_CPU_X86_SSE5;
-  }
-  return features;
-}
-
-/*Detects the instruction set extensions supported by the current CPU.
-  The vendor string from cpuid function 0 selects how the feature bits are
-   interpreted; unrecognized vendors report no features.
-  Return: A combination of OC_CPU_X86_* flags, or 0 if cpuid is unavailable
-           or MMX is not supported.*/
-ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t flags;
-  ogg_uint32_t eax;
-  ogg_uint32_t ebx;
-  ogg_uint32_t ecx;
-  ogg_uint32_t edx;
-# if !defined(__amd64__)&&!defined(__x86_64__)
-  /*Not all x86-32 chips support cpuid, so we have to check.*/
-  oc_detect_cpuid_helper(&eax,&ebx);
-  /*No cpuid.*/
-  if(eax==ebx)return 0;
-# endif
-  cpuid(0,eax,ebx,ecx,edx);
-  /*         l e t n          I e n i          u n e G*/
-  if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
-   /*      6 8 x M          T e n i          u n e G*/
-   ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
-    int family;
-    int model;
-    /*Intel, Transmeta (tested with Crusoe TM5800):*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags=oc_parse_intel_flags(edx,ecx);
-    family=(eax>>8)&0xF;
-    /*Fold the extended model (bits 16-19) into the high nibble, so that later
-       family 6 processors whose low model nibble happens to be 9, 13, or 14
-       are not mistaken for the ones below.*/
-    model=(int)(((eax>>4)&0xF)|((eax>>12)&0xF0));
-    /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX
-       unit, so don't use it.*/
-    if(family==6&&(model==9||model==13||model==14)){
-      flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI);
-    }
-  }
-  /*              D M A c          i t n e          h t u A*/
-  else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
-   /*      C S N            y b   e          d o e G*/
-   ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
-    /*AMD, Geode:*/
-    cpuid(0x80000000,eax,ebx,ecx,edx);
-    if(eax<0x80000001)flags=0;
-    else{
-      cpuid(0x80000001,eax,ebx,ecx,edx);
-      flags=oc_parse_amd_flags(edx,ecx);
-    }
-    /*Also check for SSE.*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags|=oc_parse_intel_flags(edx,ecx);
-  }
-  /*Technically some VIA chips can be configured in the BIOS to return any
-     string here the user wants.
-    There is a special detection method that can be used to identify such
-     processors, but in my opinion, if the user really wants to change it, they
-     deserve what they get.*/
-  /*              s l u a          H r u a          t n e C*/
-  else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
-    /*VIA:*/
-    /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
-       chips (thanks to the engineers from Centaur Technology who provided it).
-      These chips support Intel-like cpuid info.
-      The C3-2 (Nehemiah) cores appear to, as well.*/
-    cpuid(1,eax,ebx,ecx,edx);
-    flags=oc_parse_intel_flags(edx,ecx);
-    /*Ask for the highest supported extended function: after function 1, eax
-       holds the processor signature, which says nothing about whether
-       function 0x80000001 exists.*/
-    cpuid(0x80000000,eax,ebx,ecx,edx);
-    if(eax>=0x80000001){
-      /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
-        We need to check this even if the Intel test succeeds to pick up 3DNow!
-         support on these processors.
-        Unlike actual AMD processors, we cannot _rely_ on this info, since
-         some cores (e.g., the 693 stepping of the Nehemiah) claim to support
-         this function, yet return edx=0, despite the Intel test indicating
-         MMX support.
-        Therefore the features detected here are strictly added to those
-         detected by the Intel test.*/
-      /*TODO: How about earlier chips?*/
-      cpuid(0x80000001,eax,ebx,ecx,edx);
-      /*Note: As of the C7, this function returns Intel-style extended feature
-         flags, not AMD-style.
-        Currently, this only defines bits 11, 20, and 29 (0x20100800), which
-         do not conflict with any of the AMD flags we inspect.
-        For the remaining bits, Intel tells us, "Do not count on their value",
-         but VIA assures us that they will all be zero (at least on the C7 and
-         Isaiah chips).
-        In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
-         (0xC0C00000) for something else, we will have to add code to detect
-         the model to decide when it is appropriate to inspect them.*/
-      flags|=oc_parse_amd_flags(edx,ecx);
-    }
-  }
-  else{
-    /*Implement me.*/
-    flags=0;
-  }
-  return flags;
-}
-#endif
diff --git a/media/libtheora/lib/x86_vc/x86cpu.h b/media/libtheora/lib/x86_vc/x86cpu.h
deleted file mode 100644
index 327d93246..000000000
--- a/media/libtheora/lib/x86_vc/x86cpu.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
- function:
-    last mod: $Id: x86cpu.h 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_vc_x86cpu_H)
-# define _x86_vc_x86cpu_H (1)
-#include "../internal.h"
-/*The feature bits that are combined to form the value returned by
-   oc_cpu_flags_get().*/
-#define OC_CPU_X86_MMX      (1<<0)
-#define OC_CPU_X86_3DNOW    (1<<1)
-#define OC_CPU_X86_3DNOWEXT (1<<2)
-#define OC_CPU_X86_MMXEXT   (1<<3)
-#define OC_CPU_X86_SSE      (1<<4)
-#define OC_CPU_X86_SSE2     (1<<5)
-#define OC_CPU_X86_PNI      (1<<6)
-#define OC_CPU_X86_SSSE3    (1<<7)
-#define OC_CPU_X86_SSE4_1   (1<<8)
-#define OC_CPU_X86_SSE4_2   (1<<9)
-#define OC_CPU_X86_SSE4A    (1<<10)
-#define OC_CPU_X86_SSE5     (1<<11)
-
-ogg_uint32_t oc_cpu_flags_get(void);
-
-#endif
diff --git a/media/libtheora/lib/x86_vc/x86int.h b/media/libtheora/lib/x86_vc/x86int.h
deleted file mode 100644
index bc4c54a2f..000000000
--- a/media/libtheora/lib/x86_vc/x86int.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_vc_x86int_H)
-# define _x86_vc_x86int_H (1)
-# include "../internal.h"
-# if defined(OC_X86_ASM)
-#  define oc_state_accel_init oc_state_accel_init_x86
-#  define OC_STATE_USE_VTABLE (1)
-# endif
-# include "../state.h"
-# include "x86cpu.h"
-/*oc_state_accel_init_x86() is what oc_state_accel_init expands to when
-   OC_X86_ASM is defined (see the #define above).
-  The declarations after it are the MMX implementations of the individual
-   routines.*/
-void oc_state_accel_init_x86(oc_theora_state *_state);
-
-void oc_frag_copy_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride);
-void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
- const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_mmx(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
-void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
-void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit);
-void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_restore_fpu_mmx(void);
-
-#endif
diff --git a/media/libtheora/lib/x86_vc/x86state.c b/media/libtheora/lib/x86_vc/x86state.c
deleted file mode 100644
index 7aa73deae..000000000
--- a/media/libtheora/lib/x86_vc/x86state.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86state.c 17410 2010-09-21 21:53:48Z tterribe $
-
- ********************************************************************/
-
-#include "x86int.h"
-
-#if defined(OC_X86_ASM)
-
-/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
-   each quadrant of the destination.*/
-static const unsigned char OC_FZIG_ZAG_MMX[128]={
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3,32,11,18,25, 4,12,
-   5,26,19,40,33,34,41,48,
-  27, 6,13,20,28,21,14, 7,
-  56,49,42,35,43,50,57,36,
-  15,22,29,30,23,44,37,58,
-  51,59,38,45,52,31,60,53,
-  46,39,47,54,61,62,55,63,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-  64,64,64,64,64,64,64,64,
-};
-
-void oc_state_accel_init_x86(oc_theora_state *_state){
-  _state->cpu_flags=oc_cpu_flags_get();
-  if(_state->cpu_flags&OC_CPU_X86_MMX){
-    _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
-    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_mmx;
-    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
-    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
-    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
-    _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
-    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
-    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_mmx;
-    _state->opt_vtable.state_loop_filter_frag_rows=
-     oc_state_loop_filter_frag_rows_mmx;
-    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
-    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
-  }
-  else oc_state_accel_init_c(_state);
-}
-#endif