summaryrefslogtreecommitdiff
path: root/media/libtheora/lib/huffdec.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/libtheora/lib/huffdec.c')
-rw-r--r--media/libtheora/lib/huffdec.c521
1 files changed, 521 insertions, 0 deletions
diff --git a/media/libtheora/lib/huffdec.c b/media/libtheora/lib/huffdec.c
new file mode 100644
index 0000000000..fe013c611c
--- /dev/null
+++ b/media/libtheora/lib/huffdec.c
@@ -0,0 +1,521 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+
+/*Instead of storing every branching in the tree, subtrees can be collapsed
+ into one node, with a table of size 1<<nbits pointing directly to its
+ descendants nbits levels down.
+ This allows more than one bit to be read at a time, and avoids following all
+ the intermediate branches with next to no increased code complexity once
+ the collapsed tree has been built.
+ We do _not_ require that a subtree be complete to be collapsed, but instead
+ store duplicate pointers in the table, and record the actual depth of the
+ node below its parent.
+ This tells us the number of bits to advance the stream after reaching it.
+
+ This turns out to be equivalent to the method described in \cite{Hash95},
+ without the requirement that codewords be sorted by length.
+ If the codewords were sorted by length (so-called ``canonical-codes''), they
+ could be decoded much faster via either Lindell and Moffat's approach or
+ Hashemian's Condensed Huffman Code approach, the latter of which has an
+ extremely small memory footprint.
+ We can't use Choueka et al.'s finite state machine approach, which is
+ extremely fast, because we can't allow multiple symbols to be output at a
+ time; the codebook can and does change between symbols.
+ It also has very large memory requirements, which impairs cache coherency.
+
+ We store the tree packed in an array of 16-bit integers (words).
+ Each node consists of a single word, followed consecutively by two or more
+ indices of its children.
+ Let n be the value of this first word.
+ This is the number of bits that need to be read to traverse the node, and
+ must be positive.
+ 1<<n entries follow in the array, each an index to a child node.
+ If the child is positive, then it is the index of another internal node in
+ the table.
+ If the child is negative or zero, then it is a leaf node.
+ These are stored directly in the child pointer to save space, since they only
+ require a single word.
+ If a leaf node would have been encountered before reading n bits, then it is
+ duplicated the necessary number of times in this table.
+ Leaf nodes pack both a token value and their actual depth in the tree.
+ The token in the leaf node is (-leaf&255).
+ The number of bits that need to be consumed to reach the leaf, starting from
+ the current node, is (-leaf>>8).
+
+ @ARTICLE{Hash95,
+ author="Reza Hashemian",
+ title="Memory Efficient and High-Speed Search {Huffman} Coding",
+ journal="{IEEE} Transactions on Communications",
+ volume=43,
+ number=10,
+ pages="2576--2581",
+ month=Oct,
+ year=1995
+ }*/
+
+
+
+/*The map from external spec-defined tokens to internal tokens.
+ The table is indexed by the spec token value read from the codebook; each
+ entry is the first internal token assigned to that spec token.
+ A spec token that folds k of its extra bits into the token value owns the
+ 1<<k consecutive internal tokens starting at its entry, where k is the
+ matching entry of OC_DCT_TOKEN_MAP_LOG_NENTRIES.
+ This is constructed so that any extra bits read with the original token value
+ can be masked off the least significant bits of its internal token index.
+ In addition, all of the tokens which require additional extra bits are placed
+ at the start of the list, and grouped by type.
+ OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
+ giving it index 0 may simplify comparisons on some architectures.
+ These requirements require some substantial reordering.*/
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
+ /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+ 15,
+ /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+ 16,
+ /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+ 17,
+ /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
+ 88,
+ /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
+ 80,
+ /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+ 1,
+ /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+ 0,
+ /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
+ 48,
+ /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
+ 14,
+ /*OC_ONE_TOKEN (0 extra bits)*/
+ 56,
+ /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+ 57,
+ /*OC_TWO_TOKEN (0 extra bits)*/
+ 58,
+ /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+ 59,
+ /*OC_DCT_VAL_CAT2 (1 extra bit)*/
+ 60,
+ 62,
+ 64,
+ 66,
+ /*OC_DCT_VAL_CAT3 (2 extra bits)*/
+ 68,
+ /*OC_DCT_VAL_CAT4 (3 extra bits)*/
+ 72,
+ /*OC_DCT_VAL_CAT5 (4 extra bits)*/
+ 2,
+ /*OC_DCT_VAL_CAT6 (5 extra bits)*/
+ 4,
+ /*OC_DCT_VAL_CAT7 (6 extra bits)*/
+ 6,
+ /*OC_DCT_VAL_CAT8 (10 extra bits)*/
+ 8,
+ /*OC_DCT_RUN_CAT1A (1 extra bit)*/
+ 18,
+ 20,
+ 22,
+ 24,
+ 26,
+ /*OC_DCT_RUN_CAT1B (3 extra bits)*/
+ 32,
+ /*OC_DCT_RUN_CAT1C (4 extra bits)*/
+ 12,
+ /*OC_DCT_RUN_CAT2A (2 extra bits)*/
+ 28,
+ /*OC_DCT_RUN_CAT2B (3 extra bits)*/
+ 40
+};
+
+/*The log base 2 of number of internal tokens associated with each of the spec
+ tokens (i.e., how many of the extra bits are folded into the token value).
+ Entries are in the same spec-token order as OC_DCT_TOKEN_MAP.
+ When a codebook is unpacked, a spec token whose entry here is k is expanded
+ into 1<<k internal tokens, each recorded with a codeword k bits longer than
+ the one stored in the codebook.
+ Increasing the maximum value beyond 3 will enlarge the amount of stack
+ required for tree construction.*/
+static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
+ 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
+};
+
+
+/*The size a lookup table is allowed to grow to relative to the number of
+ unique nodes it contains.
+ This limit is applied to every collapsed node below the root; the root node
+ uses OC_ROOT_HUFF_SLUSH instead.
+ E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
+ wasted (1/4 of the space must be used).
+ Larger numbers can decode tokens with fewer read operations, while smaller
+ numbers may save more space.
+ With a sample file:
+ 32233473 read calls are required when no tree collapsing is done (100.0%).
+ 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
+ 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
+ 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
+ 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
+ Since a value of 2 gets us the vast majority of the speed-up with only a
+ small amount of wasted memory, this is what we use.
+ This value must be less than 128, or you could create a tree with more than
+ 32767 entries, which would overflow the 16-bit words used to index it.*/
+#define OC_HUFF_SLUSH (2)
+/*The slush factor used for the root node only (the node at depth 0); all
+ deeper nodes use OC_HUFF_SLUSH.
+ The root of the tree is on the fast path, and a larger value here is more
+ beneficial than elsewhere in the tree.
+ 7 appears to give the best performance, trading off between increased use of
+ the single-read fast path and cache footprint for the tables, though
+ obviously this will depend on your cache size.
+ Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
+#define OC_ROOT_HUFF_SLUSH (7)
+
+
+
+/*Unpacks a Huffman codebook.
+  The codebook is read as a depth-first walk of a binary tree: a 0 bit opens a
+   new internal node (one level deeper), while a 1 bit is followed by
+   OC_NDCT_TOKEN_BITS bits naming the spec token stored in a leaf.
+  Each spec token is expanded into 1<<neb internal tokens whose codewords are
+   neb bits longer, with neb taken from OC_DCT_TOKEN_MAP_LOG_NENTRIES.
+  _opb:    The buffer to unpack from.
+  _tokens: Stores a list of internal tokens, in the order they were found in
+            the codebook, and the lengths of their corresponding codewords.
+           This is enough to completely define the codebook, while minimizing
+            stack usage and avoiding temporary allocations (for platforms
+            where free() is a no-op).
+  Return: The number of internal tokens in the codebook, or TH_EBADHEADER if
+           the buffer runs out, a codeword would be longer than 32 bits, or
+           the codebook holds more than 32 leaves.*/
+int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
+  ogg_uint32_t code;
+  int          len;
+  int          ntokens;
+  int          nleaves;
+  /*code records the path from the root to the current node, left-aligned:
+     bit 31 is the first branch taken, and a set bit means the 1 branch.
+    len is the current depth, i.e., the number of valid bits in code.*/
+  code=0;
+  len=ntokens=nleaves=0;
+  for(;;){
+    long bits;
+    bits=oc_pack_read1(_opb);
+    /*Only process nodes so long as there's more bits in the buffer.*/
+    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+    /*Read an internal node:*/
+    if(!bits){
+      len++;
+      /*Don't allow codewords longer than 32 bits.*/
+      if(len>32)return TH_EBADHEADER;
+    }
+    /*Read a leaf node:*/
+    else{
+      ogg_uint32_t code_bit;
+      int          neb;
+      int          nentries;
+      int          token;
+      /*Don't allow more than 32 spec-tokens per codebook.*/
+      if(++nleaves>32)return TH_EBADHEADER;
+      bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+      neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
+      token=OC_DCT_TOKEN_MAP[bits];
+      /*Expand the spec token into its run of consecutive internal tokens.*/
+      nentries=1<<neb;
+      while(nentries-->0){
+        _tokens[ntokens][0]=(unsigned char)token++;
+        _tokens[ntokens][1]=(unsigned char)(len+neb);
+        ntokens++;
+      }
+      /*A leaf at depth 0 is the entire tree, so we are done.
+        Stopping here also avoids evaluating the shift below with a count of
+         -1, which is undefined behavior.*/
+      if(len<=0)break;
+      /*Walk back up past every ancestor whose 1 branch we just finished.*/
+      code_bit=0x80000000U>>(len-1);
+      while(len>0&&(code&code_bit)){
+        code^=code_bit;
+        code_bit<<=1;
+        len--;
+      }
+      /*If we climbed out of the root, every branch is closed and the tree is
+         complete.*/
+      if(len<=0)break;
+      /*Otherwise continue with the 1 branch of the node we stopped at.*/
+      code|=code_bit;
+    }
+  }
+  return ntokens;
+}
+
+/*Counts the tokens needed to completely fill the subtree rooted at depth
+   _depth.
+  Each token contributes a fraction of the subtree determined by how far below
+   _depth its codeword ends; tokens are consumed until those fractions sum to
+   the whole subtree.
+  _tokens: A list of internal tokens, in the order they are found in the
+            codebook, and the lengths of their corresponding codewords.
+           The first entry must be the first token of the subtree.
+  _depth:  The depth of the desired node in the corresponding tree structure.
+  Return: The number of tokens that belong to that subtree.*/
+static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
+  ogg_uint32_t filled;
+  int          ntokens;
+  /*filled measures the covered part of the subtree in units of 2**-31 of its
+     size, so 0x80000000 means the subtree is full.*/
+  filled=0;
+  ntokens=0;
+  for(;;){
+    int rel_len;
+    rel_len=_tokens[ntokens][1]-_depth;
+    if(rel_len<32){
+      filled+=0x80000000U>>rel_len;
+      ntokens++;
+    }
+    else{
+      /*Because of the expanded internal tokens, we can have codewords as long
+         as 35 bits.
+        Such a token lies inside a subtree 31 levels further down, which
+         accounts for exactly one unit; a single recursion is enough to skip
+         over all of it.*/
+      filled++;
+      ntokens+=oc_huff_subtree_tokens(_tokens+ntokens,_depth+31);
+    }
+    if(filled>=0x80000000U)break;
+  }
+  return ntokens;
+}
+
+/*Chooses how many bits a collapsed tree node rooted at the given depth should
+   look ahead.
+  Starting from one bit, the lookahead is widened a bit at a time for as long
+   as the wider table has more distinct children than the previous one and is
+   no more than slush times larger than its number of distinct children
+   (slush is OC_ROOT_HUFF_SLUSH at depth 0 and OC_HUFF_SLUSH elsewhere).
+  The result is the widest accepted lookahead at which some codeword ends
+   exactly.
+  _tokens:  A list of internal tokens, in the order they are found in the
+             codebook, and the lengths of their corresponding codewords.
+  _ntokens: The number of tokens corresponding to this tree node.
+  _depth:   The depth of this tree node.
+  Return: The number of bits to use for a collapsed tree node rooted here.
+          This is always at least one, even if this was a leaf node.*/
+static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
+ int _ntokens,int _depth){
+  int slush;
+  int nbits;
+  int best_nbits;
+  int nchildren;
+  int prev_nchildren;
+  int ends_here;
+  if(_depth>0)slush=OC_HUFF_SLUSH;
+  else slush=OC_ROOT_HUFF_SLUSH;
+  /*It's legal to have a tree with just a single node, which requires no bits
+     to decode and always returns the same token.
+    However, no encoder actually does this (yet).
+    To avoid a special case in oc_huff_token_decode(), we force the number of
+     lookahead bits to be at least one.
+    This will produce a tree that looks ahead one bit and then advances the
+     stream zero bits.*/
+  nbits=1;
+  nchildren=2;
+  ends_here=1;
+  for(;;){
+    int level;
+    int ti;
+    /*The previous width was accepted; remember it if a codeword ended there.*/
+    if(ends_here)best_nbits=nbits;
+    nbits++;
+    level=_depth+nbits;
+    prev_nchildren=nchildren;
+    ends_here=0;
+    nchildren=0;
+    ti=0;
+    /*Count the distinct children a table of this width would point to: one
+       per leaf at or above this level, one per subtree hanging below it.*/
+    while(ti<_ntokens){
+      int len;
+      len=_tokens[ti][1];
+      if(len>level)ti+=oc_huff_subtree_tokens(_tokens+ti,level);
+      else{
+        if(len==level)ends_here=1;
+        ti++;
+      }
+      nchildren++;
+    }
+    /*Stop once widening no longer adds children, or wastes too much space.*/
+    if(nchildren<=prev_nchildren||nchildren*slush<1<<nbits)break;
+  }
+  return best_nbits;
+}
+
+/*Computes how many words one collapsed Huffman tree node occupies: a single
+   word holding the lookahead size, followed by one child entry for every
+   possible value of the lookahead bits.
+  _nbits: The depth of the subtree (the number of lookahead bits).
+          This must be greater than zero.
+  Return: The number of words required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  size_t nchildren;
+  nchildren=1<<_nbits;
+  return 1+nchildren;
+}
+
+/*Produces a collapsed-tree representation of the given token list.
+ The tree is walked depth-first using an explicit stack rather than
+ recursion: l is the current stack level and, for each level, node[] holds
+ the next index to write in _tree, depth[] the depth in bits of that node,
+ and last[] the index of the last token belonging to its subtree.
+ For each node the lookahead size is chosen with
+ oc_huff_tree_collapse_depth(), the size word is written, and then one
+ table entry is emitted per child.
+ A leaf is stored as -((codeword length below this node)<<8|token), repeated
+ once for every table slot it covers; an internal child is stored as the
+ index at which that child node will be written.
+ NOTE(review): the #pragma optimize lines around this function switch MSVC
+ optimization off when _MSC_VER>=1700; the reason is not recorded here.
+ _tree: The storage for the collapsed Huffman tree.
+ This may be NULL to compute the required storage size instead of
+ constructing the tree.
+ _tokens: A list of internal tokens, in the order they are found in the
+ codebook, and the lengths of their corresponding codewords.
+ _ntokens: The number of tokens corresponding to this tree node.
+ Return: The number of words required to store the tree.*/
+#if defined(_MSC_VER) && _MSC_VER >= 1700
+#pragma optimize( "", off )
+#endif
+static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
+ unsigned char _tokens[][2],int _ntokens){
+ ogg_int16_t node[34];
+ unsigned char depth[34];
+ unsigned char last[34];
+ size_t ntree;
+ int ti;
+ int l;
+ depth[0]=0;
+ last[0]=(unsigned char)(_ntokens-1);
+ ntree=0;
+ ti=0;
+ l=0;
+ do{
+ int nbits;
+ nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
+ node[l]=(ogg_int16_t)ntree;
+ ntree+=oc_huff_node_size(nbits);
+ if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
+ do{
+ while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
+ if(_tree!=NULL){
+ ogg_int16_t leaf;
+ int nentries;
+ nentries=1<<depth[l]+nbits-_tokens[ti][1];
+ leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
+ while(nentries-->0)_tree[node[l]++]=leaf;
+ }
+ ti++;
+ }
+ if(ti<=last[l]){
+ /*We need to recurse: the next token ends deeper than this node's table
+ reaches, so it starts a child node.
+ Record the index where the child will be written, push a stack level,
+ and find the last token that falls inside the child's subtree.
+ Breaking out returns to the outer loop, which sizes and begins the
+ child node.*/
+ depth[l+1]=(unsigned char)(depth[l]+nbits);
+ if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
+ l++;
+ last[l]=
+ (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
+ break;
+ }
+ /*Pop back up a level of recursion, recovering the parent's lookahead size
+ from the recorded depths so its remaining entries can be filled in.
+ When the root itself is finished, l drops to -1 and both loops end.*/
+ else if(l-->0)nbits=depth[l+1]-depth[l];
+ }
+ while(l>=0);
+ }
+ while(l>=0);
+ return ntree;
+}
+#if defined(_MSC_VER) && _MSC_VER >= 1700
+#pragma optimize( "", on )
+#endif
+
+/*Reads TH_NHUFFMAN_TABLES Huffman codebooks from the buffer and stores each
+   one as a freshly allocated collapsed tree.
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees.
+  Return: 0 on success, or a negative value on error.
+          The caller is responsible for cleaning up any partially initialized
+           _nodes on failure.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
+  int hi;
+  for(hi=0;hi<TH_NHUFFMAN_TABLES;hi++){
+    unsigned char  tokens[256][2];
+    ogg_int16_t   *collapsed;
+    size_t         nwords;
+    int            ntokens;
+    /*Read the codebook as a flat list of tokens and codeword lengths.*/
+    ntokens=oc_huff_tree_unpack(_opb,tokens);
+    if(ntokens<0)return ntokens;
+    /*A first pass with no output buffer only measures the collapsed tree.*/
+    nwords=oc_huff_tree_collapse(NULL,tokens,ntokens);
+    /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
+       OC_ROOT_HUFF_SLUSH too large.*/
+    if(nwords>32767)return TH_EIMPL;
+    collapsed=(ogg_int16_t *)_ogg_malloc(nwords*sizeof(*collapsed));
+    if(collapsed==NULL)return TH_EFAULT;
+    /*A second pass constructs the collapsed tree in the new storage.*/
+    oc_huff_tree_collapse(collapsed,tokens,ntokens);
+    _nodes[hi]=collapsed;
+  }
+  return 0;
+}
+
+/*Measures a collapsed Huffman subtree by adding the size of its root node to
+   the sizes of all internal nodes reachable from it.
+  _tree: The complete Huffman tree.
+  _node: The index of the root of the desired subtree.
+  Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
+  size_t nwords;
+  int    nbits;
+  int    nentries;
+  int    ei;
+  nbits=_tree[_node];
+  nentries=1<<nbits;
+  nwords=oc_huff_node_size(nbits);
+  for(ei=0;ei<nentries;){
+    int entry;
+    entry=_tree[_node+ei+1];
+    if(entry>0){
+      /*An internal child: add its whole subtree.*/
+      nwords+=oc_huff_tree_size(_tree,entry);
+      ei++;
+    }
+    else{
+      int leaf_len;
+      /*A leaf: skip every duplicate of it that pads out the table.*/
+      leaf_len=-entry>>8;
+      ei+=1<<(nbits-leaf_len);
+    }
+  }
+  return nwords;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  Each tree is measured with oc_huff_tree_size() and duplicated into its own
+   allocation.
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.
+  Return: 0 on success, or TH_EFAULT if an allocation fails.
+          On failure every copy made so far is freed and its entry in _dst is
+           reset to NULL, so those entries can safely be passed to
+           oc_huff_trees_clear(); entries after the failing one are left
+           untouched.*/
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    size_t size;
+    size=oc_huff_tree_size(_src[i],0);
+    _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
+    if(_dst[i]==NULL){
+      /*Undo the copies already made, and don't leave dangling pointers
+         behind in the caller's array.*/
+      while(i-->0){
+        _ogg_free(_dst[i]);
+        _dst[i]=NULL;
+      }
+      return TH_EFAULT;
+    }
+    memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
+  }
+  return 0;
+}
+
+/*Releases the storage of every tree in a set of Huffman trees.
+  The pointers stored in the array are not modified.
+  _nodes: The array of trees to free.*/
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
+  int hi;
+  hi=0;
+  while(hi<TH_NHUFFMAN_TABLES){
+    _ogg_free(_nodes[hi]);
+    hi++;
+  }
+}
+
+
+/*Unpacks a single token using the given Huffman tree.
+ The reader state (ptr, window, bits) is copied into locals, the collapsed
+ tree is walked starting from index 0, and the state is written back before
+ returning.
+ At each node the first word gives the number of lookahead bits n, and the
+ top n bits of the window select one of the child entries that follow it.
+ A positive entry is the index of the next node; n bits are consumed and the
+ walk continues.
+ A zero or negative entry is a leaf: only (-leaf>>8) bits are consumed, and
+ (-leaf&255) is the token.
+ _opb: The buffer to unpack the token from.
+ _tree: The tree to unpack the token with.
+ Return: The token value.*/
+int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
+ const unsigned char *ptr;
+ const unsigned char *stop;
+ oc_pb_window window;
+ int available;
+ long bits;
+ int node;
+ int n;
+ ptr=_opb->ptr;
+ window=_opb->window;
+ stop=_opb->stop;
+ available=_opb->bits;
+ node=0;
+ for(;;){
+ n=_tree[node];
+ if(n>available){
+ unsigned shift;
+ shift=OC_PB_WINDOW_SIZE-available;
+ do{
+ /*We don't bother setting eof because we won't check for it after we've
+ started decoding DCT tokens.
+ Once the input is exhausted no more bytes are added to the window, and
+ shift is chosen so that available, computed below, comes out as
+ OC_PB_WINDOW_SIZE+OC_LOTS_OF_BITS.*/
+ if(ptr>=stop){
+ shift=(unsigned)-OC_LOTS_OF_BITS;
+ break;
+ }
+ shift-=8;
+ window|=(oc_pb_window)*ptr++<<shift;
+ }
+ while(shift>=8);
+ /*Note: We never request more than 24 bits, so there's no need to fill in
+ the last partial byte here.*/
+ available=OC_PB_WINDOW_SIZE-shift;
+ }
+ bits=window>>OC_PB_WINDOW_SIZE-n;
+ node=_tree[node+1+bits];
+ if(node<=0)break;
+ window<<=n;
+ available-=n;
+ }
+ node=-node;
+ n=node>>8;
+ window<<=n;
+ available-=n;
+ _opb->ptr=ptr;
+ _opb->window=window;
+ _opb->bits=available;
+ return node&255;
+}