1 files changed, 0 insertions, 1007 deletions
diff --git a/third_party/aom/av1/common/pvq.c b/third_party/aom/av1/common/pvq.c
deleted file mode 100644
index 221c90c04c..0000000000
--- a/third_party/aom/av1/common/pvq.c
+++ /dev/null
@@ -1,1007 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "odintrin.h"
-#include "partition.h"
-#include "pvq.h"
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Imported from encode.c in daala */
-/* These are the PVQ equivalent of quantization matrices, except that
-   the values are per-band. */
-#define OD_MASKING_DISABLED 0
-#define OD_MASKING_ENABLED 1
-
-/* Per-band luma quantization weights, selected by OD_MASKING_DISABLED or
-   OD_MASKING_ENABLED. Each row of a table starts at offset bs*(bs + 1), the
-   base that od_qm_get_index() uses for block size bs. */
-const unsigned char OD_LUMA_QM_Q4[2][OD_QM_SIZE] = {
-  /* Flat quantization for PSNR. The DC component isn't 16 because the DC
-     magnitude compensation is done here for inter (Haar DC doesn't need
-     it). */
-  [OD_MASKING_DISABLED] = {
-    16, 16,
-    16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16
-  },
-  /* The non-flat AC coefficients compensate for the non-linear scaling
-     caused by activity masking. The values are currently hand-tuned so that
-     the rate of each band remains roughly constant when enabling activity
-     masking on intra. */
-  [OD_MASKING_ENABLED] = {
-    16, 16,
-    16, 18, 28, 32,
-    16, 14, 20, 20, 28, 32,
-    16, 11, 14, 14, 17, 17, 22, 28
-  }
-};
-
-/* Per-band chroma quantization weights, selected by OD_MASKING_DISABLED or
-   OD_MASKING_ENABLED. Both tables are currently flat.
-   Chroma quantization is different because of the reduced lapping.
-   FIXME: Use the same matrix as luma for 4:4:4. */
-const unsigned char OD_CHROMA_QM_Q4[2][OD_QM_SIZE] = {
-  [OD_MASKING_DISABLED] = {
-    16, 16,
-    16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16
-  },
-  /* The AC part is flat for chroma because it has no activity masking. */
-  [OD_MASKING_ENABLED] = {
-    16, 16,
-    16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16
-  }
-};
-
-/* Default quantization-matrix entries, indexed as
-   [masking enabled][entry number][plane].
-   No interpolation: the second entry of every set is left empty (NULL
-   matrix), so only the Q4 tables above are used, with a separate scale for
-   each plane.
-   NOTE(review): each initializer looks like {interp_q, scale_q8, qm_q4},
-   going by the fields od_interp_qm() reads -- confirm against the
-   od_qm_entry declaration.
-   FIXME: Add interpolation and properly tune chroma. */
-const od_qm_entry OD_DEFAULT_QMS[2][2][OD_NPLANES_MAX] = {
-  /* Masking disabled: luma, then the two chroma planes. */
-  { { { 4, 256, OD_LUMA_QM_Q4[OD_MASKING_DISABLED] },
-      { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED] },
-      { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED] } },
-    { { 0, 0, NULL},
-      { 0, 0, NULL},
-      { 0, 0, NULL} } },
-  /* Masking enabled: luma, then the two chroma planes. */
-  { { { 4, 256, OD_LUMA_QM_Q4[OD_MASKING_ENABLED] },
-      { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED] },
-      { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED] } },
-    { { 0, 0, NULL},
-      { 0, 0, NULL},
-      { 0, 0, NULL} } }
-};
-
-/* Constants for the beta parameter, which controls how activity masking is
-   used.
-   beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity
-   masking is disabled. When beta is 1.5, activity masking is used. Note that
-   activity masking is neither used for 4x4 blocks nor for chroma.
-   The per-size arrays below hold 1, 4, 7 and 10 values for the 4, 8, 16 and
-   32 block sizes respectively. OD_PVQ_BETA, defined last, gathers them and
-   is indexed as [masking enabled][plane][block size]; the two chroma planes
-   share the same arrays, and the masking variant only differs for luma. */
-#define OD_BETA(b) OD_QCONST32(b, OD_BETA_SHIFT)
-static const od_val16 OD_PVQ_BETA4_LUMA[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_LUMA[4] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA16_LUMA[7] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-
-static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
-static const od_val16 OD_PVQ_BETA16_LUMA_MASKING[7] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5)};
-static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
-
-static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA16_CHROMA[7] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-
-const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1] = {
- {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA,
-   OD_PVQ_BETA16_LUMA, OD_PVQ_BETA32_LUMA},
-  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
-   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
-  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
-   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}},
- {{OD_PVQ_BETA4_LUMA_MASKING, OD_PVQ_BETA8_LUMA_MASKING,
-   OD_PVQ_BETA16_LUMA_MASKING, OD_PVQ_BETA32_LUMA_MASKING},
-  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
-   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
-  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
-   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}
-};
-
-
-/* Fills out[0..OD_QM_SIZE-1] with the quantization matrix to use at
-   quantizer q, taken from entry1, from entry2, or interpolated between them.
-
-   out     destination, OD_QM_SIZE bytes; every value is clamped to 255
-   q       quantizer, compared against interp_q << OD_COEFF_SHIFT
-   entry1  lower anchor (may be NULL or carry a NULL qm_q4)
-   entry2  upper anchor (may be NULL or carry a NULL qm_q4)
-
-   An entry is usable only when both it and its qm_q4 are non-NULL. Usability
-   is established before any field of the entry is read; the previous code
-   read entry1->interp_q ahead of its own entry1 == NULL test. If neither
-   entry is usable, out[] is left untouched. */
-void od_interp_qm(unsigned char *out, int q, const od_qm_entry *entry1,
-  const od_qm_entry *entry2) {
-  int i;
-  int have1;
-  int have2;
-  have1 = entry1 != NULL && entry1->qm_q4 != NULL;
-  have2 = entry2 != NULL && entry2->qm_q4 != NULL;
-  OD_ASSERT(have1 || have2);
-  if (!have1 && !have2) return;
-  if (!have2 || (have1 && q < (entry1->interp_q << OD_COEFF_SHIFT))) {
-    /* Use entry1. */
-    for (i = 0; i < OD_QM_SIZE; i++) {
-      out[i] = OD_MINI(255, entry1->qm_q4[i]*entry1->scale_q8 >> 8);
-    }
-  }
-  else if (!have1 || q > (entry2->interp_q << OD_COEFF_SHIFT)) {
-    /* Use entry2. */
-    for (i = 0; i < OD_QM_SIZE; i++) {
-      out[i] = OD_MINI(255, entry2->qm_q4[i]*entry2->scale_q8 >> 8);
-    }
-  }
-  else {
-    /* Interpolate between entry1 and entry2. The interpolation is linear
-       in terms of log(q) vs log(m*scale). Considering that we're ultimately
-       multiplying the result it makes sense, but we haven't tried other
-       interpolation methods. */
-    double x;
-    const unsigned char *m1;
-    const unsigned char *m2;
-    int q1;
-    int q2;
-    m1 = entry1->qm_q4;
-    m2 = entry2->qm_q4;
-    q1 = entry1->interp_q << OD_COEFF_SHIFT;
-    q2 = entry2->interp_q << OD_COEFF_SHIFT;
-    /* x is the position of q between q1 (x = 0) and q2 (x = 1) on a log
-       scale. */
-    x = (log(q)-log(q1))/(log(q2)-log(q1));
-    for (i = 0; i < OD_QM_SIZE; i++) {
-      out[i] = OD_MINI(255, (int)floor(.5 + (1./256)*exp(
-       x*log(m2[i]*entry2->scale_q8) + (1 - x)*log(m1[i]*entry1->scale_q8))));
-    }
-  }
-}
-
-/* Puts every adaptive PVQ model in *state back to its starting value.
-   is_keyframe only affects the initial value written to pvq_ext[]. */
-void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) {
-  od_pvq_codeword_ctx *cw;
-  int ext_init;
-  int plane;
-  int tx;
-  int k;
-  cw = &state->pvq_codeword_ctx;
-  OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[0].cdf);
-  OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[1].cdf);
-  OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[2].cdf);
-  /* Four adaptation parameters are stored back to back per context. */
-  for (k = 0; k < 2*OD_TXSIZES; k++) {
-    int base;
-    base = 4*k;
-    cw->pvq_adapt[base + OD_ADAPT_K_Q8] = 384;
-    cw->pvq_adapt[base + OD_ADAPT_SUM_EX_Q8] = 256;
-    cw->pvq_adapt[base + OD_ADAPT_COUNT_Q8] = 104;
-    cw->pvq_adapt[base + OD_ADAPT_COUNT_EX_Q8] = 128;
-  }
-  OD_CDFS_INIT_DYNAMIC(cw->pvq_k1_cdf);
-  for (plane = 0; plane < OD_NPLANES_MAX; plane++) {
-    for (tx = 0; tx < OD_TXSIZES; tx++) {
-      for (k = 0; k < PVQ_MAX_PARTITIONS; k++) {
-        state->pvq_exg[plane][tx][k] = 2 << 16;
-      }
-    }
-  }
-  ext_init = is_keyframe ? 24576 : (2 << 16);
-  for (k = 0; k < OD_TXSIZES*PVQ_MAX_PARTITIONS; k++) {
-    state->pvq_ext[k] = ext_init;
-  }
-  OD_CDFS_INIT_DYNAMIC(state->pvq_gaintheta_cdf);
-  OD_CDFS_INIT_Q15(state->pvq_skip_dir_cdf);
-  OD_CDFS_INIT_DYNAMIC(cw->pvq_split_cdf);
-}
-
-/* Returns the offset of the quantization matrix for block size bs and
-   decimation flag xydec. Matrices are laid out from the smallest to the
-   largest block size, first all those with decimation=0, then (one
-   OD_QM_STRIDE further) those with decimation=1. */
-int od_qm_offset(int bs, int xydec)
-{
-    int within_set;
-    within_set = OD_QM_OFFSET(bs);
-    return xydec*OD_QM_STRIDE + within_set;
-}
-
-#if defined(OD_FLOAT_PVQ)
-#define OD_DEFAULT_MAG 1.0
-#else
-#define OD_DEFAULT_MAG OD_QM_SCALE
-#endif
-
-/* Initialize the quantization matrix.
-   For every block size bs and decimation flag xydec, builds a (4 << bs)
-   square table of per-coefficient magnitudes (y) and their reciprocals
-   (y_inv) from the 8-wide matrix qm, then stores both in coding order at
-   od_qm_offset(bs, xydec) inside x and x_inv. The DC entry keeps
-   OD_DEFAULT_MAG; every other entry is OD_DEFAULT_MAG scaled by 16/qm[...]. */
-// Note: When hybrid transform and corresponding scan order is used by PVQ,
-// we don't need separate qm and qm_inv for each transform type,
-// because AOM does not do magnitude compensation (i.e. simply x16 for all coeffs).
-void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
-  int i;
-  int j;
-  int16_t y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
-  int16_t y_inv[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
-  int16_t *x1;
-  int16_t *x1_inv;
-  int off;
-  int bs;
-  int xydec;
-  for (bs = 0; bs < OD_TXSIZES; bs++) {
-    for (xydec = 0; xydec < 2; xydec++) {
-      off = od_qm_offset(bs, xydec);
-      x1 = x + off;
-      x1_inv = x_inv + off;
-      for (i = 0; i < 4 << bs; i++) {
-        for (j = 0; j < 4 << bs; j++) {
-          /*This will ultimately be clamped to fit in 16 bits.
-            (i << 1 >> bs, j << 1 >> bs) maps the coefficient position onto
-             the 8x8 grid that qm is indexed with.*/
-          od_val32 mag;
-          int16_t ytmp;
-          mag = OD_DEFAULT_MAG;
-          if (i != 0 || j != 0) {
-#if defined(OD_FLOAT_PVQ)
-            mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
-#else
-            int qmv;
-            qmv = qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
-            mag *= 16;
-            mag = (mag + (qmv >> 1))/qmv;
-#endif
-            OD_ASSERT(mag > 0.0);
-          }
-          /*Convert to fit in 16 bits: y is clamped to OD_QM_SCALE_MAX and
-             y_inv is the rounded quotient OD_QM_SCALE*OD_QM_INV_SCALE/y.*/
-#if defined(OD_FLOAT_PVQ)
-          y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
-           (int32_t)floor(.5 + mag*OD_QM_SCALE));
-          y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
-           + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
-#else
-          y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX, mag);
-          ytmp = y[i*(4 << bs) + j];
-          y_inv[i*(4 << bs) + j] = (int16_t)((OD_QM_SCALE*OD_QM_INV_SCALE
-           + (ytmp >> 1))/ytmp);
-#endif
-        }
-      }
-      od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
-      od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
-    }
-  }
-}
-
-/* Gives every vector length n that the split k-tokenizer can produce its own
-   context number.
-   The lengths that occur are:
-   2, 3, 4, 7, 8, 14, 15, 16, 31, 32, 63, 64, 127, 128
-   The order is irrelevant (even in the bit-stream), so the cheapest mapping
-   is used, which numbers them 0, 1, 2, ... in this order:
-   14, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 */
-int od_pvq_size_ctx(int n) {
-  int ctx;
-  /* Two contexts per power of two; the odd length takes the lower one. */
-  ctx = 2*OD_ILOG(n - 1) - 1 - (n & 1);
-  /* 14 would otherwise collide with 16, so it is moved to slot 0. */
-  if (n == 14) ctx -= 7;
-  return ctx;
-}
-
-/* Context selection for the (k=1, n<=16) coder.
-   With orig_length=0 (the vector has already been split) the mapping is the
-   one from od_pvq_size_ctx(). With orig_length=1 (n is the original,
-   unsplit length) the lengths 8, 7, 14, 15 get contexts 8, 9, 10, 11. */
-int od_pvq_k1_ctx(int n, int orig_length) {
-  if (!orig_length) return od_pvq_size_ctx(n);
-  return 8 + (n > 8 ? 2 : 0) + (n & 1);
-}
-
-/* Returns the position of (block size bs, band) in the packed quantization
-   matrices. */
-int od_qm_get_index(int bs, int band) {
-  int first;
-  OD_ASSERT(bs >= 0 && bs < OD_TXSIZES);
-  /* Entries of block size bs start at bs*(bs + 1). */
-  first = bs*(bs + 1);
-  /* Corresponding horizontal and vertical bands are forced to share one
-     quantizer, hence the -band/3 correction. */
-  return first + band - band/3;
-}
-
-#if !defined(OD_FLOAT_PVQ)
-/*See celt/mathops.c in Opus and tools/cos_search.c.
-  Fixed-point polynomial helper behind od_pvq_cos(), which only calls it with
-   0 < x < 32768 (32768 being the argument for which od_pvq_cos() returns 0).
-  The result is clamped to 32767.*/
-static int16_t od_pvq_cos_pi_2(int16_t x)
-{
-  int16_t x2;
-  x2 = OD_MULT16_16_Q15(x, x);
-  return OD_MINI(32767, (1073758164 - x*x + x2*(-7654 + OD_MULT16_16_Q16(x2,
-   16573 + OD_MULT16_16_Q16(-2529, x2)))) >> 15);
-}
-#endif
-
-/*Approximates cos(x) for -pi < x < pi.
-  Input is in OD_THETA_SCALE.
-  In the fixed-point build one full period is 1 << 17 units, and the three
-   arguments 0, 1 << 15 and 1 << 16 return 32767, 0 and -32767 exactly instead
-   of going through the polynomial.*/
-od_val16 od_pvq_cos(od_val32 x) {
-#if defined(OD_FLOAT_PVQ)
-  return cos(x);
-#else
-  /*cos is periodic, so keep only the position within one period...*/
-  x &= 0x0001ffff;
-  /*...and even, so the second half-period mirrors onto the first.*/
-  if (x > (1 << 16)) x = (1 << 17) - x;
-  /*x is now in [0, 1 << 16]. Multiples of 1 << 15 have exact answers.*/
-  if ((x & 0x00007fff) == 0) {
-    if (x == 0) return 32767;
-    if (x == (1 << 16)) return -32767;
-    return 0;
-  }
-  /*First quadrant: evaluate directly.*/
-  if (x < (1 << 15)) return od_pvq_cos_pi_2((int16_t)x);
-  /*Second quadrant: negate the first-quadrant value.*/
-  return -od_pvq_cos_pi_2((int16_t)(65536 - x));
-#endif
-}
-
-/*Approximates sin(x) for 0 <= x < pi.
-  Input is in OD_THETA_SCALE.
-  The fixed-point build evaluates it as a cosine shifted by 1 << 15 units.*/
-od_val16 od_pvq_sin(od_val32 x) {
-#if !defined(OD_FLOAT_PVQ)
-  return od_pvq_cos((1 << 15) - x);
-#else
-  return sin(x);
-#endif
-}
-
-#if !defined(OD_FLOAT_PVQ)
-/* Upper bound on the number of bits needed to hold the L2 norm of the
-   n-element vector x (sign excluded). */
-int od_vector_log_mag(const od_coeff *x, int n) {
-  int32_t energy;
-  int k;
-  energy = 0;
-  for (k = 0; k < n; k++) {
-    int16_t hi;
-    /* Only the part above the low 8 bits is squared. */
-    hi = x[k] >> 8;
-    energy += hi*(int32_t)hi;
-  }
-  /* 8 bits for the shift above, plus one whole extra bit (rather than
-     rounding OD_ILOG() up) because dropping the low bits makes the sum a
-     slight underestimate. */
-  return 9 + OD_ILOG(n + energy)/2;
-}
-#endif
-
-/** Builds, in place, the Householder reflection vector that maps the
- *  reference r onto the axis where r has its largest absolute value.
- *
- * @param [in,out]  r      reference vector on entry, reflection vector on
- *                         return
- * @param [in]      n      number of dimensions in r
- * @param [in]      gr     gain of reference vector
- * @param [out]     sign   sign of reflection (+1 or -1)
- * @param [in]      shift  shift handed to OD_SHR_ROUND() together with
- *                         gr*sign before the result is added to r
- * @return                 dimension number to which reflection aligns
- **/
-int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
- int shift) {
-  od_val16 peak;
-  int pivot;
-  int dir;
-  int k;
-  OD_UNUSED(shift);
-  /* Choosing the largest component is not required for correctness; it is
-   * done for numerical stability. The first maximum wins on ties. */
-  pivot = 0;
-  peak = 0;
-  for (k = 0; k < n; k++) {
-    if (OD_ABS(r[k]) > peak) {
-      pivot = k;
-      peak = OD_ABS(r[k]);
-    }
-  }
-  if (r[pivot] > 0) dir = 1;
-  else dir = -1;
-  /* After this update r reflects the original r[] onto e_pivot. */
-  r[pivot] += OD_SHR_ROUND(gr*dir, shift);
-  *sign = dir;
-  return pivot;
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_RCP_INSHIFT 15
-#define OD_RCP_OUTSHIFT 14
-static od_val16 od_rcp(od_val16 x)
-{
-  int i;
-  od_val16 n;
-  od_val16 r;
-  i = OD_ILOG(x) - 1;
-  /*Fixed-point reciprocal approximation: x is normalized using the position
-     i of its top bit, the reciprocal of the normalized value is refined
-     below, and i is folded back into the shift applied to the result.
-    n is Q15 with range [0,1).*/
-  n = OD_VSHR_ROUND(x, i - OD_RCP_INSHIFT) - (1 << OD_RCP_INSHIFT);
-  /*Start with a linear approximation:
-    r = 1.8823529411764706-0.9411764705882353*n.
-    The coefficients and the result are Q14 in the range [15420,30840].*/
-  r = 30840 + OD_MULT16_16_Q15(-15420, n);
-  /*Perform two Newton iterations:
-    r -= r*((r*n)-1.Q15)
-       = r*((r*n)+(r-1.Q15)).*/
-  r = r - OD_MULT16_16_Q15(r, (OD_MULT16_16_Q15(r, n) + r - 32768));
-  /*We subtract an extra 1 in the second iteration to avoid overflow; it also
-     neatly compensates for truncation error in the rest of the process.*/
-  r = r - (1 + OD_MULT16_16_Q15(r, OD_MULT16_16_Q15(r, n) + r - 32768));
-  /*r is now the Q15 solution to 2/(n+1), with a maximum relative error
-     of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
-     error of 1.24665/32768.*/
-  return OD_VSHR_ROUND(r, i - OD_RCP_OUTSHIFT);
-}
-#endif
-
-/** Applies Householder reflection from compute_householder(). The
- * reflection is its own inverse.
- * Computes out = x - r*(2*<r,x>/<r,r>). Both dot products are finished
- * before any element of out is written and each out[i] depends only on
- * x[i] and r[i], so out may be the same array as x.
- *
- * @param [out]     out    reflected vector
- * @param [in]      x      vector to be reflected
- * @param [in]      r      reflection
- * @param [in]      n      number of dimensions in x,r
- */
-void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
- int n) {
-  int i;
-  od_val32 proj;
-  od_val16 proj_1;
-  od_val32 l2r;
-#if !defined(OD_FLOAT_PVQ)
-  od_val16 proj_norm;
-  od_val16 l2r_norm;
-  od_val16 rcp;
-  int proj_shift;
-  int l2r_shift;
-  int outshift;
-#endif
-  /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?
-    l2r is the squared L2 norm of r.*/
-  l2r = 0;
-  for (i = 0; i < n; i++) {
-    l2r += OD_MULT16_16(r[i], r[i]);
-  }
-  /* Apply Householder reflection: proj is the dot product of r and x. */
-  proj = 0;
-  for (i = 0; i < n; i++) {
-    proj += OD_MULT16_16(r[i], x[i]);
-  }
-#if defined(OD_FLOAT_PVQ)
-  proj_1 = proj*2./(1e-100 + l2r);
-  for (i = 0; i < n; i++) {
-    out[i] = x[i] - r[i]*proj_1;
-  }
-#else
-  /*l2r_norm is [0.5, 1.0[ in Q15.*/
-  l2r_shift = (OD_ILOG(l2r) - 1) - 14;
-  l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift);
-  rcp = od_rcp(l2r_norm);
-  proj_shift = (OD_ILOG(abs(proj)) - 1) - 14;
-  /*proj_norm is [0.5, 1.0[ in Q15.*/
-  proj_norm = OD_VSHR_ROUND(proj, proj_shift);
-  proj_1 = OD_MULT16_16_Q15(proj_norm, rcp);
-  /*The proj*2. in the float code becomes -1 in the final outshift.
-    The sign of l2r_shift is positive since we're taking the reciprocal of
-     l2r_norm and this is a right shift.
-    outshift is capped at 30.*/
-  outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift);
-  if (outshift >= 0) {
-    for (i = 0; i < n; i++) {
-      int32_t tmp;
-      tmp = OD_MULT16_16(r[i], proj_1);
-      tmp = OD_SHR_ROUND(tmp, outshift);
-      out[i] = x[i] - tmp;
-    }
-  }
-  else {
-    /*FIXME: Can we make this case impossible?
-      Right now, if r[] is all zeros except for 1, 2, or 3 ones, and
-       if x[] is all zeros except for large values at the same position as the
-       ones in r[], then we can end up with a shift of -1.
-      A negative outshift is applied as a left shift instead.*/
-    for (i = 0; i < n; i++) {
-      int32_t tmp;
-      tmp = OD_MULT16_16(r[i], proj_1);
-      tmp = OD_SHL(tmp, -outshift);
-      out[i] = x[i] - tmp;
-    }
-  }
-#endif
-}
-
-#if !defined(OD_FLOAT_PVQ)
-/*Returns 1/beta in the same OD_BETA() representation as beta. The two values
-   used by the tables in this file, 1 and 1.5, are answered with constants;
-   anything else goes through od_rcp().*/
-static od_val16 od_beta_rcp(od_val16 beta){
-  od_val16 rcp_beta;
-  if (beta == OD_BETA(1.)) return OD_BETA(1.);
-  if (beta == OD_BETA(1.5)) return OD_BETA(1./1.5);
-  /*Shift by 1 less, transposing beta to range [.5, .75] and thus < 32768.*/
-  rcp_beta = od_rcp(beta << (OD_RCP_INSHIFT - 1 - OD_BETA_SHIFT));
-  return OD_SHR_ROUND(rcp_beta, OD_RCP_OUTSHIFT + 1 - OD_BETA_SHIFT);
-}
-
-#define OD_EXP2_INSHIFT 15
-#define OD_EXP2_FRACSHIFT 15
-#define OD_EXP2_OUTSHIFT 15
-static const int32_t OD_EXP2_C[5] = {32768, 22709, 7913, 1704, 443};
-/*Evaluates x*(C[1] + x*(C[2] + x*(C[3] + x*C[4]))) with Q15 multiplies,
-   where C is OD_EXP2_C and x is the fractional part of the exponent.
-  The constant term OD_EXP2_C[0] is not included; od_exp2() adds it before
-   its final shift.
-  NOTE(review): the earlier wording gave the output as [1.0, 2.0) in
-   Q(OD_EXP2_FRACSHIFT); that appears to describe the value once
-   OD_EXP2_C[0] has been added -- confirm.*/
-static int32_t od_exp2_frac(int32_t x)
-{
-  return OD_MULT16_16_Q15(x, (OD_EXP2_C[1] + OD_MULT16_16_Q15(x,
-   (OD_EXP2_C[2] + OD_MULT16_16_Q15(x, (OD_EXP2_C[3]
-   + OD_MULT16_16_Q15(x, OD_EXP2_C[4])))))));
-}
-
-/** Base-2 exponential approximation (2^x) with Q15 input and output.
- * x is split into an integer part (x >> OD_EXP2_INSHIFT) and the remainder;
- * the remainder goes through od_exp2_frac() and the integer part becomes
- * the final shift. An integer part above 14 returns 0x7f000000 and one
- * below -15 returns 0.*/
-static int32_t od_exp2(int32_t x)
-{
-  int integer;
-  int32_t frac;
-  integer = x >> OD_EXP2_INSHIFT;
-  if (integer > 14)
-    return 0x7f000000;
-  else if (integer < -15)
-    return 0;
-  frac = od_exp2_frac(x - OD_SHL(integer, OD_EXP2_INSHIFT));
-  return OD_VSHR_ROUND(OD_EXP2_C[0] + frac, -integer) + 1;
-}
-
-#define OD_LOG2_INSHIFT 15
-#define OD_LOG2_OUTSHIFT 15
-#define OD_LOG2_INSCALE_1 (1./(1 << OD_LOG2_INSHIFT))
-#define OD_LOG2_OUTSCALE (1 << OD_LOG2_OUTSHIFT)
-/*Polynomial used by od_pow() for the fractional part of log2: returns
-   x + x*(14482 + x*(-23234 + x*(13643 + x*(-6403 + x*1515)))), every product
-   being a Q15 multiply. The polynomial is evaluated innermost term first.*/
-static int16_t od_log2(int16_t x)
-{
-  int32_t acc;
-  acc = OD_MULT16_16_Q15(x, 1515);
-  acc = OD_MULT16_16_Q15(x, (-6403 + acc));
-  acc = OD_MULT16_16_Q15(x, (13643 + acc));
-  acc = OD_MULT16_16_Q15(x, (-23234 + acc));
-  acc = OD_MULT16_16_Q15(x, (14482 + acc));
-  return x + acc;
-}
-
-static int32_t od_pow(int32_t x, od_val16 beta)
-{
-  int16_t t;
-  int xshift;
-  int log2_x;
-  od_val32 logr;
-  /*Fixed-point power function computed as od_exp2(beta*log2(x)): the integer
-     part of log2(x) comes from OD_ILOG(), the fractional part from
-     od_log2(), and the product with beta from OD_MULT16_32_QBETA().
-    FIXME: this conditional is to avoid doing log2(0).*/
-  if (x == 0)
-    return 0;
-  log2_x = (OD_ILOG(x) - 1);
-  xshift = log2_x - OD_LOG2_INSHIFT;
-  /*t should be in range [0.0, 1.0[ in Q(OD_LOG2_INSHIFT).*/
-  t = OD_VSHR(x, xshift) - (1 << OD_LOG2_INSHIFT);
-  /*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in
-     Q(OD_LOG2_OUTSHIFT).*/
-  logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE;
-  logr = (od_val32)OD_MULT16_32_QBETA(beta, logr);
-  return od_exp2(logr);
-}
-#endif
-
-/** Gain companding for activity masking: raises the gain to the power
- * 1/beta and normalizes it by the quantizer.
- *
- * @param [in]  g     real (uncompanded) gain
- * @param [in]  q0    uncompanded quality parameter
- * @param [in]  beta  activity masking beta param (exponent)
- * @return            g^(1/beta) scaled to the companded-gain domain and
- *                    divided by q0 (a plain scaled g/q0 when beta is 1)
- */
-static od_val32 od_gain_compand(od_val32 g, int q0, od_val16 beta) {
-#if defined(OD_FLOAT_PVQ)
-  if (beta != 1) {
-    return OD_CGAIN_SCALE*OD_COMPAND_SCALE*pow(g*OD_COMPAND_SCALE_1,
-     1./beta)/(double)q0;
-  }
-  return OD_CGAIN_SCALE*g/(double)q0;
-#else
-  int32_t companded;
-  /* No masking: only the rounded division by q0 is needed. */
-  if (beta == OD_BETA(1)) return (OD_CGAIN_SCALE*g + (q0 >> 1))/q0;
-  companded = od_pow(g, od_beta_rcp(beta));
-  companded <<= OD_CGAIN_SHIFT + OD_COMPAND_SHIFT - OD_EXP2_OUTSHIFT;
-  return (companded + (q0 >> 1))/q0;
-#endif
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_SQRT_INSHIFT 16
-#define OD_SQRT_OUTSHIFT 15
-static int16_t od_rsqrt_norm(int16_t x);
-
-/*Square root of a normalized input (x < 65536), obtained as x times its
-   reciprocal square root and clamped to 32767.*/
-static int16_t od_sqrt_norm(int32_t x)
-{
-  int32_t root;
-  OD_ASSERT(x < 65536);
-  root = OD_SHR_ROUND(x*od_rsqrt_norm(x), OD_SQRT_OUTSHIFT);
-  return OD_MINI(root, 32767);
-}
-
-static int16_t od_sqrt(int32_t x, int *sqrt_shift)
-{
-  int k;
-  int s;
-  int32_t t;
-  if (x == 0) {
-    *sqrt_shift = 0;
-     return 0;
-  }
-  OD_ASSERT(x < (1 << 30));
-  k = ((OD_ILOG(x) - 1) >> 1);
-  /*Fixed-point square root: returns a normalized root and stores in
-     *sqrt_shift the right shift the caller still has to apply to it (x == 0
-     was answered above with a root of 0 and a shift of 0).
-    t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
-    Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
-  s = 2*k - (OD_SQRT_INSHIFT - 2);
-  t = OD_VSHR(x, s);
-  /*We want to express od_sqrt() in terms of od_sqrt_norm(), which is
-     defined as (2^OUTSHIFT)*sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
-    This simplifies to 2^(OUTSHIFT-(INSHIFT/2)-(s/2))*sqrt(x), so the caller
-     needs to shift right by OUTSHIFT - INSHIFT/2 - s/2.*/
-  *sqrt_shift = OD_SQRT_OUTSHIFT - ((s + OD_SQRT_INSHIFT) >> 1);
-  return od_sqrt_norm(t);
-}
-#endif
-
-/** Gain expanding: raises gain to the power beta for activity masking.
- * Three paths are taken depending on beta: beta == 1 is a rounded multiply
- * by q0, beta == 1.5 uses a square root (cg*sqrt(cg)), and any other value
- * goes through the general power function. The two fixed-point paths for
- * beta != 1 never return less than 1.
- *
- * @param [in]  cg0   companded gain
- * @param [in]  q0    uncompanded quality parameter
- * @param [in]  beta  activity masking beta param (exponent)
- * @return            g^beta
- */
-od_val32 od_gain_expand(od_val32 cg0, int q0, od_val16 beta) {
-  if (beta == OD_BETA(1)) {
-    /*The multiply fits into 28 bits because the expanded gain has a range from
-       0 to 2^20.*/
-    return OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
-  }
-  else if (beta == OD_BETA(1.5)) {
-#if defined(OD_FLOAT_PVQ)
-    double cg;
-    cg = cg0*OD_CGAIN_SCALE_1;
-    cg *= q0*OD_COMPAND_SCALE_1;
-    return OD_COMPAND_SCALE*cg*sqrt(cg);
-#else
-    int32_t irt;
-    int64_t tmp;
-    int sqrt_inshift;
-    int sqrt_outshift;
-    /*cg0 is in Q(OD_CGAIN_SHIFT) and we need to divide it by
-       2^OD_COMPAND_SHIFT.*/
-    irt = od_sqrt(cg0*q0, &sqrt_outshift);
-    sqrt_inshift = (OD_CGAIN_SHIFT + OD_COMPAND_SHIFT) >> 1;
-    /*tmp is in Q(OD_CGAIN_SHIFT + OD_COMPAND_SHIFT).*/
-    tmp = cg0*q0*(int64_t)irt;
-    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), thus OD_COMPAND_SHIFT is
-       not included here.*/
-    return OD_MAXI(1,
-        OD_VSHR_ROUND(tmp, OD_CGAIN_SHIFT + sqrt_outshift + sqrt_inshift));
-#endif
-  }
-  else {
-#if defined(OD_FLOAT_PVQ)
-    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the multiply by
-       OD_COMPAND_SCALE.*/
-    double cg;
-    cg = cg0*OD_CGAIN_SCALE_1;
-    return OD_COMPAND_SCALE*pow(cg*q0*OD_COMPAND_SCALE_1, beta);
-#else
-    int32_t expr;
-    int32_t cg;
-    cg = OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
-    expr = od_pow(cg, beta);
-    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the subtraction by
-       OD_COMPAND_SHIFT.*/
-    return OD_MAXI(1, OD_SHR_ROUND(expr, OD_EXP2_OUTSHIFT - OD_COMPAND_SHIFT));
-#endif
-  }
-}
-
-/** Measures the gain (L2 norm) of a vector and returns its companded,
- * quantizer-normalized form.
- *
- * @param [in]      x      vector of input data
- * @param [in]      n      number of elements in vector x
- * @param [in]      q0     quantizer
- * @param [out]     g      raw gain
- * @param [in]      beta   activity masking beta param
- * @param [in]      bshift shift to be applied to raw gain (ignored by the
- *                         floating-point build)
- * @return                 quantized/companded gain
- */
-od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
- od_val16 beta, int bshift) {
-  od_val32 energy;
-  int k;
-#if defined(OD_FLOAT_PVQ)
-  OD_UNUSED(bshift);
-#else
-  od_val32 root;
-  int root_shift;
-#endif
-  /* Sum of squares of the input. */
-  energy = 0;
-  for (k = 0; k < n; k++) {
-    energy += x[k]*(od_val32)x[k];
-  }
-#if defined(OD_FLOAT_PVQ)
-  *g = sqrt(energy);
-#else
-  /* od_sqrt() hands back a normalized root plus the shift that undoes the
-     normalization; bshift is folded into that same shift. */
-  root = od_sqrt(energy, &root_shift);
-  *g = OD_VSHR_ROUND(root, root_shift - bshift);
-#endif
-  /* Normalize gain by quantization step size and apply companding
-     (if ACTIVITY != 1). */
-  return od_gain_compand(*g, q0, beta);
-}
-
-/** Derives the number of theta quantization steps from the quantized,
- * companded gain.
- *
- * @param [in]      qcg    quantized companded gain value
- * @param [in]      beta   activity masking beta param
- * @return                 max theta value
- */
-int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta){
-  /* Special case for low gains -- will need to be tuned anyway */
-  if (qcg < OD_QCONST32(1.4, OD_CGAIN_SHIFT)) return 1;
-  /* Otherwise the angular resolution is set to match the encoded gain:
-     round(qcg*(pi/2)/beta). */
-#if defined(OD_FLOAT_PVQ)
-  return (int)floor(.5 + qcg*OD_CGAIN_SCALE_1*M_PI/(2*beta));
-#else
-  return OD_SHR_ROUND(qcg*OD_MULT16_16_QBETA(OD_QCONST32(M_PI/2,
-   OD_CGAIN_SHIFT), od_beta_rcp(beta)), OD_CGAIN_SHIFT*2);
-#endif
-}
-
-/** Converts a coded theta index back into an angle.
- *
- * @param [in]      t          coded theta index; values of max_theta or more
- *                             are treated as max_theta - 1
- * @param [in]      max_theta  maximum theta value
- * @return                     decoded theta value (0 when max_theta is 0)
- */
-od_val32 od_pvq_compute_theta(int t, int max_theta) {
-  int step;
-  if (max_theta == 0) return 0;
-  step = OD_MINI(t, max_theta - 1);
-#if defined(OD_FLOAT_PVQ)
-  return step*.5*M_PI/max_theta;
-#else
-  /* Rounded division of step*OD_MAX_THETA_SCALE by max_theta. */
-  return (OD_MAX_THETA_SCALE*step
-   + (max_theta >> 1))/max_theta;
-#endif
-}
-
-#define OD_SQRT_TBL_SHIFT (10)
-
-#define OD_ITHETA_SHIFT 15
-/** Compute the number of pulses used for PVQ encoding a vector from
- * available metrics (encode and decode side)
- *
- * @param [in]      qcg        quantized companded gain value
- * @param [in]      itheta     quantized PVQ error angle theta
- * @param [in]      noref      indicates present or lack of reference
- *                             (prediction)
- * @param [in]      n          number of elements to be coded
- * @param [in]      beta       activity masking beta param
- * @return                     number of pulses to use for coding; 0 when
- *                             qcg is 0 (noref) or itheta is 0 (with a
- *                             reference), otherwise at least 1
- */
-int od_pvq_compute_k(od_val32 qcg, int itheta, int noref, int n,
-    od_val16 beta) {
-#if !defined(OD_FLOAT_PVQ)
-  /*Lookup table in Q10, indexed by OD_ILOG(n + 1): row 0 holds
-     sqrt((n + 2)/2) (used with a reference) and row 1 holds sqrt((n + 3)/2)
-     (used without one).
-    Real max values are 32792 and 32784, but clamped to stay within 16 bits.
-    Update with tools/gen_sqrt_tbl if needed.*/
-  static const od_val16 od_sqrt_table[2][13] = {
-   {0, 0, 0, 0, 2290, 2985, 4222, 0, 8256, 0, 16416, 0, 32767},
-   {0, 0, 0, 0, 2401, 3072, 4284, 0, 8287, 0, 16432, 0, 32767}};
-#endif
-  if (noref) {
-    if (qcg == 0) return 0;
-    if (n == 15 && qcg == OD_CGAIN_SCALE && beta > OD_BETA(1.25)) {
-      return 1;
-    }
-    else {
-#if defined(OD_FLOAT_PVQ)
-      return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1 - .2)*
-       sqrt((n + 3)/2)/beta));
-#else
-      od_val16 rt;
-      OD_ASSERT(OD_ILOG(n + 1) < 13);
-      rt = od_sqrt_table[1][OD_ILOG(n + 1)];
-      /*FIXME: get rid of 64-bit mul.*/
-      return OD_MAXI(1, OD_SHR_ROUND((int64_t)((qcg
-       - (int64_t)OD_QCONST32(.2, OD_CGAIN_SHIFT))*
-       OD_MULT16_16_QBETA(od_beta_rcp(beta), rt)), OD_CGAIN_SHIFT
-       + OD_SQRT_TBL_SHIFT));
-#endif
-    }
-  }
-  else {
-    if (itheta == 0) return 0;
-    /* Sets K according to gain and theta, based on the high-rate
-       PVQ distortion curves (see PVQ document). Low-rate will have to be
-       perceptually tuned anyway. We subtract 0.2 from the radius as an
-       approximation for the fact that the coefficients aren't identically
-       distributed within a band so at low gain the number of dimensions that
-       are likely to have a pulse is less than n. */
-#if defined(OD_FLOAT_PVQ)
-    return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
-#else
-    od_val16 rt;
-    OD_ASSERT(OD_ILOG(n + 1) < 13);
-    rt = od_sqrt_table[0][OD_ILOG(n + 1)];
-    /*FIXME: get rid of 64-bit mul.*/
-    return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
-     - OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
-     OD_SQRT_TBL_SHIFT + OD_ITHETA_SHIFT));
-#endif
-  }
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_RSQRT_INSHIFT 16
-#define OD_RSQRT_OUTSHIFT 14
-/** Reciprocal sqrt approximation where the input is in the range [0.25,1) in
-     Q16 and the output is in the range (1.0, 2.0] in Q14).
-    Error is always within +/-1 of round(1/sqrt(t)).
-    The method is a quadratic initial guess followed by one 2nd-order
-     Householder iteration.*/
-static int16_t od_rsqrt_norm(int16_t t)
-{
-  int16_t n;
-  int32_t r;
-  int32_t r2;
-  int32_t ry;
-  int32_t y;
-  int32_t ret;
-  /* Range of n is [-16384,32767] ([-0.5,1) in Q15).*/
-  n = t - 32768;
-  OD_ASSERT(n >= -16384);
-  /*Get a rough initial guess for the root.
-    The optimal minimax quadratic approximation (using relative error) is
-     r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
-    Coefficients here, and the final result r, are Q14.*/
-  r = (23565 + OD_MULT16_16_Q15(n, (-13481 + OD_MULT16_16_Q15(n, 6711))));
-  /*We want y = t*r*r-1 in Q15, but t is 32-bit Q16 and r is Q14.
-    We can compute the result from n and r using Q15 multiplies with some
-     adjustment, carefully done to avoid overflow.*/
-  r2 = r*r;
-  y = (((r2 >> 15)*n + r2) >> 12) - 131077;
-  ry = r*y;
-  /*Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
-    This yields the Q14 reciprocal square root of the Q16 t, with a maximum
-     relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a peak
-     absolute error of 2.26591/16384.*/
-  ret = r + ((((ry >> 16)*(3*y) >> 3) - ry) >> 18);
-  OD_ASSERT(ret >= 16384 && ret < 32768);
-  return (int16_t)ret;
-}
-
-static int16_t od_rsqrt(int32_t x, int *rsqrt_shift)
-{
-   int k;
-   int s;
-   int16_t t;
-   k = (OD_ILOG(x) - 1) >> 1;
-  /*Fixed-point reciprocal square root: returns a normalized value and stores
-     in *rsqrt_shift the right shift the caller still has to apply to it.
-    t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
-    Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
-   s = 2*k - (OD_RSQRT_INSHIFT - 2);
-   t = OD_VSHR(x, s);
-   /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is
-      defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
-     This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller
-      needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/
-   *rsqrt_shift = OD_RSQRT_OUTSHIFT + ((s + OD_RSQRT_INSHIFT) >> 1);
-   return od_rsqrt_norm(t);
-}
-#endif
-
-/** Synthesizes one partition of coefficient values from a PVQ-encoded
- * vector.  This 'partial' version is called by the encode loop where
- * the Householder reflection has already been computed and there's no
- * need to recompute it.
- * The pulse vector is scaled by g/sqrt(sum of squared pulses). Without a
- * reference the scaled pulses are output directly; with one, they are
- * weighted by sin(theta), the m-th component is set from cos(theta), and the
- * Householder reflection r16 is applied. Either way every output is finally
- * multiplied by the matching qm_inv entry.
- *
- * @param [out]     xcoeff  output coefficient partition (x in math doc)
- * @param [in]      ypulse  PVQ-encoded values (y in the math doc); in
- *                          the noref case, this vector has n entries,
- *                          in the reference case it contains n-1 entries
- *                          (the m-th entry is not included)
- * @param [in]      r16     reference vector (prediction), passed to
- *                          od_apply_householder() as the reflection
- * @param [in]      n       number of elements in this partition
- * @param [in]      noref   indicates presence or lack of prediction
- * @param [in]      g       decoded quantized vector gain (must be nonzero)
- * @param [in]      theta   decoded theta (prediction error)
- * @param [in]      m       alignment dimension of Householder reflection
- * @param [in]      s       sign of Householder reflection
- * @param [in]      qm_inv  inverse of the QM with magnitude compensation
- */
-void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
- const od_val16 *r16, int n, int noref, od_val32 g, od_val32 theta, int m, int s,
- const int16_t *qm_inv) {
-  int i;
-  int yy;
-  od_val32 scale;
-  int nn;
-#if !defined(OD_FLOAT_PVQ)
-  int gshift;
-  int qshift;
-#endif
-  OD_ASSERT(g != 0);
-  nn = n-(!noref); /* when noref==0, vector in is sized n-1 */
-  yy = 0;
-  for (i = 0; i < nn; i++)
-    yy += ypulse[i]*(int32_t)ypulse[i];
-#if !defined(OD_FLOAT_PVQ)
-  /* Shift required for the magnitude of the pre-qm synthesis to be guaranteed
-     to fit in 16 bits. In practice, the range will be 8192-16384 after scaling
-     most of the time. */
-  gshift = OD_MAXI(0, OD_ILOG(g) - 14);
-#endif
-  /*scale is g/sqrt(yy) in Q(16-gshift) so that x[]*scale has a norm that fits
-     in 16 bits.
-    An all-zero pulse vector (yy == 0) gets a scale of 0.*/
-  if (yy == 0) scale = 0;
-#if defined(OD_FLOAT_PVQ)
-  else {
-    scale = g/sqrt(yy);
-  }
-#else
-  else {
-    int rsqrt_shift;
-    int16_t rsqrt;
-    /*FIXME: should be < int64_t*/
-    int64_t tmp;
-    rsqrt = od_rsqrt(yy, &rsqrt_shift);
-    tmp = rsqrt*(int64_t)g;
-    scale = OD_VSHR_ROUND(tmp, rsqrt_shift + gshift - 16);
-  }
-  /* Shift to apply after multiplying by the inverse QM, taking into account
-     gshift. */
-  qshift = OD_QM_INV_SHIFT - gshift;
-#endif
-  if (noref) {
-    for (i = 0; i < n; i++) {
-      od_val32 x;
-      /* This multiply doesn't round, so it introduces some bias.
-         It would be nice (but not critical) to fix this. */
-      x = (od_val32)OD_MULT16_32_Q16(ypulse[i], scale);
-#if defined(OD_FLOAT_PVQ)
-      xcoeff[i] = (od_coeff)floor(.5
-       + x*(qm_inv[i]*OD_QM_INV_SCALE_1));
-#else
-      xcoeff[i] = OD_SHR_ROUND(x*qm_inv[i], qshift);
-#endif
-    }
-  }
-  else{
-    od_val16 x[MAXN];
-    scale = OD_ROUND32(scale*OD_TRIG_SCALE_1*od_pvq_sin(theta));
-    /* The following multiply doesn't round, but it's probably OK since
-       the Householder reflection is likely to undo most of the resulting
-       bias.
-       ypulse has no entry for dimension m, so pulses at index m and above
-       land one slot higher in x[] and x[m] is filled in separately. */
-    for (i = 0; i < m; i++)
-      x[i] = OD_MULT16_32_Q16(ypulse[i], scale);
-    x[m] = OD_ROUND16(-s*(OD_SHR_ROUND(g, gshift))*OD_TRIG_SCALE_1*
-     od_pvq_cos(theta));
-    for (i = m; i < nn; i++)
-      x[i+1] = OD_MULT16_32_Q16(ypulse[i], scale);
-    od_apply_householder(x, x, r16, n);
-    for (i = 0; i < n; i++) {
-#if defined(OD_FLOAT_PVQ)
-      xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1)));
-#else
-      xcoeff[i] = OD_SHR_ROUND(x[i]*qm_inv[i], qshift);
-#endif
-    }
-  }
-}