diff options
Diffstat (limited to 'media/libaom/src/av1/encoder/encodetxb.c')
-rw-r--r-- | media/libaom/src/av1/encoder/encodetxb.c | 1041 |
1 files changed, 620 insertions, 421 deletions
diff --git a/media/libaom/src/av1/encoder/encodetxb.c b/media/libaom/src/av1/encoder/encodetxb.c index 5a31d93d73..825d52a7ae 100644 --- a/media/libaom/src/av1/encoder/encodetxb.c +++ b/media/libaom/src/av1/encoder/encodetxb.c @@ -23,6 +23,7 @@ #include "av1/encoder/rdopt.h" #include "av1/encoder/tokenize.h" +#if CONFIG_HTB_TRELLIS static int hbt_needs_init = 1; static CRC32C crc_calculator; static const int HBT_EOB = 16; // also the length in opt_qcoeff @@ -41,6 +42,7 @@ typedef struct OptTxbQcoeff { } OptTxbQcoeff; OptTxbQcoeff *hbt_hash_table; +#endif // CONFIG_HTB_TRELLIS typedef struct LevelDownStats { int update; @@ -60,10 +62,19 @@ typedef struct LevelDownStats { int new_eob; } LevelDownStats; +static INLINE int get_dqv(const int16_t *dequant, int coeff_idx, + const qm_val_t *iqmatrix) { + int dqv = dequant[!!coeff_idx]; + if (iqmatrix != NULL) + dqv = + ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + return dqv; +} + void av1_alloc_txb_buf(AV1_COMP *cpi) { AV1_COMMON *cm = &cpi->common; - int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) * - ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1); + int size = ((cm->mi_params.mi_rows >> cm->seq_params.mib_size_log2) + 1) * + ((cm->mi_params.mi_cols >> cm->seq_params.mib_size_log2) + 1); av1_free_txb_buf(cpi); // TODO(jingning): This should be further reduced. @@ -73,26 +84,6 @@ void av1_alloc_txb_buf(AV1_COMP *cpi) { void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); } -void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x, - int mi_row, int mi_col) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - int mib_size_log2 = cm->seq_params.mib_size_log2; - int stride = (cm->mi_cols >> mib_size_log2) + 1; - int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2); - CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset]; - const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); - assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size])); - for (int plane = 0; plane < num_planes; ++plane) { - x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset; - x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset; - x->mbmi_ext->txb_skip_ctx[plane] = - coeff_buf->txb_skip_ctx[plane] + txb_offset; - x->mbmi_ext->dc_sign_ctx[plane] = - coeff_buf->dc_sign_ctx[plane] + txb_offset; - } -} - static void write_golomb(aom_writer *w, int level) { int x = level + 1; int i = x; @@ -160,7 +151,7 @@ static INLINE int get_eob_pos_token(const int eob, int *const extra) { t = eob_to_pos_large[e]; } - *extra = eob - k_eob_group_start[t]; + *extra = eob - av1_eob_group_start[t]; return t; } @@ -243,9 +234,9 @@ void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class, break; } - if (k_eob_offset_bits[eob_pt] > 0) { + if (av1_eob_offset_bits[eob_pt] > 0) { int eob_ctx = eob_pt - 3; - int eob_shift = k_eob_offset_bits[eob_pt] - 1; + int eob_shift = av1_eob_offset_bits[eob_pt] - 1; int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; #if CONFIG_ENTROPY_STATS counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++; @@ -263,12 +254,12 @@ static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs, const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; - if (k_eob_offset_bits[eob_pt] > 0) { + if (av1_eob_offset_bits[eob_pt] > 0) { const int eob_ctx = eob_pt - 3; - const int eob_shift = k_eob_offset_bits[eob_pt] - 1; + const int eob_shift = av1_eob_offset_bits[eob_pt] - 1; const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit]; - const int offset_bits = k_eob_offset_bits[eob_pt]; + const int offset_bits = av1_eob_offset_bits[eob_pt]; if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1); } return eob_cost; @@ -284,20 +275,16 @@ static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx, return av1_cost_literal(1); } -static INLINE int get_br_cost(tran_low_t abs_qc, int ctx, - const int *coeff_lps) { - const tran_low_t min_level = 1 + NUM_BASE_LEVELS; - const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE; - (void)ctx; - if (abs_qc >= min_level) { - if (abs_qc >= max_level) { - return coeff_lps[COEFF_BASE_RANGE]; // COEFF_BASE_RANGE * cost0; - } else { - return coeff_lps[(abs_qc - min_level)]; // * cost0 + cost1; - } - } - return 0; -} +static const int golomb_bits_cost[32] = { + 0, 512, 512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5, + 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, + 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, + 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9 +}; +static const int golomb_cost_diff[32] = { + 0, 512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0, + 512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; static INLINE int get_golomb_cost(int abs_qc) { if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { @@ -308,6 +295,32 @@ static INLINE int get_golomb_cost(int abs_qc) { return 0; } +static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps, + int *diff) { + const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); + int golomb_bits = 0; + if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) + *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1]; + + if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) { + int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS; + if (r < 32) { + golomb_bits = golomb_bits_cost[r]; + *diff += golomb_cost_diff[r]; + } else { + golomb_bits = get_golomb_cost(level); + *diff += (r & (r - 1)) == 0 ? 1024 : 0; + } + } + + return coeff_lps[base_range] + golomb_bits; +} + +static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) { + const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); + return coeff_lps[base_range] + get_golomb_cost(level); +} + static int get_coeff_cost(const tran_low_t qc, const int scan_idx, const int is_eob, const TxbInfo *const txb_info, const LV_MAP_COEFF_COST *const txb_costs, @@ -331,8 +344,7 @@ static int get_coeff_cost(const tran_low_t qc, const int scan_idx, if (abs_qc > NUM_BASE_LEVELS) { const int ctx = get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class); - cost += get_br_cost(abs_qc, ctx, txb_costs->lps_cost[ctx]); - cost += get_golomb_cost(abs_qc); + cost += get_br_cost(abs_qc, txb_costs->lps_cost[ctx]); } } return cost; @@ -464,8 +476,6 @@ void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width, const int stride = width + TX_PAD_HOR; uint8_t *ls = levels; - memset(levels - TX_PAD_TOP * stride, 0, - sizeof(*levels) * TX_PAD_TOP * stride); memset(levels + stride * height, 0, sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END)); @@ -492,36 +502,35 @@ void av1_get_nz_map_contexts_c(const uint8_t *const levels, } } -void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, +void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCK *const x, aom_writer *w, int blk_row, int blk_col, int plane, - TX_SIZE tx_size, const tran_low_t *tcoeff, - uint16_t eob, TXB_CTX *txb_ctx) { + int block, TX_SIZE tx_size) { + MACROBLOCKD *xd = &x->e_mbd; + const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff; + const int txb_offset = + x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); + const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset; + const uint16_t eob = eob_txb[block]; + const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; + const int txb_skip_ctx = entropy_ctx[block] & TXB_SKIP_CTX_MASK; const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - aom_write_symbol(w, eob == 0, - ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2); + aom_write_symbol(w, eob == 0, ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 2); if (eob == 0) return; - const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int16_t *const scan = scan_order->scan; - int c; - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); - av1_txb_init_levels(tcoeff, width, height, levels); - av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w); + const PLANE_TYPE plane_type = get_plane_type(plane); + const TX_TYPE tx_type = + av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, + cm->features.reduced_tx_set_used); + // Only y plane's tx_type is transmitted + if (plane == 0) { + av1_write_tx_type(cm, xd, tx_type, tx_size, w); + } int eob_extra; const int eob_pt = get_eob_pos_token(eob, &eob_extra); const int eob_multi_size = txsize_log2_minus4[tx_size]; + const TX_CLASS tx_class = tx_type_to_class[tx_type]; const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; switch (eob_multi_size) { case 0: @@ -554,22 +563,35 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, break; } - if (k_eob_offset_bits[eob_pt] > 0) { + const int eob_offset_bits = av1_eob_offset_bits[eob_pt]; + if (eob_offset_bits > 0) { const int eob_ctx = eob_pt - 3; - int eob_shift = k_eob_offset_bits[eob_pt] - 1; + int eob_shift = eob_offset_bits - 1; int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; aom_write_symbol(w, bit, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2); - for (int i = 1; i < k_eob_offset_bits[eob_pt]; i++) { - eob_shift = k_eob_offset_bits[eob_pt] - 1 - i; + for (int i = 1; i < eob_offset_bits; i++) { + eob_shift = eob_offset_bits - 1 - i; bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; aom_write_bit(w, bit); } } + const int width = get_txb_wide(tx_size); + const int height = get_txb_high(tx_size); + uint8_t levels_buf[TX_PAD_2D]; + uint8_t *const levels = set_levels(levels_buf, width); + const tran_low_t *tcoeff_txb = + cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset; + const tran_low_t *tcoeff = tcoeff_txb + BLOCK_OFFSET(block); + av1_txb_init_levels(tcoeff, width, height, levels); + const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); + const int16_t *const scan = scan_order->scan; + DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); - for (c = eob - 1; c >= 0; --c) { + const int bwl = get_txb_bwl(tx_size); + for (int c = eob - 1; c >= 0; --c) { const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = tcoeff[pos]; @@ -588,12 +610,11 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, // level is above 1. const int base_range = level - 1 - NUM_BASE_LEVELS; const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); + aom_cdf_prob *cdf = + ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); - aom_write_symbol( - w, k, - ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], - BR_CDF_SIZE); + aom_write_symbol(w, k, cdf, BR_CDF_SIZE); if (k < BR_CDF_SIZE - 1) break; } } @@ -601,14 +622,16 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, // Loop to code all signs in the transform block, // starting with the sign of DC (if applicable) - for (c = 0; c < eob; ++c) { + for (int c = 0; c < eob; ++c) { const tran_low_t v = tcoeff[scan[c]]; const tran_low_t level = abs(v); const int sign = (v < 0) ? 1 : 0; if (level) { if (c == 0) { - aom_write_symbol( - w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2); + const int dc_sign_ctx = + (entropy_ctx[block] >> DC_SIGN_CTX_SHIFT) & DC_SIGN_CTX_MASK; + aom_write_symbol(w, sign, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], + 2); } else { aom_write_bit(w, sign); } @@ -624,20 +647,8 @@ typedef struct encode_txb_args { aom_writer *w; } ENCODE_TXB_ARGS; -static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x, - aom_writer *w, int plane, int block, - int blk_row, int blk_col, TX_SIZE tx_size) { - MACROBLOCKD *xd = &x->e_mbd; - tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); - uint16_t eob = x->mbmi_ext->eobs[plane][block]; - TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], - x->mbmi_ext->dc_sign_ctx[plane][block] }; - av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob, - &txb_ctx); -} - -void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, - int mi_col, aom_writer *w, BLOCK_SIZE bsize) { +void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, + aom_writer *w, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; const int num_planes = av1_num_planes(cm); int block[MAX_MB_PLANE] = { 0 }; @@ -647,23 +658,20 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, const int max_blocks_wide = max_block_wide(xd, bsize, 0); const int max_blocks_high = max_block_high(xd, bsize, 0); const BLOCK_SIZE max_unit_bsize = BLOCK_64X64; - int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; + int mu_blocks_wide = mi_size_wide[max_unit_bsize]; + int mu_blocks_high = mi_size_high[max_unit_bsize]; mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide); mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high); for (row = 0; row < max_blocks_high; row += mu_blocks_high) { for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) { for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; + if (plane && !xd->is_chroma_ref) break; const TX_SIZE tx_size = av1_get_tx_size(plane, xd); const int stepr = tx_size_high_unit[tx_size]; const int stepc = tx_size_wide_unit[tx_size]; const int step = stepr * stepc; - + const struct macroblockd_plane *const pd = &xd->plane[plane]; const int unit_height = ROUND_POWER_OF_TWO( AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y); const int unit_width = ROUND_POWER_OF_TWO( @@ -672,8 +680,8 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, blk_row += stepr) { for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width; blk_col += stepc) { - write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row, - blk_col, tx_size); + av1_write_coeffs_txb(cm, x, w, blk_row, blk_col, plane, + block[plane], tx_size); block[plane] += step; } } @@ -683,19 +691,19 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, } // TODO(angiebird): use this function whenever it's possible -static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, - const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, - TX_TYPE tx_type) { +static int get_tx_type_cost(const MACROBLOCK *x, const MACROBLOCKD *xd, + int plane, TX_SIZE tx_size, TX_TYPE tx_type, + int reduced_tx_set_used) { if (plane > 0) return 0; const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; const MB_MODE_INFO *mbmi = xd->mi[0]; const int is_inter = is_inter_block(mbmi); - if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && + if (get_ext_tx_types(tx_size, is_inter, reduced_tx_set_used) > 1 && !xd->lossless[xd->mi[0]->segment_id]) { const int ext_tx_set = - get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); + get_ext_tx_set(tx_size, is_inter, reduced_tx_set_used); if (is_inter) { if (ext_tx_set > 0) return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type]; @@ -715,14 +723,44 @@ static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, return 0; } +static INLINE void update_coeff_eob_fast(int *eob, int shift, + const int16_t *dequant_ptr, + const int16_t *scan, + const tran_low_t *coeff_ptr, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr) { + // TODO(sarahparker) make this work for aomqm + int eob_out = *eob; + int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7), + dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7) }; + + for (int i = *eob - 1; i >= 0; i--) { + const int rc = scan[i]; + const int qcoeff = qcoeff_ptr[rc]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = AOMSIGN(coeff); + int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + + if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) { + eob_out--; + qcoeff_ptr[rc] = 0; + dqcoeff_ptr[rc] = 0; + } else { + break; + } + } + + *eob = eob_out; +} + static AOM_FORCE_INLINE int warehouse_efficients_txb( - const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane, - const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, + const MACROBLOCK *x, const int plane, const int block, + const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, const struct macroblock_plane *p, const int eob, const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs, - const MACROBLOCKD *const xd, const TX_TYPE tx_type, - const TX_CLASS tx_class) { - const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_CLASS tx_class, + int reduced_tx_set_used) { + const tran_low_t *const qcoeff = p->qcoeff + BLOCK_OFFSET(block); const int txb_skip_ctx = txb_ctx->txb_skip_ctx; const int bwl = get_txb_bwl(tx_size); const int width = get_txb_wide(tx_size); @@ -739,18 +777,19 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb( av1_txb_init_levels(qcoeff, width, height, levels); - cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); + cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used); cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); - const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost; + const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] = + coeff_costs->lps_cost; int c = eob - 1; { const int pos = scan[c]; const tran_low_t v = qcoeff[pos]; - const int sign = v >> 31; + const int sign = AOMSIGN(v); const int level = (v ^ sign) - sign; const int coeff_ctx = coeff_contexts[pos]; cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; @@ -758,11 +797,8 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb( if (v) { // sign bit cost if (level > NUM_BASE_LEVELS) { - const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); + const int ctx = get_br_ctx_eob(pos, bwl, tx_class); + cost += get_br_cost(level, lps_cost[ctx]); } if (c) { cost += av1_cost_literal(1); @@ -774,31 +810,28 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb( } } } - const int(*base_cost)[4] = coeff_costs->base_cost; + const int(*base_cost)[8] = coeff_costs->base_cost; for (c = eob - 2; c >= 1; --c) { const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = qcoeff[pos]; const int level = abs(v); - const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)]; + cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; if (v) { // sign bit cost cost += av1_cost_literal(1); if (level > NUM_BASE_LEVELS) { const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); + cost += get_br_cost(level, lps_cost[ctx]); } } - cost += cost0; } - if (c == 0) { + // c == 0 after previous loop + { const int pos = scan[c]; const tran_low_t v = qcoeff[pos]; const int coeff_ctx = coeff_contexts[pos]; - const int sign = v >> 31; + const int sign = AOMSIGN(v); const int level = (v ^ sign) - sign; cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; @@ -809,19 +842,81 @@ static AOM_FORCE_INLINE int warehouse_efficients_txb( cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; if (level > NUM_BASE_LEVELS) { const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); + cost += get_br_cost(level, lps_cost[ctx]); } } } return cost; } -int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x, - const int plane, const int block, const TX_SIZE tx_size, - const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) { +static AOM_FORCE_INLINE int warehouse_efficients_txb_laplacian( + const MACROBLOCK *x, const int plane, const int block, + const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, const int eob, + const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs, + const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_CLASS tx_class, + int reduced_tx_set_used) { + const int txb_skip_ctx = txb_ctx->txb_skip_ctx; + + const int eob_multi_size = txsize_log2_minus4[tx_size]; + const LV_MAP_EOB_COST *const eob_costs = + &x->eob_costs[eob_multi_size][plane_type]; + int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; + + cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used); + + cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); + + cost += av1_cost_coeffs_txb_estimate(x, plane, block, tx_size, tx_type); + return cost; +} + +// Look up table of individual cost of coefficient by its quantization level. +// determined based on Laplacian distribution conditioned on estimated context +static const int costLUT[15] = { -1143, 53, 545, 825, 1031, + 1209, 1393, 1577, 1763, 1947, + 2132, 2317, 2501, 2686, 2871 }; +static const int const_term = (1 << AV1_PROB_COST_SHIFT); +static const int loge_par = ((14427 << AV1_PROB_COST_SHIFT) + 5000) / 10000; +int av1_cost_coeffs_txb_estimate(const MACROBLOCK *x, const int plane, + const int block, const TX_SIZE tx_size, + const TX_TYPE tx_type) { + assert(plane == 0); + + int cost = 0; + const struct macroblock_plane *p = &x->plane[plane]; + const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); + const int16_t *scan = scan_order->scan; + tran_low_t *qcoeff = p->qcoeff + BLOCK_OFFSET(block); + + int eob = p->eobs[block]; + + // coeffs + int c = eob - 1; + // eob + { + const int pos = scan[c]; + const tran_low_t v = abs(qcoeff[pos]) - 1; + cost += (v << (AV1_PROB_COST_SHIFT + 2)); + } + // other coeffs + for (c = eob - 2; c >= 0; c--) { + const int pos = scan[c]; + const tran_low_t v = abs(qcoeff[pos]); + const int idx = AOMMIN(v, 14); + + cost += costLUT[idx]; + } + + // const_term does not contain DC, and log(e) does not contain eob, so both + // (eob-1) + cost += (const_term + loge_par) * (eob - 1); + + return cost; +} + +int av1_cost_coeffs_txb(const MACROBLOCK *x, const int plane, const int block, + const TX_SIZE tx_size, const TX_TYPE tx_type, + const TXB_CTX *const txb_ctx, int reduced_tx_set_used) { const struct macroblock_plane *p = &x->plane[plane]; const int eob = p->eobs[block]; const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); @@ -835,18 +930,47 @@ int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x, const MACROBLOCKD *const xd = &x->e_mbd; const TX_CLASS tx_class = tx_type_to_class[tx_type]; -#define WAREHOUSE_EFFICIENTS_TXB_CASE(tx_class_literal) \ - case tx_class_literal: \ - return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p, \ - eob, plane_type, coeff_costs, xd, tx_type, \ - tx_class_literal); - switch (tx_class) { - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_2D); - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_HORIZ); - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_VERT); -#undef WAREHOUSE_EFFICIENTS_TXB_CASE - default: assert(false); return 0; + return warehouse_efficients_txb(x, plane, block, tx_size, txb_ctx, p, eob, + plane_type, coeff_costs, xd, tx_type, + tx_class, reduced_tx_set_used); +} + +int av1_cost_coeffs_txb_laplacian(const MACROBLOCK *x, const int plane, + const int block, const TX_SIZE tx_size, + const TX_TYPE tx_type, + const TXB_CTX *const txb_ctx, + const int reduced_tx_set_used, + const int adjust_eob) { + const struct macroblock_plane *p = &x->plane[plane]; + int eob = p->eobs[block]; + + if (adjust_eob) { + const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); + const int16_t *scan = scan_order->scan; + tran_low_t *tcoeff = p->coeff + BLOCK_OFFSET(block); + tran_low_t *qcoeff = p->qcoeff + BLOCK_OFFSET(block); + const MACROBLOCKD *xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *dqcoeff = pd->dqcoeff + BLOCK_OFFSET(block); + update_coeff_eob_fast(&eob, av1_get_tx_scale(tx_size), p->dequant_QTX, scan, + tcoeff, qcoeff, dqcoeff); + p->eobs[block] = eob; + } + + const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); + const PLANE_TYPE plane_type = get_plane_type(plane); + const LV_MAP_COEFF_COST *const coeff_costs = + &x->coeff_costs[txs_ctx][plane_type]; + if (eob == 0) { + return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; } + + const MACROBLOCKD *const xd = &x->e_mbd; + const TX_CLASS tx_class = tx_type_to_class[tx_type]; + + return warehouse_efficients_txb_laplacian( + x, plane, block, tx_size, txb_ctx, eob, plane_type, coeff_costs, xd, + tx_type, tx_class, reduced_tx_set_used); } static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, @@ -1014,13 +1138,8 @@ static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, return update; } -// These numbers are empirically obtained. -static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { - { 17, 13 }, - { 16, 10 }, -}; - -void hbt_init() { +#if CONFIG_HTB_TRELLIS +static void hbt_init() { hbt_hash_table = aom_malloc(sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH); memset(hbt_hash_table, 0, @@ -1032,11 +1151,11 @@ void hbt_init() { void hbt_destroy() { aom_free(hbt_hash_table); } -int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, - TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, int fast_mode, - int *rate_cost) { +static int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, + TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, + const LV_MAP_EOB_COST *txb_eob_costs, + const struct macroblock_plane *p, int block, + int fast_mode, int *rate_cost) { (void)fast_mode; const int16_t *scan = txb_info->scan_order->scan; int prev_eob = txb_info->eob; @@ -1098,9 +1217,9 @@ int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, return txb_info->eob; } -int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index, - TxbInfo *txb_info, const struct macroblock_plane *p, int block, - int *rate_cost) { +static int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index, + TxbInfo *txb_info, const struct macroblock_plane *p, + int block, int *rate_cost) { const int16_t *scan = txb_info->scan_order->scan; int new_eob = 0; int update = 0; @@ -1140,11 +1259,12 @@ int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index, return txb_info->eob; } -int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, - TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, int fast_mode, - int *rate_cost) { +static int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, + TxbInfo *txb_info, + const LV_MAP_COEFF_COST *txb_costs, + const LV_MAP_EOB_COST *txb_eob_costs, + const struct macroblock_plane *p, int block, + int fast_mode, int *rate_cost) { // Check for qcoeff match int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH; int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE; @@ -1163,10 +1283,11 @@ int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, } } -int hbt_create_hashes(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, - int fast_mode, int *rate_cost) { +static int hbt_create_hashes(TxbInfo *txb_info, + const LV_MAP_COEFF_COST *txb_costs, + const LV_MAP_EOB_COST *txb_eob_costs, + const struct macroblock_plane *p, int block, + int fast_mode, int *rate_cost) { // Initialize hash table if needed. if (hbt_needs_init) { hbt_init(); @@ -1287,21 +1408,49 @@ int hbt_create_hashes(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs, txb_eob_costs, p, block, fast_mode, rate_cost); } +#endif // CONFIG_HTB_TRELLIS -static AOM_FORCE_INLINE int get_coeff_cost_simple( +static AOM_FORCE_INLINE int get_two_coeff_cost_simple( int ci, tran_low_t abs_qc, int coeff_ctx, const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class, - const uint8_t *levels) { + const uint8_t *levels, int *cost_low) { // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) // and not the last (scan_idx != eob - 1) assert(ci > 0); int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; + int diff = 0; + if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4]; if (abs_qc) { cost += av1_cost_literal(1); if (abs_qc > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); - cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]); - cost += get_golomb_cost(abs_qc); + int brcost_diff = 0; + cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx], + &brcost_diff); + diff += brcost_diff; + } + } + *cost_low = cost - diff; + + return cost; +} + +static INLINE int get_coeff_cost_eob(int ci, tran_low_t abs_qc, int sign, + int coeff_ctx, int dc_sign_ctx, + const LV_MAP_COEFF_COST *txb_costs, + int bwl, TX_CLASS tx_class) { + int cost = 0; + cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; + if (abs_qc != 0) { + if (ci == 0) { + cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; + } else { + cost += av1_cost_literal(1); + } + if (abs_qc > NUM_BASE_LEVELS) { + int br_ctx; + br_ctx = get_br_ctx_eob(ci, bwl, tx_class); + cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); } } return cost; @@ -1326,9 +1475,12 @@ static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc, cost += av1_cost_literal(1); } if (abs_qc > NUM_BASE_LEVELS) { - const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); - cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]); - cost += get_golomb_cost(abs_qc); + int br_ctx; + if (is_last) + br_ctx = get_br_ctx_eob(ci, bwl, tx_class); + else + br_ctx = get_br_ctx(levels, ci, bwl, tx_class); + cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); } } return cost; @@ -1350,8 +1502,9 @@ static INLINE void update_coeff_general( TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift, int dc_sign_ctx, const int16_t *dequant, const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, - tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) { - const int dqv = dequant[si != 0]; + tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, + const qm_val_t *iqmatrix) { + const int dqv = get_dqv(dequant, scan[si], iqmatrix); const int ci = scan[si]; const tran_low_t qc = qcoeff[ci]; const int is_last = si == (eob - 1); @@ -1372,13 +1525,23 @@ static INLINE void update_coeff_general( const int64_t rd = RDCOST(rdmult, rate, dist); tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); - const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift); - const int rate_low = - get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx, - dc_sign_ctx, txb_costs, bwl, tx_class, levels); - const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); + tran_low_t abs_qc_low; + int64_t dist_low, rd_low; + int rate_low; + if (abs_qc == 1) { + abs_qc_low = qc_low = dqc_low = 0; + dist_low = dist0; + rate_low = txb_costs->base_cost[coeff_ctx][0]; + } else { + get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); + abs_qc_low = abs_qc - 1; + dist_low = get_coeff_dist(tqc, dqc_low, shift); + rate_low = + get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx, + dc_sign_ctx, txb_costs, bwl, tx_class, levels); + } + + rd_low = RDCOST(rdmult, rate_low, dist_low); if (rd_low < rd) { qcoeff[ci] = qc_low; dqcoeff[ci] = dqc_low; @@ -1397,8 +1560,8 @@ static AOM_FORCE_INLINE void update_coeff_simple( int bwl, int64_t rdmult, int shift, const int16_t *dequant, const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, - uint8_t *levels) { - const int dqv = dequant[1]; + uint8_t *levels, const qm_val_t *iqmatrix) { + const int dqv = get_dqv(dequant, scan[si], iqmatrix); (void)eob; // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) // and not the last (scan_idx != eob - 1) @@ -1412,28 +1575,28 @@ static AOM_FORCE_INLINE void update_coeff_simple( *accu_rate += txb_costs->base_cost[coeff_ctx][0]; } else { const tran_low_t abs_qc = abs(qc); - const tran_low_t tqc = tcoeff[ci]; - const tran_low_t dqc = dqcoeff[ci]; - const int rate = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs, - bwl, tx_class, levels); - if (abs(dqc) < abs(tqc)) { + const tran_low_t abs_tqc = abs(tcoeff[ci]); + const tran_low_t abs_dqc = abs(dqcoeff[ci]); + int rate_low = 0; + const int rate = get_two_coeff_cost_simple( + ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low); + if (abs_dqc < abs_tqc) { *accu_rate += rate; return; } - const int64_t dist = get_coeff_dist(tqc, dqc, shift); + + const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift); const int64_t rd = RDCOST(rdmult, rate, dist); - const int sign = (qc < 0) ? 1 : 0; - tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift); - const int rate_low = get_coeff_cost_simple( - ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels); + const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift; + const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift); const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); + if (rd_low < rd) { - qcoeff[ci] = qc_low; - dqcoeff[ci] = dqc_low; + const int sign = (qc < 0) ? 1 : 0; + qcoeff[ci] = (-sign ^ abs_qc_low) + sign; + dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign; levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); *accu_rate += rate_low; } else { @@ -1448,8 +1611,9 @@ static AOM_FORCE_INLINE void update_coeff_eob( int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant, const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs, const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, - tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) { - const int dqv = dequant[si != 0]; + tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness, + const qm_val_t *iqmatrix) { + const int dqv = get_dqv(dequant, scan[si], iqmatrix); assert(si != *eob - 1); const int ci = scan[si]; const tran_low_t qc = qcoeff[ci]; @@ -1471,40 +1635,42 @@ static AOM_FORCE_INLINE void update_coeff_eob( int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist); tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); - const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0; - const int rate_low = - get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, - txb_costs, bwl, tx_class, levels); - const int64_t rd_low = - RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low); + tran_low_t abs_qc_low; + int64_t dist_low, rd_low; + int rate_low; + if (abs_qc == 1) { + abs_qc_low = 0; + dqc_low = qc_low = 0; + dist_low = 0; + rate_low = txb_costs->base_cost[coeff_ctx][0]; + rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist); + } else { + get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); + abs_qc_low = abs_qc - 1; + dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0; + rate_low = + get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, + dc_sign_ctx, txb_costs, bwl, tx_class, levels); + rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low); + } int lower_level_new_eob = 0; const int new_eob = si + 1; - uint8_t tmp_levels[3]; - for (int ni = 0; ni < *nz_num; ++ni) { - const int last_ci = nz_ci[ni]; - tmp_levels[ni] = levels[get_padded_idx(last_ci, bwl)]; - levels[get_padded_idx(last_ci, bwl)] = 0; - } - - const int coeff_ctx_new_eob = get_lower_levels_ctx_general( - 1, si, bwl, height, levels, ci, tx_size, tx_class); + const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si); const int new_eob_cost = get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class); int rate_coeff_eob = - new_eob_cost + get_coeff_cost_general(1, ci, abs_qc, sign, - coeff_ctx_new_eob, dc_sign_ctx, - txb_costs, bwl, tx_class, levels); + new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, + dc_sign_ctx, txb_costs, bwl, + tx_class); int64_t dist_new_eob = dist; int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob); if (abs_qc_low > 0) { const int rate_coeff_eob_low = - new_eob_cost + - get_coeff_cost_general(1, ci, abs_qc_low, sign, coeff_ctx_new_eob, - dc_sign_ctx, txb_costs, bwl, tx_class, levels); + new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign, + coeff_ctx_new_eob, dc_sign_ctx, + txb_costs, bwl, tx_class); const int64_t dist_new_eob_low = dist_low; const int64_t rd_new_eob_low = RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low); @@ -1526,7 +1692,7 @@ static AOM_FORCE_INLINE void update_coeff_eob( if (sharpness == 0 && rd_new_eob < rd) { for (int ni = 0; ni < *nz_num; ++ni) { int last_ci = nz_ci[ni]; - // levels[get_padded_idx(last_ci, bwl)] = 0; + levels[get_padded_idx(last_ci, bwl)] = 0; qcoeff[last_ci] = 0; dqcoeff[last_ci] = 0; } @@ -1536,10 +1702,6 @@ static AOM_FORCE_INLINE void update_coeff_eob( *accu_dist = dist_new_eob; lower_level = lower_level_new_eob; } else { - for (int ni = 0; ni < *nz_num; ++ni) { - const int last_ci = nz_ci[ni]; - levels[get_padded_idx(last_ci, bwl)] = tmp_levels[ni]; - } *accu_rate += rate; *accu_dist += dist; } @@ -1579,54 +1741,74 @@ static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob, int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, TX_TYPE tx_type, const TXB_CTX *const txb_ctx, int *rate_cost, - int sharpness) { - const AV1_COMMON *cm = &cpi->common; + int sharpness, int fast_mode) { MACROBLOCKD *xd = &x->e_mbd; + struct macroblockd_plane *pd = &xd->plane[plane]; + const struct macroblock_plane *p = &x->plane[plane]; + const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); + const int16_t *scan = scan_order->scan; + const int shift = av1_get_tx_scale(tx_size); + int eob = p->eobs[block]; + const int16_t *dequant = p->dequant_QTX; + const qm_val_t *iqmatrix = + av1_get_iqmatrix(&cpi->common.quant_params, xd, plane, tx_size, tx_type); + const int block_offset = BLOCK_OFFSET(block); + tran_low_t *qcoeff = p->qcoeff + block_offset; + tran_low_t *dqcoeff = pd->dqcoeff + block_offset; + const tran_low_t *tcoeff = p->coeff + block_offset; + + // This function is not called if eob = 0. + assert(eob > 0); + + if (fast_mode) { + update_coeff_eob_fast(&eob, shift, dequant, scan, tcoeff, qcoeff, dqcoeff); + p->eobs[block] = eob; + if (eob == 0) { + *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size); + return eob; + } + } + + const AV1_COMMON *cm = &cpi->common; const PLANE_TYPE plane_type = get_plane_type(plane); const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); const TX_CLASS tx_class = tx_type_to_class[tx_type]; const MB_MODE_INFO *mbmi = xd->mi[0]; - const struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; - tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); - const int16_t *dequant = p->dequant_QTX; const int bwl = get_txb_bwl(tx_size); const int width = get_txb_wide(tx_size); const int height = get_txb_high(tx_size); assert(width == (1 << bwl)); const int is_inter = is_inter_block(mbmi); - const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); - const int16_t *scan = scan_order->scan; const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type]; const int eob_multi_size = txsize_log2_minus4[tx_size]; const LV_MAP_EOB_COST *txb_eob_costs = &x->eob_costs[eob_multi_size][plane_type]; - const int shift = av1_get_tx_scale(tx_size); - const int64_t rdmult = - ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) + - 2) >> + const int rshift = (sharpness + (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4 ? 7 - mbmi->segment_id : 2) + (cpi->oxcf.aq_mode != VARIANCE_AQ && - cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0 + cpi->oxcf.deltaq_mode == DELTA_Q_PERCEPTUAL && + cm->delta_q_info.delta_q_present_flag && x->sb_energy_level < 0 ? (3 - x->sb_energy_level) : 0)); + const int64_t rdmult = + (((int64_t)x->rdmult * + (plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) + + 2) >> + rshift; uint8_t levels_buf[TX_PAD_2D]; uint8_t *const levels = set_levels(levels_buf, width); - av1_txb_init_levels(qcoeff, width, height, levels); + if (eob > 1) av1_txb_init_levels(qcoeff, width, height, levels); // TODO(angirbird): check iqmatrix const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0]; const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; - int eob = p->eobs[block]; const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class); int accu_rate = eob_cost; int64_t accu_dist = 0; @@ -1642,15 +1824,14 @@ int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class, bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, - levels); + levels, iqmatrix); --si; } else { assert(abs_qc == 1); - const int coeff_ctx = get_lower_levels_ctx_general( - 1, si, bwl, height, levels, ci, tx_size, tx_class); - accu_rate += get_coeff_cost_general(1, ci, abs_qc, sign, coeff_ctx, - txb_ctx->dc_sign_ctx, txb_costs, bwl, - tx_class, levels); + const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si); + accu_rate += + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, txb_ctx->dc_sign_ctx, + txb_costs, bwl, tx_class); const tran_low_t tqc = tcoeff[ci]; const tran_low_t dqc = dqcoeff[ci]; const int64_t dist = get_coeff_dist(tqc, dqc, shift); @@ -1661,12 +1842,12 @@ int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, #define UPDATE_COEFF_EOB_CASE(tx_class_literal) \ case tx_class_literal: \ - for (; si >= 0 && nz_num <= max_nz_num; --si) { \ + for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) { \ update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si, \ tx_size, tx_class_literal, bwl, height, \ txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \ txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff, \ - levels, sharpness); \ + levels, sharpness, iqmatrix); \ } \ break; switch (tx_class) { @@ -1687,7 +1868,7 @@ int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, for (; si >= 1; --si) { \ update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \ rdmult, shift, dequant, scan, txb_costs, tcoeff, \ - qcoeff, dqcoeff, levels); \ + qcoeff, dqcoeff, levels, iqmatrix); \ } \ break; switch (tx_class) { @@ -1705,10 +1886,11 @@ int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class, bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, - levels); + levels, iqmatrix); } - const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); + const int tx_type_cost = get_tx_type_cost(x, xd, plane, tx_size, tx_type, + cm->features.reduced_tx_set_used); if (eob == 0) accu_rate += skip_cost; else @@ -1728,18 +1910,20 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, int blk_row, int blk_col, int block, TX_SIZE tx_size, TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) { const AV1_COMMON *cm = &cpi->common; + const int reduced_tx_set_used = cm->features.reduced_tx_set_used; MACROBLOCKD *const xd = &x->e_mbd; const PLANE_TYPE plane_type = get_plane_type(plane); const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); + const TX_TYPE tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, + tx_size, reduced_tx_set_used); const MB_MODE_INFO *mbmi = xd->mi[0]; const struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; const int eob = p->eobs[block]; - tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); + const int block_offset = BLOCK_OFFSET(block); + tran_low_t *qcoeff = p->qcoeff + block_offset; + tran_low_t *dqcoeff = pd->dqcoeff + block_offset; + const tran_low_t *tcoeff = p->coeff + block_offset; const int16_t *dequant = p->dequant_QTX; const int seg_eob = av1_get_max_eob(tx_size); const int bwl = get_txb_bwl(tx_size); @@ -1754,25 +1938,24 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, const int shift = av1_get_tx_scale(tx_size); const int64_t rdmult = - ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) + + (((int64_t)x->rdmult * plane_rd_mult[is_inter][plane_type] + << (2 * (xd->bd - 8))) + 2) >> 2; uint8_t levels_buf[TX_PAD_2D]; uint8_t *const levels = set_levels(levels_buf, width); - const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size); const qm_val_t *iqmatrix = - IS_2D_TRANSFORM(tx_type) - ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size] - : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size]; + av1_get_iqmatrix(&cpi->common.quant_params, xd, plane, tx_size, tx_type); assert(width == (1 << bwl)); - const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); + const int tx_type_cost = + get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used); TxbInfo txb_info = { - qcoeff, levels, dqcoeff, tcoeff, dequant, shift, - tx_size, txs_ctx, tx_type, bwl, width, height, - eob, seg_eob, scan_order, txb_ctx, rdmult, &cm->coeff_ctx_table, - iqmatrix, tx_type_cost, + qcoeff, levels, dqcoeff, tcoeff, dequant, shift, tx_size, + txs_ctx, tx_type, bwl, width, height, eob, seg_eob, + scan_order, txb_ctx, rdmult, iqmatrix, tx_type_cost, }; +#if CONFIG_HTB_TRELLIS // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls // by storing the coefficient deltas in a hash table. // Currently disabled in speedfeatures.c @@ -1780,7 +1963,9 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block, fast_mode, rate_cost); } - +#else + (void)fast_mode; +#endif // CONFIG_HTB_TRELLIS av1_txb_init_levels(qcoeff, width, height, levels); const int update = @@ -1812,34 +1997,14 @@ int av1_get_txb_entropy_context(const tran_low_t *qcoeff, return cul_level; } -void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg) { - struct tokenize_b_args *const args = arg; - const AV1_COMP *cpi = args->cpi; - const AV1_COMMON *cm = &cpi->common; - ThreadData *const td = args->td; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; - const uint16_t eob = p->eobs[block]; - const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - const PLANE_TYPE plane_type = pd->plane_type; - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob); - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, - blk_row); -} - -static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, - int blk_row, int blk_col, int plane, - TX_SIZE tx_size, FRAME_COUNTS *counts, +static void update_tx_type_count(const AV1_COMP *cpi, const AV1_COMMON *cm, + MACROBLOCKD *xd, int blk_row, int blk_col, + int plane, TX_SIZE tx_size, + FRAME_COUNTS *counts, uint8_t allow_update_cdf) { MB_MODE_INFO *mbmi = xd->mi[0]; int is_inter = is_inter_block(mbmi); + const int reduced_tx_set_used = cm->features.reduced_tx_set_used; FRAME_CONTEXT *fc = xd->tile_ctx; #if !CONFIG_ENTROPY_STATS (void)counts; @@ -1847,15 +2012,30 @@ static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, // Only y plane's tx_type is updated if (plane > 0) return; - TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size, - cm->reduced_tx_set_used); - if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && - cm->base_qindex > 0 && !mbmi->skip && + const TX_TYPE tx_type = av1_get_tx_type(xd, PLANE_TYPE_Y, blk_row, blk_col, + tx_size, reduced_tx_set_used); + if (is_inter) { + if (cpi->oxcf.use_inter_dct_only) { + assert(tx_type == DCT_DCT); + } + } else { + if (cpi->oxcf.use_intra_dct_only) { + assert(tx_type == DCT_DCT); + } else if (cpi->oxcf.use_intra_default_tx_only) { + const TX_TYPE default_type = get_default_tx_type( + PLANE_TYPE_Y, xd, tx_size, cpi->is_screen_content_type); + (void)default_type; + assert(tx_type == default_type); + } + } + + if (get_ext_tx_types(tx_size, is_inter, reduced_tx_set_used) > 1 && + cm->quant_params.base_qindex > 0 && !mbmi->skip && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); + const int eset = get_ext_tx_set(tx_size, is_inter, reduced_tx_set_used); if (eset > 0) { const TxSetType tx_set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); + av1_get_ext_tx_set_type(tx_size, is_inter, reduced_tx_set_used); if (is_inter) { if (allow_update_cdf) { update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]], @@ -1899,84 +2079,98 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = xd->mi[0]; const int eob = p->eobs[block]; - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col, - pd->left_context + blk_row, &txb_ctx); - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - const uint8_t allow_update_cdf = args->allow_update_cdf; - const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#if CONFIG_ENTROPY_STATS - int cdf_idx = cm->coef_cdf_category; -#endif // CONFIG_ENTROPY_STATS - + const int block_offset = BLOCK_OFFSET(block); + tran_low_t *qcoeff = p->qcoeff + block_offset; + const PLANE_TYPE plane_type = pd->plane_type; + const TX_TYPE tx_type = + av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, + cm->features.reduced_tx_set_used); + const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); + tran_low_t *tcoeff; + assert(args->dry_run != DRY_RUN_COSTCOEFFS); + if (args->dry_run == OUTPUT_ENABLED) { + MB_MODE_INFO *mbmi = xd->mi[0]; + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, + pd->above_entropy_context + blk_col, + pd->left_entropy_context + blk_row, &txb_ctx); + const int bwl = get_txb_bwl(tx_size); + const int width = get_txb_wide(tx_size); + const int height = get_txb_high(tx_size); + const uint8_t allow_update_cdf = args->allow_update_cdf; + const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size); + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; #if CONFIG_ENTROPY_STATS - ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0]; + int cdf_idx = cm->coef_cdf_category; + ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0]; #endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0, - 2); - } - - x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx; - x->mbmi_ext->eobs[plane][block] = eob; + if (allow_update_cdf) { + update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], + eob == 0, 2); + } - if (eob == 0) { - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row); - return; - } + CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff; + const int txb_offset = + x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); + uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset; + uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; + entropy_ctx[block] = txb_ctx.txb_skip_ctx; + eob_txb[block] = eob; + + if (eob == 0) { + av1_set_entropy_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, + blk_row); + return; + } + const int segment_id = mbmi->segment_id; + const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size); + tran_low_t *tcoeff_txb = + cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset; + tcoeff = tcoeff_txb + block_offset; + memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); + + uint8_t levels_buf[TX_PAD_2D]; + uint8_t *const levels = set_levels(levels_buf, width); + av1_txb_init_levels(tcoeff, width, height, levels); + update_tx_type_count(cpi, cm, xd, blk_row, blk_col, plane, tx_size, + td->counts, allow_update_cdf); - tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); - const int segment_id = mbmi->segment_id; - const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size); - const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); + const TX_CLASS tx_class = tx_type_to_class[tx_type]; + const int16_t *const scan = scan_order->scan; - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - av1_txb_init_levels(tcoeff, width, height, levels); - update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts, - allow_update_cdf); + // record tx type usage + td->rd_counts.tx_type_used[tx_size][tx_type]++; - const PLANE_TYPE plane_type = pd->plane_type; - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int16_t *const scan = scan_order->scan; #if CONFIG_ENTROPY_STATS - av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx, - td->counts, allow_update_cdf); + av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx, + td->counts, allow_update_cdf); #else - av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx, - allow_update_cdf); + av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx, + allow_update_cdf); #endif - DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); - av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); + DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); + av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, + coeff_contexts); - for (int c = eob - 1; c >= 0; --c) { - const int pos = scan[c]; - const int coeff_ctx = coeff_contexts[pos]; - const tran_low_t v = qcoeff[pos]; - const tran_low_t level = abs(v); + for (int c = eob - 1; c >= 0; --c) { + const int pos = scan[c]; + const int coeff_ctx = coeff_contexts[pos]; + const tran_low_t v = qcoeff[pos]; + const tran_low_t level = abs(v); - if (allow_update_cdf) { - if (c == eob - 1) { - assert(coeff_ctx < 4); - update_cdf( - ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], - AOMMIN(level, 3) - 1, 3); - } else { - update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], - AOMMIN(level, 3), 4); + if (allow_update_cdf) { + if (c == eob - 1) { + assert(coeff_ctx < 4); + update_cdf( + ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], + AOMMIN(level, 3) - 1, 3); + } else { + update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], + AOMMIN(level, 3), 4); + } } - } - { if (c == eob - 1) { assert(coeff_ctx < 4); #if CONFIG_ENTROPY_STATS @@ -1987,76 +2181,81 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, [coeff_ctx][AOMMIN(level, 3)]; #endif } - } - if (level > NUM_BASE_LEVELS) { - const int base_range = level - 1 - NUM_BASE_LEVELS; - const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); - for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); - if (allow_update_cdf) { - update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)] - [plane_type][br_ctx], - k, BR_CDF_SIZE); - } - for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { + if (level > NUM_BASE_LEVELS) { + const int base_range = level - 1 - NUM_BASE_LEVELS; + const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); + for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { + const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); + if (allow_update_cdf) { + update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)] + [plane_type][br_ctx], + k, BR_CDF_SIZE); + } + for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { #if CONFIG_ENTROPY_STATS - ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps] - [br_ctx][lps == k]; + ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type] + [lps][br_ctx][lps == k]; #endif // CONFIG_ENTROPY_STATS - if (lps == k) break; - } + if (lps == k) break; + } #if CONFIG_ENTROPY_STATS - ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)] - [plane_type][br_ctx][k]; + ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)] + [plane_type][br_ctx][k]; #endif - if (k < BR_CDF_SIZE - 1) break; + if (k < BR_CDF_SIZE - 1) break; + } } } - } - - // Update the context needed to code the DC sign (if applicable) - if (tcoeff[0] != 0) { - const int dc_sign = (tcoeff[0] < 0) ? 1 : 0; - const int dc_sign_ctx = txb_ctx.dc_sign_ctx; + // Update the context needed to code the DC sign (if applicable) + if (tcoeff[0] != 0) { + const int dc_sign = (tcoeff[0] < 0) ? 1 : 0; + const int dc_sign_ctx = txb_ctx.dc_sign_ctx; #if CONFIG_ENTROPY_STATS - ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign]; + ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign]; #endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) - update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2); - x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx; + if (allow_update_cdf) + update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2); + entropy_ctx[block] |= dc_sign_ctx << DC_SIGN_CTX_SHIFT; + } + } else { + tcoeff = qcoeff; } - const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob); - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, - blk_row); + av1_set_entropy_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, + blk_col, blk_row); } void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, - RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate, - int mi_row, int mi_col, uint8_t allow_update_cdf) { + RUN_TYPE dry_run, BLOCK_SIZE bsize, + uint8_t allow_update_cdf) { const AV1_COMMON *const cm = &cpi->common; const int num_planes = av1_num_planes(cm); MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = xd->mi[0]; - struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf }; - (void)rate; - (void)mi_row; - (void)mi_col; + struct tokenize_b_args arg = { cpi, td, 0, allow_update_cdf, dry_run }; if (mbmi->skip) { - av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); + av1_reset_entropy_context(xd, bsize, num_planes); return; } - if (!dry_run) { - av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, - av1_update_and_record_txb_context, &arg, - num_planes); - } else if (dry_run == DRY_RUN_NORMAL) { - av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, - av1_update_txb_context_b, &arg, num_planes); - } else { - printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); - assert(0); + for (int plane = 0; plane < num_planes; ++plane) { + if (plane && !xd->is_chroma_ref) break; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int ss_x = pd->subsampling_x; + const int ss_y = pd->subsampling_y; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y); + av1_foreach_transformed_block_in_plane( + xd, plane_bsize, plane, av1_update_and_record_txb_context, &arg); } } + +CB_COEFF_BUFFER *av1_get_cb_coeff_buffer(const struct AV1_COMP *cpi, int mi_row, + int mi_col) { + const AV1_COMMON *const cm = &cpi->common; + const int mib_size_log2 = cm->seq_params.mib_size_log2; + const int stride = (cm->mi_params.mi_cols >> mib_size_log2) + 1; + const int offset = + (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2); + return cpi->coeff_buffer_base + offset; +} |