summaryrefslogtreecommitdiff
path: root/media/libaom/src/av1/encoder/block.h
diff options
context:
space:
mode:
Diffstat (limited to 'media/libaom/src/av1/encoder/block.h')
-rw-r--r--media/libaom/src/av1/encoder/block.h323
1 files changed, 223 insertions, 100 deletions
diff --git a/media/libaom/src/av1/encoder/block.h b/media/libaom/src/av1/encoder/block.h
index 0bc5dea82..5a74567a4 100644
--- a/media/libaom/src/av1/encoder/block.h
+++ b/media/libaom/src/av1/encoder/block.h
@@ -15,23 +15,48 @@
#include "av1/common/entropymv.h"
#include "av1/common/entropy.h"
#include "av1/common/mvref_common.h"
-#include "av1/encoder/hash.h"
-#if CONFIG_DIST_8X8
-#include "aom/aomcx.h"
+
+#include "av1/encoder/enc_enums.h"
+#if !CONFIG_REALTIME_ONLY
+#include "av1/encoder/partition_cnn_weights.h"
#endif
+#include "av1/encoder/hash.h"
+
#ifdef __cplusplus
extern "C" {
#endif
+#define MC_FLOW_BSIZE_1D 16
+#define MC_FLOW_NUM_PELS (MC_FLOW_BSIZE_1D * MC_FLOW_BSIZE_1D)
+#define MAX_MC_FLOW_BLK_IN_SB (MAX_SB_SIZE / MC_FLOW_BSIZE_1D)
+#define MAX_WINNER_MODE_COUNT_INTRA 3
+#define MAX_WINNER_MODE_COUNT_INTER 1
+typedef struct {
+ MB_MODE_INFO mbmi;
+ RD_STATS rd_cost;
+ int64_t rd;
+ int rate_y;
+ int rate_uv;
+ uint8_t color_index_map[64 * 64];
+ THR_MODES mode_index;
+} WinnerModeStats;
+
typedef struct {
unsigned int sse;
int sum;
unsigned int var;
} DIFF;
+enum {
+ NO_TRELLIS_OPT, // No trellis optimization
+ FULL_TRELLIS_OPT, // Trellis optimization in all stages
+ FINAL_PASS_TRELLIS_OPT, // Trellis optimization in only the final encode pass
+ NO_ESTIMATE_YRD_TRELLIS_OPT // Disable trellis in estimate_yrd_for_sb
+} UENUM1BYTE(TRELLIS_OPT_TYPE);
+
typedef struct macroblock_plane {
- DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(32, int16_t, src_diff[MAX_SB_SQUARE]);
tran_low_t *qcoeff;
tran_low_t *coeff;
uint16_t *eobs;
@@ -54,10 +79,10 @@ typedef struct macroblock_plane {
typedef struct {
int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
- int base_cost[SIG_COEF_CONTEXTS][4];
+ int base_cost[SIG_COEF_CONTEXTS][8];
int eob_extra_cost[EOB_COEF_CONTEXTS][2];
int dc_sign_cost[DC_SIGN_CONTEXTS][2];
- int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
+ int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
} LV_MAP_COEFF_COST;
typedef struct {
@@ -67,31 +92,32 @@ typedef struct {
typedef struct {
tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- uint8_t txb_skip_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- int dc_sign_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+ // Transform block entropy contexts.
+ // Bits 0~3: txb_skip_ctx; bits 4~5: dc_sign_ctx.
+ uint8_t entropy_ctx[MAX_MB_PLANE]
+ [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
} CB_COEFF_BUFFER;
typedef struct {
- int16_t mode_context[MODE_CTX_REF_FRAMES];
// TODO(angiebird): Reduce the buffer size according to sb_type
- tran_low_t *tcoeff[MAX_MB_PLANE];
- uint16_t *eobs[MAX_MB_PLANE];
- uint8_t *txb_skip_ctx[MAX_MB_PLANE];
- int *dc_sign_ctx[MAX_MB_PLANE];
- uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
- CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
+ uint16_t weight[MODE_CTX_REF_FRAMES][USABLE_REF_MV_STACK_SIZE];
int_mv global_mvs[REF_FRAMES];
- int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
+ int16_t mode_context[MODE_CTX_REF_FRAMES];
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
} MB_MODE_INFO_EXT;
+// Structure to store best mode information at frame level. This
+// frame level information will be used during bitstream preparation stage.
typedef struct {
- int col_min;
- int col_max;
- int row_min;
- int row_max;
-} MvLimits;
+ CANDIDATE_MV ref_mv_stack[USABLE_REF_MV_STACK_SIZE];
+ uint16_t weight[USABLE_REF_MV_STACK_SIZE];
+ // TODO(Ravi/Remya): Reduce the buffer size of global_mvs
+ int_mv global_mvs[REF_FRAMES];
+ int cb_offset;
+ int16_t mode_context;
+ uint8_t ref_mv_count;
+} MB_MODE_INFO_EXT_FRAME;
typedef struct {
uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
@@ -102,7 +128,7 @@ typedef struct {
TX_SIZE tx_size;
TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
+ uint8_t tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
RD_STATS rd_stats;
uint32_t hash_value;
} MB_RD_INFO;
@@ -125,6 +151,7 @@ typedef struct {
uint8_t txb_entropy_ctx;
uint8_t valid;
uint8_t fast; // This is not being used now.
+ uint8_t perform_block_coeff_opt;
} TXB_RD_INFO;
#define TX_SIZE_RD_RECORD_BUFFER_LEN 256
@@ -140,39 +167,57 @@ typedef struct tx_size_rd_info_node {
struct tx_size_rd_info_node *children[4];
} TXB_RD_INFO_NODE;
-// Region size for mode decision sampling in the first pass of partition
-// search(two_pass_partition_search speed feature), in units of mi size(4).
-// Used by the mode_pruning_based_on_two_pass_partition_search speed feature.
-#define FIRST_PARTITION_PASS_SAMPLE_REGION 8
-#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3
-#define FIRST_PARTITION_PASS_STATS_TABLES \
- (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \
- (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-#define FIRST_PARTITION_PASS_STATS_STRIDE \
- (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-
-static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) {
- const int row =
- (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
- const int col =
- (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
- return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col;
-}
-
+// Simple translation rd state for prune_comp_search_by_single_result
typedef struct {
- uint8_t ref0_counts[REF_FRAMES]; // Counters for ref_frame[0].
- uint8_t ref1_counts[REF_FRAMES]; // Counters for ref_frame[1].
- int sample_counts; // Number of samples collected.
-} FIRST_PARTITION_PASS_STATS;
+ RD_STATS rd_stats;
+ RD_STATS rd_stats_y;
+ RD_STATS rd_stats_uv;
+ uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ uint8_t tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ uint8_t skip;
+ uint8_t disable_skip;
+ uint8_t early_skipped;
+} SimpleRDState;
+
+// 4: NEAREST, NEW, NEAR, GLOBAL
+#define SINGLE_REF_MODES ((REF_FRAMES - 1) * 4)
-#define MAX_INTERP_FILTER_STATS 64
+#define MAX_COMP_RD_STATS 64
typedef struct {
- InterpFilters filters;
+ int32_t rate[COMPOUND_TYPES];
+ int64_t dist[COMPOUND_TYPES];
+ int32_t model_rate[COMPOUND_TYPES];
+ int64_t model_dist[COMPOUND_TYPES];
+ int comp_rs2[COMPOUND_TYPES];
int_mv mv[2];
- int8_t ref_frames[2];
- COMPOUND_TYPE comp_type;
-} INTERPOLATION_FILTER_STATS;
-
+ MV_REFERENCE_FRAME ref_frames[2];
+ PREDICTION_MODE mode;
+ int_interpfilters filter;
+ int ref_mv_idx;
+ int is_global[2];
+ INTERINTER_COMPOUND_DATA interinter_comp;
+} COMP_RD_STATS;
+
+// Struct for buffers used by av1_compound_type_rd() function.
+// For sizes and alignment of these arrays, refer to
+// alloc_compound_type_rd_buffers() function.
+typedef struct {
+ uint8_t *pred0;
+ uint8_t *pred1;
+ int16_t *residual1; // src - pred1
+ int16_t *diff10; // pred1 - pred0
+ uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask
+} CompoundTypeRdBuffers;
+
+enum {
+ MV_COST_ENTROPY, // Use the entropy rate of the mv as the cost
+ MV_COST_L1_LOWRES, // Use the l1 norm of the mv as the cost (<480p)
+ MV_COST_L1_MIDRES, // Use the l1 norm of the mv as the cost (>=480p)
+ MV_COST_L1_HDRES, // Use the l1 norm of the mv as the cost (>=720p)
+ MV_COST_NONE // Use 0 as as cost irrespective of the current mv
+} UENUM1BYTE(MV_COST_TYPE);
+
+struct inter_modes_info;
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
@@ -182,20 +227,8 @@ struct macroblock {
// to select transform kernel.
int rd_model;
- // Indicate if the encoder is running in the first pass partition search.
- // In that case, apply certain speed features therein to reduce the overhead
- // cost in the first pass search.
- int cb_partition_scan;
-
- FIRST_PARTITION_PASS_STATS
- first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES];
-
- // [comp_idx][saved stat_idx]
- INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS];
- int interp_filter_stats_idx[2];
-
- // Activate constrained coding block partition search range.
- int use_cb_search_range;
+ // prune_comp_search_by_single_result (3:MAX_REF_MV_SEARCH)
+ SimpleRDState simple_rd_state[SINGLE_REF_MODES][3];
// Inter macroblock RD search info.
MB_RD_RECORD mb_rd_record;
@@ -211,6 +244,11 @@ struct macroblock {
MACROBLOCKD e_mbd;
MB_MODE_INFO_EXT *mbmi_ext;
+ MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame;
+ // Array of mode stats for winner mode processing
+ WinnerModeStats winner_mode_stats[AOMMAX(MAX_WINNER_MODE_COUNT_INTRA,
+ MAX_WINNER_MODE_COUNT_INTER)];
+ int winner_mode_count;
int skip_block;
int qindex;
@@ -219,17 +257,15 @@ struct macroblock {
int errorperbit;
// The equivalend SAD error of one (whole) bit at the current quantizer
// for large blocks.
- int sadperbit16;
- // The equivalend SAD error of one (whole) bit at the current quantizer
- // for sub-8x8 blocks.
- int sadperbit4;
+ int sadperbit;
int rdmult;
int mb_energy;
int sb_energy_level;
- int *m_search_count_ptr;
- int *ex_search_count_ptr;
unsigned int txb_split_count;
+#if CONFIG_SPEED_STATS
+ unsigned int tx_search_count;
+#endif // CONFIG_SPEED_STATS
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
@@ -238,15 +274,17 @@ struct macroblock {
unsigned int max_mv_context[REF_FRAMES];
unsigned int source_variance;
+ unsigned int simple_motion_pred_sse;
unsigned int pred_sse[REF_FRAMES];
int pred_mv_sad[REF_FRAMES];
+ int best_pred_mv_sad;
- int *nmvjointcost;
int nmv_vec_cost[MV_JOINTS];
+ int nmv_costs[2][MV_VALS];
+ int nmv_costs_hp[2][MV_VALS];
int *nmvcost[2];
int *nmvcost_hp[2];
int **mv_cost_stack;
- int **mvcost;
int32_t *wsrc_buf;
int32_t *mask_buf;
@@ -254,35 +292,38 @@ struct macroblock {
uint8_t *left_pred_buf;
PALETTE_BUFFER *palette_buffer;
+ CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
uint8_t *tmp_obmc_bufs[2];
- // buffer for hash value calculation of a block
- // used only in av1_get_block_hash_value()
- // [first hash/second hash]
- // [two buffers used ping-pong]
- uint32_t *hash_value_buffer[2][2];
+ FRAME_CONTEXT *row_ctx;
+ // This context will be used to update color_map_cdf pointer which would be
+ // used during pack bitstream. For single thread and tile-multithreading case
+ // this ponter will be same as xd->tile_ctx, but for the case of row-mt:
+ // xd->tile_ctx will point to a temporary context while tile_pb_ctx will point
+ // to the accurate tile context.
+ FRAME_CONTEXT *tile_pb_ctx;
- CRC_CALCULATOR crc_calculator1;
- CRC_CALCULATOR crc_calculator2;
- int g_crc_initialized;
+ struct inter_modes_info *inter_modes_info;
+
+ // Contains the hash table, hash function, and buffer used for intrabc
+ IntraBCHashInfo intrabc_hash_info;
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
- MvLimits mv_limits;
+ FullMvLimits mv_limits;
uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ uint8_t tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
- int skip;
- int skip_chroma_rd;
+ // Force the coding block to skip transform and quantization.
+ int force_skip;
int skip_cost[SKIP_CONTEXTS][2];
int skip_mode; // 0: off; 1: on
int skip_mode_cost[SKIP_CONTEXTS][2];
- int compound_idx;
-
LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
LV_MAP_EOB_COST eob_costs[7][2];
uint16_t cb_offset;
@@ -309,7 +350,7 @@ struct macroblock {
// BWDREF_FRAME) in bidir-comp mode.
int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2];
int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
- int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
+ int compound_type_cost[BLOCK_SIZES_ALL][MASKED_COMPOUND_TYPES];
int wedge_idx_cost[BLOCK_SIZES_ALL][16];
int interintra_cost[BLOCK_SIZE_GROUPS][2];
int wedge_interintra_cost[BLOCK_SIZES_ALL][2];
@@ -346,29 +387,111 @@ struct macroblock {
// Used to store sub partition's choices.
MV pred_mv[REF_FRAMES];
- // Store the best motion vector during motion search
- int_mv best_mv;
- // Store the second best motion vector during full-pixel motion search
- int_mv second_best_mv;
+ // Ref frames that are selected by square partition blocks within a super-
+ // block, in MI resolution. They can be used to prune ref frames for
+ // rectangular blocks.
+ int picked_ref_frames_mask[32 * 32];
// use default transform and skip transform type search for intra modes
int use_default_intra_tx_type;
// use default transform and skip transform type search for inter modes
int use_default_inter_tx_type;
-#if CONFIG_DIST_8X8
- int using_dist_8x8;
- aom_tune_metric tune_metric;
-#endif // CONFIG_DIST_8X8
int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
- // Bit flags for pruning tx type search, tx split, etc.
- int tx_search_prune[EXT_TX_SET_TYPES];
int must_find_valid_partition;
- int tx_split_prune_flag; // Flag to skip tx split RD search.
int recalc_luma_mc_data; // Flag to indicate recalculation of MC data during
// interpolation filter search
+ int prune_mode;
+ uint32_t tx_domain_dist_threshold;
+ int use_transform_domain_distortion;
+ // The likelihood of an edge existing in the block (using partial Canny edge
+ // detection). For reference, 556 is the value returned for a solid
+ // vertical black/white edge.
+ uint16_t edge_strength;
+ // The strongest edge strength seen along the x/y axis.
+ uint16_t edge_strength_x;
+ uint16_t edge_strength_y;
+ uint8_t compound_idx;
+
+ // [Saved stat index]
+ COMP_RD_STATS comp_rd_stats[MAX_COMP_RD_STATS];
+ int comp_rd_stats_idx;
+
+ CB_COEFF_BUFFER *cb_coef_buff;
+
+ // Threshold used to decide the applicability of R-D optimization of
+ // quantized coeffs
+ uint32_t coeff_opt_dist_threshold;
+
+#if !CONFIG_REALTIME_ONLY
+ int quad_tree_idx;
+ int cnn_output_valid;
+ float cnn_buffer[CNN_OUT_BUF_SIZE];
+ float log_q;
+#endif
+ int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
+ // 0 - 128x128
+ // 1-2 - 128x64
+ // 3-4 - 64x128
+ // 5-8 - 64x64
+ // 9-16 - 64x32
+ // 17-24 - 32x64
+ // 25-40 - 32x32
+ // 41-104 - 16x16
+ uint8_t variance_low[105];
+ uint8_t content_state_sb;
+ // Strong color activity detection. Used in REALTIME coding mode to enhance
+ // the visual quality at the boundary of moving color objects.
+ uint8_t color_sensitivity[2];
+ int nonrd_prune_ref_frame_search;
+
+ // Used to control the tx size search evaluation for mode processing
+ // (normal/winner mode)
+ int tx_size_search_method;
+ // This tx_mode_search_type is used internally by the encoder, and is not
+ // written to the bitstream. It determines what kind of tx_mode should be
+ // searched. For example, we might set it to TX_MODE_LARGEST to find a good
+ // candidate, then use TX_MODE_SELECT on it
+ TX_MODE tx_mode_search_type;
+
+ // Used to control aggressiveness of skip flag prediction for mode processing
+ // (normal/winner mode)
+ unsigned int predict_skip_level;
+
+ // Copy out this SB's TPL block stats.
+ int valid_cost_b;
+ int64_t inter_cost_b[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB];
+ int64_t intra_cost_b[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB];
+ int_mv mv_b[MAX_MC_FLOW_BLK_IN_SB * MAX_MC_FLOW_BLK_IN_SB]
+ [INTER_REFS_PER_FRAME];
+ int cost_stride;
+
+ // The type of mv cost used during motion search
+ MV_COST_TYPE mv_cost_type;
+
+ uint8_t search_ref_frame[REF_FRAMES];
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ void (*fwd_txfm4x4)(const int16_t *input, tran_low_t *output, int stride);
+ void (*inv_txfm_add)(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+#else
+ void (*fwd_txfm4x4)(const int16_t *input, int16_t *output, int stride);
+ void (*inv_txfm_add)(const int16_t *input, uint8_t *dest, int stride,
+ int eob);
+#endif
};
+// Only consider full SB, MC_FLOW_BSIZE_1D = 16.
+static INLINE int tpl_blocks_in_sb(BLOCK_SIZE bsize) {
+ switch (bsize) {
+ case BLOCK_64X64: return 16;
+ case BLOCK_128X128: return 64;
+ default: assert(0);
+ }
+ return -1;
+}
+
static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
static const char LUT[BLOCK_SIZES_ALL] = {
0, // BLOCK_4X4