author     trav90 <travawine@palemoon.org>  2018-10-18 06:04:57 -0500
committer  trav90 <travawine@palemoon.org>  2018-10-18 06:04:57 -0500
commit     7369c7d7a5eed32963d8af37658286617919f91c
tree       5397ce7ee9bca1641118fdc3187bd9e2b24fdc9c /third_party/aom/av1/common/reconinter.c
parent     77887af9c4ad1420bbdb33984af4f74b55ca59db
download   uxp-7369c7d7a5eed32963d8af37658286617919f91c.tar.gz
Update aom to commit id f5bdeac22930ff4c6b219be49c843db35970b918
Diffstat (limited to 'third_party/aom/av1/common/reconinter.c')
-rw-r--r--  third_party/aom/av1/common/reconinter.c | 714
1 file changed, 453 insertions(+), 261 deletions(-)
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
index fa37a992fd..a1b5c1f671 100644
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@@ -35,14 +35,14 @@ DECLARE_ALIGNED(16, static uint8_t,
                 [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
 DECLARE_ALIGNED(16, static uint8_t,
-                wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
+                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
 
-// 3 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
+// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
 // on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
 DECLARE_ALIGNED(16, static uint8_t,
-                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);
+                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
 
-static wedge_masks_type wedge_masks[BLOCK_SIZES][2];
+static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
 
 // Some unused wedge codebooks left temporarily to facilitate experiments.
 // To be removed when settled.
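The 3 -> 4 bump in this bound tracks the extra wedge-enabled shapes that BLOCK_SIZES_ALL brings in. A quick standalone check of the arithmetic — an editorial sketch only, which assumes MAX_WEDGE_SQUARE == 32 * 32 and that the wedge-enabled sizes are 8x8 through 32x32 plus the four 1:4/4:1 shapes registered later in this diff — shows why the old factor of 3 no longer suffices:

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
      enum { kMaxWedgeSquare = 32 * 32 }; /* assumed value of MAX_WEDGE_SQUARE */
      /* w x h of each assumed wedge-enabled block size, including the
       * BLOCK_SIZES_ALL additions 4x16/16x4/8x32/32x8 from this diff. */
      const int dims[][2] = { { 8, 8 },   { 8, 16 },  { 16, 8 },  { 16, 16 },
                              { 16, 32 }, { 32, 16 }, { 32, 32 }, { 4, 16 },
                              { 16, 4 },  { 8, 32 },  { 32, 8 } };
      int sum = 0;
      for (unsigned i = 0; i < sizeof(dims) / sizeof(dims[0]); ++i)
        sum += dims[i][0] * dims[i][1];
      printf("sum of mask sizes = %d\n", sum); /* 3264 */
      assert(sum > 3 * kMaxWedgeSquare);  /* old bound (3072) no longer holds */
      assert(sum <= 4 * kMaxWedgeSquare); /* new bound (4096) does */
      return 0;
    }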
@@ -159,12 +159,12 @@ static const wedge_code_type wedge_codebook_16_heqw[16] = {
   { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
 };
 
-const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
-#endif  // CONFIG_CB4X4
+#endif  // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
@@ -216,6 +216,14 @@ const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
 #endif  // CONFIG_EXT_PARTITION
+  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
+    wedge_masks[BLOCK_4X16] },
+  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
+    wedge_masks[BLOCK_16X4] },
+  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
+    wedge_masks[BLOCK_8X32] },
+  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
+    wedge_masks[BLOCK_32X8] },
 };
 
 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
@@ -349,6 +357,47 @@ void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
 #elif COMPOUND_SEGMENT_TYPE == 1
 #define DIFF_FACTOR 16
 
+#if CONFIG_CONVOLVE_ROUND
+static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
+                             const int32_t *src0, int src0_stride,
+                             const int32_t *src1, int src1_stride,
+                             BLOCK_SIZE sb_type, int h, int w,
+                             ConvolveParams *conv_params, int bd) {
+  int round =
+      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
+  int i, j, m, diff;
+  int block_stride = block_size_wide[sb_type];
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < w; ++j) {
+      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
+      diff = ROUND_POWER_OF_TWO(diff, round);
+      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
+      mask[i * block_stride + j] =
+          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
+    }
+  }
+}
+
+static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
+                                        const int32_t *src0, int src0_stride,
+                                        const int32_t *src1, int src1_stride,
+                                        BLOCK_SIZE sb_type, int h, int w,
+                                        ConvolveParams *conv_params, int bd) {
+  switch (mask_type) {
+    case DIFFWTD_38:
+      diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
+                       sb_type, h, w, conv_params, bd);
+      break;
+    case DIFFWTD_38_INV:
+      diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
+                       sb_type, h, w, conv_params, bd);
+      break;
+    default: assert(0);
+  }
+}
+#endif
+
 static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                          const uint8_t *src0, int src0_stride,
                          const uint8_t *src1, int src1_stride,
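The DIFFWTD rule that diffwtd_mask_d32 applies is easiest to see on plain 8-bit values, where no extra rounding is needed. A minimal standalone illustration (the 38 base and DIFF_FACTOR of 16 come from this file; AOM_BLEND_A64_MAX_ALPHA being 64 is assumed from the aom blend helpers):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      const int mask_base = 38, diff_factor = 16, max_alpha = 64;
      const unsigned char p0[4] = { 10, 60, 130, 255 };
      const unsigned char p1[4] = { 12, 200, 10, 0 };
      for (int i = 0; i < 4; ++i) {
        int diff = abs((int)p0[i] - (int)p1[i]);
        int m = mask_base + diff / diff_factor;
        if (m > max_alpha) m = max_alpha;
        /* A large |p0 - p1| pushes the mask toward 64, i.e. toward fully
         * weighting the first predictor in the later blend. */
        printf("diff=%3d -> m=%2d\n", diff, m);
      }
      return 0;
    }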
@@ -531,7 +580,7 @@ static void init_wedge_master_masks() {
 static void init_wedge_signs() {
   BLOCK_SIZE sb_type;
   memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
-  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
+  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
     const int bw = block_size_wide[sb_type];
     const int bh = block_size_high[sb_type];
     const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
@@ -565,7 +614,7 @@ static void init_wedge_masks() {
   uint8_t *dst = wedge_mask_buf;
   BLOCK_SIZE bsize;
   memset(wedge_masks, 0, sizeof(wedge_masks));
-  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) {
+  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
     const uint8_t *mask;
     const int bw = block_size_wide[bsize];
     const int bh = block_size_high[bsize];
@@ -656,6 +705,21 @@ static void build_masked_compound_wedge_extend_highbd(
 }
 #endif  // CONFIG_HIGHBITDEPTH
 #else
+#if CONFIG_CONVOLVE_ROUND
+static void build_masked_compound_no_round(
+    CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
+    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
+    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
+    int w) {
+  // Derive subsampling from h and w passed in. May be refactored to
+  // pass in subsampling factors directly.
+  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
+  aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                         mask, block_size_wide[sb_type], h, w, subh, subw);
+}
+#endif  // CONFIG_CONVOLVE_ROUND
 static void build_masked_compound(
     uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
     const uint8_t *src1, int src1_stride,
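build_masked_compound_no_round blends the two 32-bit partial predictions directly, which is why the callers below disable the convolve stage's own averaging (do_average = 0). A hedged sketch of the per-pixel operation it delegates to aom_blend_a64_d32_mask — assuming the usual A64 semantics of an alpha in [0, 64] with a 6-bit rounding shift; the real helper also handles subsampled masks:

    #include <stdint.h>

    /* Weighted average of two 32-bit partial predictions; the conversion
     * back to pixels happens later in av1_convolve_rounding. */
    static void blend_a64_d32_sketch(int32_t *dst, const int32_t *s0,
                                     const int32_t *s1, const uint8_t *mask,
                                     int n) {
      for (int i = 0; i < n; ++i)
        dst[i] = (int32_t)((mask[i] * (int64_t)s0[i] +
                            (64 - mask[i]) * (int64_t)s1[i] + 32) >> 6);
    }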
@@ -694,7 +758,7 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                      uint8_t *dst, int dst_stride,
                                      const int subpel_x, const int subpel_y,
                                      const struct scale_factors *sf, int w,
-                                     int h,
+                                     int h, ConvolveParams *conv_params,
 #if CONFIG_DUAL_FILTER
                                      const InterpFilter *interp_filter,
 #else
@@ -710,7 +774,8 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                      int p_col, int p_row, int ref,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                      MACROBLOCKD *xd) {
-  MODE_INFO *mi = xd->mi[0];
+  const MODE_INFO *mi = xd->mi[0];
+
   const INTERINTER_COMPOUND_DATA comp_data = {
 #if CONFIG_WEDGE
     mi->mbmi.wedge_index,
@@ -722,24 +787,27 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
 #endif  // CONFIG_COMPOUND_SEGMENT
     mi->mbmi.interinter_compound_type
   };
-// The prediction filter types used here should be those for
-// the second reference block.
-#if CONFIG_DUAL_FILTER
-  InterpFilter tmp_ipf[4] = {
-    interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3],
-  };
-#else
-  InterpFilter tmp_ipf = interp_filter;
-#endif  // CONFIG_DUAL_FILTER
-  ConvolveParams conv_params = get_conv_params(0, plane);
 #if CONFIG_HIGHBITDEPTH
+#if CONFIG_CONVOLVE_ROUND
+  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
+  int tmp_dst2_stride = MAX_SB_SIZE;
+  CONV_BUF_TYPE *org_dst = conv_params->dst;
+  int org_dst_stride = conv_params->dst_stride;
+  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+    memset(tmp_dst2, 0, sizeof(tmp_dst2));
+    conv_params->dst = tmp_dst2;
+    conv_params->dst_stride = tmp_dst2_stride;
+    // mask compound has its own average mechanism
+    conv_params->do_average = 0;
+  }
+#endif  // CONFIG_CONVOLVE_ROUND
   DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
   uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
                          ? CONVERT_TO_BYTEPTR(tmp_dst_)
                          : tmp_dst_;
   av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
-                           subpel_y, sf, w, h, &conv_params, tmp_ipf,
+                           subpel_y, sf, w, h, conv_params, interp_filter,
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            warp_types, p_col, p_row, plane, ref,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -749,14 +817,26 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                            xs, ys, xd);
 #if CONFIG_COMPOUND_SEGMENT
   if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-      build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
-                                     dst, dst_stride, tmp_dst, MAX_SB_SIZE,
-                                     mi->mbmi.sb_type, h, w, xd->bd);
-    else
-      build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
-                              dst_stride, tmp_dst, MAX_SB_SIZE,
-                              mi->mbmi.sb_type, h, w);
+#if CONFIG_CONVOLVE_ROUND
+    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+      build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
+                                  org_dst, org_dst_stride, tmp_dst2,
+                                  tmp_dst2_stride, mi->mbmi.sb_type, h, w,
+                                  conv_params, xd->bd);
+    } else {
+#endif  // CONFIG_CONVOLVE_ROUND
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
+                                       dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+                                       mi->mbmi.sb_type, h, w, xd->bd);
+      } else {
+        build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
+                                dst_stride, tmp_dst, MAX_SB_SIZE,
+                                mi->mbmi.sb_type, h, w);
+      }
+#if CONFIG_CONVOLVE_ROUND
+    }
+#endif
   }
 #endif  // CONFIG_COMPOUND_SEGMENT
@@ -770,19 +850,55 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
       dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
       mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
 #else
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
-                                 MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
-                                 w, xd->bd);
-  else
-    build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
-                          MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
+#if CONFIG_CONVOLVE_ROUND
+  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
+                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
+                                   &comp_data, mi->mbmi.sb_type, h, w);
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      av1_highbd_convolve_rounding(
+          org_dst, org_dst_stride, dst, dst_stride, w, h,
+          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1,
+          xd->bd);
+    } else {
+      av1_convolve_rounding(
+          org_dst, org_dst_stride, dst, dst_stride, w, h,
+          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
+    }
+    conv_params->do_post_rounding = 0;
+  } else {
+#endif  // CONFIG_CONVOLVE_ROUND
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
+                                   MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
+                                   h, w, xd->bd);
+    } else {
+      build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
+                            MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
+    }
+#if CONFIG_CONVOLVE_ROUND
+  }
+#endif  // CONFIG_CONVOLVE_ROUND
 #endif  // CONFIG_SUPERTX
 #else   // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_CONVOLVE_ROUND
+  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
+  int tmp_dst2_stride = MAX_SB_SIZE;
+  CONV_BUF_TYPE *org_dst = conv_params->dst;
+  int org_dst_stride = conv_params->dst_stride;
+  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+    memset(tmp_dst2, 0, sizeof(tmp_dst2));
+    conv_params->dst = tmp_dst2;
+    conv_params->dst_stride = tmp_dst2_stride;
+    // mask compound has its own average mechanism
+    conv_params->do_average = 0;
+  }
+#endif
   DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
   av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
-                           subpel_y, sf, w, h, &conv_params, tmp_ipf,
+                           subpel_y, sf, w, h, conv_params, interp_filter,
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                            warp_types, p_col, p_row, plane, ref,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -791,18 +907,43 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
 #endif
                            xs, ys, xd);
 #if CONFIG_COMPOUND_SEGMENT
-  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG)
-    build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
-                            dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type,
-                            h, w);
+  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
+#if CONFIG_CONVOLVE_ROUND
+    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+      build_compound_seg_mask_d32(
+          comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
+          tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params, 8);
+    } else {
+#endif  // CONFIG_CONVOLVE_ROUND
+      build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
+                              dst_stride, tmp_dst, MAX_SB_SIZE,
+                              mi->mbmi.sb_type, h, w);
+#if CONFIG_CONVOLVE_ROUND
+    }
+#endif
+  }
 #endif  // CONFIG_COMPOUND_SEGMENT
 #if CONFIG_SUPERTX
   build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
                                      MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
                                      wedge_offset_x, wedge_offset_y, h, w);
 #else
-  build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
-                        &comp_data, mi->mbmi.sb_type, h, w);
+#if CONFIG_CONVOLVE_ROUND
+  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
+                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
+                                   &comp_data, mi->mbmi.sb_type, h, w);
+    av1_convolve_rounding(
+        org_dst, org_dst_stride, dst, dst_stride, w, h,
+        FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
+    conv_params->do_post_rounding = 0;
+  } else {
+#endif  // CONFIG_CONVOLVE_ROUND
+    build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
+                          MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
+#if CONFIG_CONVOLVE_ROUND
+  }
+#endif  // CONFIG_CONVOLVE_ROUND
 #endif  // CONFIG_SUPERTX
 #endif  // CONFIG_HIGHBITDEPTH
 #if CONFIG_COMPOUND_SEGMENT
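The shift amount FILTER_BITS * 2 - round_0 - round_1 in the hunks above is the precision still carried by the 32-bit accumulator after the two filter stages. A sketch of what av1_convolve_rounding is assumed to do with it — round the accumulator back to 8-bit pixels (the bit counts here are illustrative; only FILTER_BITS == 7 is a value I take from aom's filter definitions):

    #include <stdint.h>

    static void convolve_rounding_sketch(const int32_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride, int w,
                                         int h, int bits) {
      /* e.g. bits = 2 * 7 - round_0 - round_1 for hypothetical round_0/1 */
      for (int r = 0; r < h; ++r)
        for (int c = 0; c < w; ++c) {
          int v = (src[r * src_stride + c] + ((1 << bits) >> 1)) >> bits;
          dst[r * dst_stride + c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
    }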
@@ -832,11 +973,14 @@ void av1_highbd_build_inter_predictor(
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
-  const int subpel_x = mv.col & SUBPEL_MASK;
-  const int subpel_y = mv.row & SUBPEL_MASK;
-  ConvolveParams conv_params = get_conv_params(ref, plane);
+  mv.col += SCALE_EXTRA_OFF;
+  mv.row += SCALE_EXTRA_OFF;
+  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
+  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
+  ConvolveParams conv_params = get_conv_params(ref, ref, plane);
 
-  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
+         (mv.col >> SCALE_SUBPEL_BITS);
 
   av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x,
                            subpel_y, sf, w, h, &conv_params, interp_filter,
@@ -869,10 +1013,13 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
-  const int subpel_x = mv.col & SUBPEL_MASK;
-  const int subpel_y = mv.row & SUBPEL_MASK;
+  mv.col += SCALE_EXTRA_OFF;
+  mv.row += SCALE_EXTRA_OFF;
+  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
+  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
 
-  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
+         (mv.col >> SCALE_SUBPEL_BITS);
 
   av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x,
                            subpel_y, sf, w, h, conv_params, interp_filter,
@@ -909,6 +1056,10 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
   const MODE_INFO *mi = xd->mi[0];
 #endif  // CONFIG_MOTION_VAR
   int is_compound = has_second_ref(&mi->mbmi);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  int is_comp_mode_pred =
+      is_compound || is_inter_singleref_comp_mode(mi->mbmi.mode);
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   int ref;
 #if CONFIG_INTRABC
   const int is_intrabc = is_intrabc_block(&mi->mbmi);
@@ -920,6 +1071,9 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
     WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
     is_global[ref] = is_global_mv_block(mi, block, wm->wmtype);
   }
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  if (!is_compound && is_comp_mode_pred) is_global[1] = is_global[0];
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
 #endif  // CONFIG_GLOBAL_MOTION
 
 #if CONFIG_CB4X4
@@ -974,7 +1128,8 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
       for (idx = 0; idx < b8_w; idx += b4_w) {
         MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
         is_compound = has_second_ref(this_mbmi);
-
+        // TODO(zoeliu): If single ref comp modes are considered here, a
+        // mismatch was caused. Need a further investigation.
         for (ref = 0; ref < 1 + is_compound; ++ref) {
           struct buf_2d *const dst_buf = &pd->dst;
@@ -995,23 +1150,19 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
 
 #if CONFIG_INTRABC
           const struct scale_factors *const sf =
-              is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
+              is_intrabc ? &xd->sf_identity : &ref_buf->sf;
           struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
 #else
-          const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+          const struct scale_factors *const sf = &ref_buf->sf;
           struct buf_2d *const pre_buf = &pd->pre[ref];
 #endif  // CONFIG_INTRABC
           uint8_t *dst = dst_buf->buf;
 
           const MV mv = this_mbmi->mv[ref].as_mv;
-          const MV mv_q4 = clamp_mv_to_umv_border_sb(
-              xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
 
           uint8_t *pre;
-          MV32 scaled_mv;
           int xs, ys, subpel_x, subpel_y;
           const int is_scaled = av1_is_scaled(sf);
-          ConvolveParams conv_params = get_conv_params(ref, plane);
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
           WarpTypesAllowed warp_types;
 #if CONFIG_GLOBAL_MOTION
@@ -1029,28 +1180,55 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
           dst += dst_buf->stride * y + x;
 
           if (is_scaled) {
-            pre =
-                pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
-            scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+            int ssx = pd->subsampling_x;
+            int ssy = pd->subsampling_y;
+            int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
+            orig_pos_y += mv.row * (1 << (1 - ssy));
+            int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
+            orig_pos_x += mv.col * (1 << (1 - ssx));
+            int pos_y = sf->scale_value_y(orig_pos_y, sf);
+            int pos_x = sf->scale_value_x(orig_pos_x, sf);
+            pos_x += SCALE_EXTRA_OFF;
+            pos_y += SCALE_EXTRA_OFF;
+
+            const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
+            const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                               << SCALE_SUBPEL_BITS;
+            const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
+            const int right = (pre_buf->width + AOM_INTERP_EXTEND)
+                              << SCALE_SUBPEL_BITS;
+            pos_y = clamp(pos_y, top, bottom);
+            pos_x = clamp(pos_x, left, right);
+
+            pre = pre_buf->buf0 +
+                  (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+                  (pos_x >> SCALE_SUBPEL_BITS);
+            subpel_x = pos_x & SCALE_SUBPEL_MASK;
+            subpel_y = pos_y & SCALE_SUBPEL_MASK;
             xs = sf->x_step_q4;
             ys = sf->y_step_q4;
           } else {
-            pre = pre_buf->buf + y * pre_buf->stride + x;
-            scaled_mv.row = mv_q4.row;
-            scaled_mv.col = mv_q4.col;
-            xs = ys = 16;
+            const MV mv_q4 = clamp_mv_to_umv_border_sb(
+                xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+            xs = ys = SCALE_SUBPEL_SHIFTS;
+            subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+            subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+            pre = pre_buf->buf +
+                  (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
+                  (x + (mv_q4.col >> SUBPEL_BITS));
           }
 
-          subpel_x = scaled_mv.col & SUBPEL_MASK;
-          subpel_y = scaled_mv.row & SUBPEL_MASK;
-          pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
-                 (scaled_mv.col >> SUBPEL_BITS);
-
+          ConvolveParams conv_params = get_conv_params(ref, ref, plane);
 #if CONFIG_EXT_INTER
+          if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
+            // TODO(angiebird): use get_conv_params_no_round() here
+            // masked compound type has its own average mechanism
+            conv_params = get_conv_params(ref, 0, plane);
+          }
+
           if (ref &&
               is_masked_compound_type(mi->mbmi.interinter_compound_type))
             av1_make_masked_inter_predictor(
                 pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
-                sf, w, h, mi->mbmi.interp_filter, xs, ys,
+                sf, w, h, &conv_params, mi->mbmi.interp_filter, xs, ys,
 #if CONFIG_SUPERTX
                 wedge_offset_x, wedge_offset_y,
 #endif  // CONFIG_SUPERTX
@@ -1071,7 +1249,7 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 #if CONFIG_MOTION_VAR
                 mi_col_offset, mi_row_offset,
-#endif
+#endif  // CONFIG_MOTION_VAR
                 xs, ys, xd);
           }
           ++col;
@@ -1090,14 +1268,17 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
   struct buf_2d *const dst_buf = &pd->dst;
   uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
   uint8_t *pre[2];
-  MV32 scaled_mv[2];
   SubpelParams subpel_params[2];
 #if CONFIG_CONVOLVE_ROUND
   DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
   av1_zero(tmp_dst);
 #endif  // CONFIG_CONVOLVE_ROUND
 
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref) {
+#else
   for (ref = 0; ref < 1 + is_compound; ++ref) {
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
 #if CONFIG_INTRABC
     const struct scale_factors *const sf =
         is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
@@ -1120,42 +1301,70 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
                       : mi->mbmi.mv[ref].as_mv;
 #endif
 
-    // TODO(jkoleszar): This clamping is done in the incorrect place for the
-    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
-    // MV. Note however that it performs the subsampling aware scaling so
-    // that the result is always q4.
-    // mv_precision precision is MV_PRECISION_Q4.
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(
-        xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-
     const int is_scaled = av1_is_scaled(sf);
     if (is_scaled) {
-      pre[ref] =
-          pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
-      scaled_mv[ref] = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+      // Note: The various inputs here have different units:
+      // * mi_x/mi_y are in units of luma pixels
+      // * mv is in units of 1/8 luma pixels
+      // * x/y are in units of pixels *in the current plane*
+      // Here we unify these into a q4-format position within the current
+      // plane, then project into the reference frame
+      int ssx = pd->subsampling_x;
+      int ssy = pd->subsampling_y;
+      int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
+      orig_pos_y += mv.row * (1 << (1 - ssy));
+      int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
+      orig_pos_x += mv.col * (1 << (1 - ssx));
+      int pos_y = sf->scale_value_y(orig_pos_y, sf);
+      int pos_x = sf->scale_value_x(orig_pos_x, sf);
+      pos_x += SCALE_EXTRA_OFF;
+      pos_y += SCALE_EXTRA_OFF;
+
+      // Clamp against the reference frame borders, with enough extension
+      // that we don't force the reference block to be partially onscreen.
+      const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
+      const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                         << SCALE_SUBPEL_BITS;
+      const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
+      const int right = (pre_buf->width + AOM_INTERP_EXTEND)
+                        << SCALE_SUBPEL_BITS;
+      pos_y = clamp(pos_y, top, bottom);
+      pos_x = clamp(pos_x, left, right);
+
+      pre[ref] = pre_buf->buf0 +
+                 (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+                 (pos_x >> SCALE_SUBPEL_BITS);
+      subpel_params[ref].subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params[ref].subpel_y = pos_y & SCALE_SUBPEL_MASK;
       subpel_params[ref].xs = sf->x_step_q4;
      subpel_params[ref].ys = sf->y_step_q4;
     } else {
-      pre[ref] = pre_buf->buf + (y * pre_buf->stride + x);
-      scaled_mv[ref].row = mv_q4.row;
-      scaled_mv[ref].col = mv_q4.col;
-      subpel_params[ref].xs = 16;
-      subpel_params[ref].ys = 16;
+      const MV mv_q4 = clamp_mv_to_umv_border_sb(
+          xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+      subpel_params[ref].subpel_x = (mv_q4.col & SUBPEL_MASK)
+                                    << SCALE_EXTRA_BITS;
+      subpel_params[ref].subpel_y = (mv_q4.row & SUBPEL_MASK)
+                                    << SCALE_EXTRA_BITS;
+      subpel_params[ref].xs = SCALE_SUBPEL_SHIFTS;
+      subpel_params[ref].ys = SCALE_SUBPEL_SHIFTS;
+      pre[ref] = pre_buf->buf +
+                 (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
+                 (x + (mv_q4.col >> SUBPEL_BITS));
     }
-
-    subpel_params[ref].subpel_x = scaled_mv[ref].col & SUBPEL_MASK;
-    subpel_params[ref].subpel_y = scaled_mv[ref].row & SUBPEL_MASK;
-    pre[ref] += (scaled_mv[ref].row >> SUBPEL_BITS) * pre_buf->stride +
-                (scaled_mv[ref].col >> SUBPEL_BITS);
   }
 
 #if CONFIG_CONVOLVE_ROUND
   ConvolveParams conv_params =
-      get_conv_params_no_round(ref, plane, tmp_dst, MAX_SB_SIZE);
+      get_conv_params_no_round(ref, ref, plane, tmp_dst, MAX_SB_SIZE);
 #else
-  ConvolveParams conv_params = get_conv_params(ref, plane);
+  ConvolveParams conv_params = get_conv_params(ref, ref, plane);
 #endif  // CONFIG_CONVOLVE_ROUND
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref) {
+#else
   for (ref = 0; ref < 1 + is_compound; ++ref) {
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
 #if CONFIG_INTRABC
     const struct scale_factors *const sf =
         is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
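The "different units" note above is the crux of the new scaled-prediction path. A hedged worked example of the unit unification, with constants as I believe av1/common/scale.h defines them at this revision (SUBPEL_BITS = 4, SCALE_SUBPEL_BITS = 10, SCALE_EXTRA_OFF = 32 — treat all three as assumptions), and a hypothetical identity scale function standing in for sf->scale_value_y:

    #include <stdio.h>

    #define SUBPEL_BITS 4
    #define SCALE_SUBPEL_BITS 10
    #define SCALE_SUBPEL_MASK ((1 << SCALE_SUBPEL_BITS) - 1)
    #define SCALE_EXTRA_OFF 32

    /* Stand-in for sf->scale_value_y on an unscaled reference:
     * widen the q4 position into the q10 scale domain. */
    static int scale_value_identity(int q4_pos) {
      return q4_pos << (SCALE_SUBPEL_BITS - SUBPEL_BITS);
    }

    int main(void) {
      const int mi_y = 16, y = 0, ssy = 1; /* chroma row, block at mi row 16 */
      const int mv_row = 13;               /* 1/8-pel luma MV component */
      /* q4 position within the (subsampled) current plane: */
      int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
      orig_pos_y += mv_row * (1 << (1 - ssy)); /* 1/8-pel luma -> q4 chroma */
      int pos_y = scale_value_identity(orig_pos_y) + SCALE_EXTRA_OFF;
      /* Split into integer pixel row plus a SCALE_SUBPEL_BITS phase: */
      printf("integer row %d, phase %d/1024\n", pos_y >> SCALE_SUBPEL_BITS,
             pos_y & SCALE_SUBPEL_MASK); /* -> integer row 8, phase 864/1024 */
      return 0;
    }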
@@ -1174,12 +1383,23 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
 #endif  // CONFIG_WARPED_MOTION
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
     conv_params.ref = ref;
+    conv_params.do_average = ref;
 #if CONFIG_EXT_INTER
+    if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
+      // masked compound type has its own average mechanism
+      conv_params.do_average = 0;
+#if CONFIG_CONVOLVE_ROUND && CONFIG_COMPOUND_SEGMENT && CONFIG_SUPERTX
+      // TODO(angiebird): convolve_round does not support compound_segment
+      // when supertx is on
+      conv_params = get_conv_params(ref, 0, plane);
+#endif
+    }
+
     if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
       av1_make_masked_inter_predictor(
           pre[ref], pre_buf->stride, dst, dst_buf->stride,
           subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
-          mi->mbmi.interp_filter, subpel_params[ref].xs,
+          &conv_params, mi->mbmi.interp_filter, subpel_params[ref].xs,
           subpel_params[ref].ys,
 #if CONFIG_SUPERTX
           wedge_offset_x, wedge_offset_y,
 #endif  // CONFIG_SUPERTX
@@ -1202,7 +1422,7 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 #if CONFIG_MOTION_VAR
           mi_col_offset, mi_row_offset,
-#endif
+#endif  // CONFIG_MOTION_VAR
           subpel_params[ref].xs, subpel_params[ref].ys, xd);
   }
@@ -1218,76 +1438,20 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
                                   xd->bd);
     else
 #endif  // CONFIG_HIGHBITDEPTH
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
       av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
-                            FILTER_BITS * 2 + is_compound -
+                            FILTER_BITS * 2 + is_comp_mode_pred -
                                 conv_params.round_0 - conv_params.round_1);
+#else   // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+      av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
+                            FILTER_BITS * 2 + is_compound -
+                                conv_params.round_0 - conv_params.round_1);
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   }
 #endif  // CONFIG_CONVOLVE_ROUND
   }
 }
 
-void av1_build_inter_predictor_sub8x8(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                      int plane, int i, int ir, int ic,
-                                      int mi_row, int mi_col) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  MODE_INFO *const mi = xd->mi[0];
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
-  const int width = block_size_wide[plane_bsize];
-  const int height = block_size_high[plane_bsize];
-  uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
-  int ref;
-  const int is_compound = has_second_ref(&mi->mbmi);
-  (void)cm;
-
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-  WarpTypesAllowed warp_types;
-  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
-  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
-#if CONFIG_GLOBAL_MOTION
-  int is_global[2];
-  for (ref = 0; ref < 1 + is_compound; ++ref) {
-    WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
-    is_global[ref] = is_global_mv_block(mi, i, wm->wmtype);
-  }
-#endif  // CONFIG_GLOBAL_MOTION
-#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-
-  for (ref = 0; ref < 1 + is_compound; ++ref) {
-    ConvolveParams conv_params = get_conv_params(ref, plane);
-    const uint8_t *pre =
-        &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2];
-#if CONFIG_GLOBAL_MOTION
-    warp_types.global_warp_allowed = is_global[ref];
-#endif  // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
-    warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
-#endif  // CONFIG_WARPED_MOTION
-
-#if CONFIG_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-      av1_highbd_build_inter_predictor(
-          pre, pd->pre[ref].stride, dst, pd->dst.stride,
-          &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
-          ref, mi->mbmi.interp_filter,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-          &warp_types, p_col, p_row,
-#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-          plane, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
-          mi_row * MI_SIZE + 4 * ir, xd);
-    else
-#endif  // CONFIG_HIGHBITDEPTH
-      av1_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride,
-                                &mi->bmi[i].as_mv[ref].as_mv,
-                                &xd->block_refs[ref]->sf, width, height,
-                                &conv_params, mi->mbmi.interp_filter,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-                                &warp_types, p_col, p_row, plane, ref,
-#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-                                MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
-                                mi_row * MI_SIZE + 4 * ir, xd);
-  }
-}
-
 static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int mi_row,
                                               int mi_col,
@@ -1386,28 +1550,11 @@ void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
 }
 
-// TODO(afergs): Check if ctx can be made constant
 void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int mi_row, int mi_col, BUFFER_SET *ctx,
                                    BLOCK_SIZE bsize) {
-  build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0,
-                                    MAX_MB_PLANE - 1);
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
-  if (is_interintra_pred(&xd->mi[0]->mbmi)) {
-    BUFFER_SET default_ctx = {
-      { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
-      { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
-        xd->plane[2].dst.stride }
-    };
-    if (!ctx) ctx = &default_ctx;
-    av1_build_interintra_predictors(
-        xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
-        xd->plane[0].dst.stride, xd->plane[1].dst.stride,
-        xd->plane[2].dst.stride, ctx, bsize);
-  }
-#else
-  (void)ctx;
-#endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+  av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
+  av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
 }
 
 void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
@@ -1585,19 +1732,19 @@ void av1_build_masked_inter_predictor_complex(
   } while (--h_remain);
 }
 
-void av1_build_inter_predictors_sb_sub8x8_extend(const AV1_COMMON *cm,
-                                                 MACROBLOCKD *xd,
+void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
+                                                MACROBLOCKD *xd,
 #if CONFIG_EXT_INTER
-                                                 int mi_row_ori, int mi_col_ori,
+                                                int mi_row_ori, int mi_col_ori,
 #endif  // CONFIG_EXT_INTER
-                                                 int mi_row, int mi_col,
-                                                 BLOCK_SIZE bsize, int block) {
+                                                int mi_row, int mi_col,
+                                                int plane, BLOCK_SIZE bsize,
+                                                int block) {
   // Prediction function used in supertx:
   // Use the mv at current block (which is less than 8x8)
   // to get prediction of a block located at (mi_row, mi_col) at size of bsize
   // bsize can be larger than 8x8.
   // block (0-3): the sub8x8 location of current block
-  int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
 #if CONFIG_EXT_INTER
@@ -1608,68 +1755,50 @@ void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
   // For sub8x8 uv:
   // Skip uv prediction in supertx except the first block (block = 0)
   int max_plane = block ? 1 : MAX_MB_PLANE;
+  if (plane >= max_plane) return;
 
-  for (plane = 0; plane < max_plane; plane++) {
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, &xd->plane[plane]);
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
-    const int bw = 4 * num_4x4_w;
-    const int bh = 4 * num_4x4_h;
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+  const int bw = 4 * num_4x4_w;
+  const int bh = 4 * num_4x4_h;
 
-    build_inter_predictors(cm, xd, plane,
+  build_inter_predictors(cm, xd, plane,
 #if CONFIG_MOTION_VAR
-                           0, 0,
+                         0, 0,
 #endif  // CONFIG_MOTION_VAR
-                           block, bw, bh, 0, 0, bw, bh,
+                         block, bw, bh, 0, 0, bw, bh,
 #if CONFIG_EXT_INTER
-                           wedge_offset_x, wedge_offset_y,
-#endif  // CONFIG_EXT_INTER
-                           mi_x, mi_y);
-  }
-#if CONFIG_EXT_INTER
-  if (is_interintra_pred(&xd->mi[0]->mbmi)) {
-    BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf,
-                         xd->plane[2].dst.buf },
-                       { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
-                         xd->plane[2].dst.stride } };
-    av1_build_interintra_predictors(
-        xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
-        xd->plane[0].dst.stride, xd->plane[1].dst.stride,
-        xd->plane[2].dst.stride, &ctx, bsize);
-  }
+                         wedge_offset_x, wedge_offset_y,
 #endif  // CONFIG_EXT_INTER
+                         mi_x, mi_y);
 }
 
-void av1_build_inter_predictors_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_inter_predictor_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #if CONFIG_EXT_INTER
-                                          int mi_row_ori, int mi_col_ori,
+                                         int mi_row_ori, int mi_col_ori,
 #endif  // CONFIG_EXT_INTER
-                                          int mi_row, int mi_col,
-                                          BLOCK_SIZE bsize) {
-  int plane;
+                                         int mi_row, int mi_col, int plane,
+                                         BLOCK_SIZE bsize) {
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
 #if CONFIG_EXT_INTER
   const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
   const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
 #endif  // CONFIG_EXT_INTER
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, &xd->plane[plane]);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+  const int bw = block_size_wide[plane_bsize];
+  const int bh = block_size_high[plane_bsize];
 
-    build_inter_predictors(cm, xd, plane,
+  build_inter_predictors(cm, xd, plane,
 #if CONFIG_MOTION_VAR
-                           0, 0,
+                         0, 0,
 #endif  // CONFIG_MOTION_VAR
-                           0, bw, bh, 0, 0, bw, bh,
+                         0, bw, bh, 0, 0, bw, bh,
 #if CONFIG_EXT_INTER
-                           wedge_offset_x, wedge_offset_y,
+                         wedge_offset_x, wedge_offset_y,
 #endif  // CONFIG_EXT_INTER
-                           mi_x, mi_y);
-  }
+                         mi_x, mi_y);
 }
 #endif  // CONFIG_SUPERTX
@@ -1828,7 +1957,7 @@ int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                            const struct macroblockd_plane *pd,
   BLOCK_SIZE bsize_plane =
       ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   if (bsize_plane < BLOCK_4X4) return 1;
 #endif
   switch (bsize_plane) {
@@ -1863,7 +1992,8 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
 
   // handle above row
   if (xd->up_available) {
-    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+    const int overlap =
+        AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1);
     const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
     const int mi_row_offset = -1;
     const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
@@ -1885,7 +2015,9 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #endif
       const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
-      const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
+      const int above_step =
+          AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
+      const int mi_step = AOMMIN(xd->n8_w, above_step);
 
       if (is_neighbor_overlappable(above_mbmi)) {
         neighbor_count++;
@@ -1920,7 +2052,8 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
 
   // handle left column
   if (xd->left_available) {
-    const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
+    const int overlap =
+        AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1);
     const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
     const int mi_col_offset = -1;
     const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
@@ -1942,7 +2075,9 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #endif
       const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
-      const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
+      const int left_step =
+          AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
+      const int mi_step = AOMMIN(xd->n8_h, left_step);
 
       if (is_neighbor_overlappable(left_mbmi)) {
         neighbor_count++;
@@ -1985,6 +2120,14 @@ void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
       is_masked_compound_type(mbmi->interinter_compound_type)) {
     mbmi->interinter_compound_type = COMPOUND_AVERAGE;
     mbmi->ref_frame[1] = NONE_FRAME;
+#if CONFIG_COMPOUND_SINGLEREF
+  } else if (!has_second_ref(mbmi) &&
+             is_inter_singleref_comp_mode(mbmi->mode)) {
+    // mbmi->mode = compound_ref0_mode(mbmi->mode);
+    mbmi->mode = compound_ref1_mode(mbmi->mode);
+    assert(is_inter_singleref_mode(mbmi->mode));
+    mbmi->mv[0].as_int = mbmi->mv[1].as_int;
+#endif  // CONFIG_COMPOUND_SINGLEREF
   }
 #endif  // CONFIG_EXT_INTER
   if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME;
@@ -2025,7 +2168,9 @@ void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
       const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
       MB_MODE_INFO backup_mbmi;
-      mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
+      const int above_step =
+          AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
+      mi_step = AOMMIN(xd->n8_w, above_step);
 
       if (!is_neighbor_overlappable(above_mbmi)) continue;
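The recurring AOMMIN(..., BLOCK_64X64 ...) edits above all cap the OBMC blending region at the 64-pixel superblock dimension, which matters once CONFIG_EXT_PARTITION introduces 128-wide blocks. The clamp in isolation, as a small sketch (the 64 comes from block_size_high[BLOCK_64X64]; note the old num_4x4_blocks * 2 formula also equals half the block dimension, just uncapped):

    #include <stdio.h>

    static int obmc_overlap(int block_dim) {
      const int cap = 64 >> 1; /* block_size_high[BLOCK_64X64] >> 1 */
      const int half = block_dim >> 1;
      return half < cap ? half : cap;
    }

    int main(void) {
      /* -> 4 32 32: only the 128-wide case changes behavior. */
      printf("%d %d %d\n", obmc_overlap(8), obmc_overlap(64),
             obmc_overlap(128));
      return 0;
    }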
@@ -2041,8 +2186,16 @@ void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                            tmp_height[j], tmp_stride[j], 0, i, NULL,
                            pd->subsampling_x, pd->subsampling_y);
     }
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+    for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(above_mbmi->mode));
+         ++ref) {
+      const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
+                                           ? above_mbmi->ref_frame[ref]
+                                           : above_mbmi->ref_frame[0];
+#else   // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
     for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
       const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
       const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
 
       xd->block_refs[ref] = ref_buf;
@@ -2064,6 +2217,7 @@ void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
       bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
       bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
                   4);
+      bh = AOMMIN(bh, block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
 
       if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
       build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
@@ -2113,8 +2267,9 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
       const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
       MB_MODE_INFO backup_mbmi;
-
-      mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
+      const int left_step =
+          AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
+      mi_step = AOMMIN(xd->n8_h, left_step);
 
       if (!is_neighbor_overlappable(left_mbmi)) continue;
@@ -2130,8 +2285,16 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                            tmp_height[j], tmp_stride[j], i, 0, NULL,
                            pd->subsampling_x, pd->subsampling_y);
     }
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+    for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(left_mbmi->mode));
+         ++ref) {
+      const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
+                                           ? left_mbmi->ref_frame[ref]
+                                           : left_mbmi->ref_frame[0];
+#else   // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
     for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
       const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
       const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
 
       xd->block_refs[ref] = ref_buf;
@@ -2152,6 +2315,7 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
       const struct macroblockd_plane *pd = &xd->plane[j];
       bw = AOMMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
                   4);
+      bw = AOMMIN(bw, block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
       bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
 
       if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
@@ -2584,6 +2748,7 @@ void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #endif  // CONFIG_HIGHBITDEPTH
 
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  // TODO(zoeliu): COMPOUND_SINGLEREF has not worked with NCOBMC yet.
   av1_build_prediction_by_bottom_preds(cm, xd, mi_row, mi_col, dst_buf1,
                                        dst_width1, dst_height1, dst_stride1);
   av1_build_prediction_by_right_preds(cm, xd, mi_row, mi_col, dst_buf2,
@@ -2614,12 +2779,13 @@ static const int ii_weights1d[MAX_SB_SIZE] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 };
-static int ii_size_scales[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static int ii_size_scales[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   32, 32, 32,
 #endif
   32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1,
+  16, 16, 8, 8,
 };
 #else
 static const int ii_weights1d[MAX_SB_SIZE] = {
@@ -2628,12 +2794,13 @@ static const int ii_weights1d[MAX_SB_SIZE] = {
   6, 6, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 };
-static int ii_size_scales[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static int ii_size_scales[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   16, 16, 16,
 #endif
   16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1,
+  8, 8, 4, 4,
 };
 /* clang-format on */
 #endif  // CONFIG_EXT_PARTITION
@@ -2918,23 +3085,20 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
                       ? average_split_mvs(pd, mi, ref, block)
                       : mi->mbmi.mv[ref].as_mv;
 
-  // TODO(jkoleszar): This clamping is done in the incorrect place for the
-  // scaling case. It needs to be done on the scaled MV, not the pre-scaling
-  // MV. Note however that it performs the subsampling aware scaling so
-  // that the result is always q4.
-  // mv_precision precision is MV_PRECISION_Q4.
-  const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
-                                             pd->subsampling_y);
-
   uint8_t *pre;
-  MV32 scaled_mv;
   int xs, ys, subpel_x, subpel_y;
   const int is_scaled = av1_is_scaled(sf);
-  ConvolveParams conv_params = get_conv_params(0, plane);
+  ConvolveParams conv_params = get_conv_params(ref, 0, plane);
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
   WarpTypesAllowed warp_types;
 #if CONFIG_GLOBAL_MOTION
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  WarpedMotionParams *const wm =
+      mi->mbmi.ref_frame[ref] > 0 ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
+                                  : &xd->global_motion[mi->mbmi.ref_frame[0]];
+#else   // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
   WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   warp_types.global_warp_allowed = is_global_mv_block(mi, block, wm->wmtype);
 #endif  // CONFIG_GLOBAL_MOTION
 #if CONFIG_WARPED_MOTION
@@ -2943,22 +3107,41 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 
   if (is_scaled) {
-    pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
-    scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+    int ssx = pd->subsampling_x;
+    int ssy = pd->subsampling_y;
+    int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
+    orig_pos_y += mv.row * (1 << (1 - ssy));
+    int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
+    orig_pos_x += mv.col * (1 << (1 - ssx));
+    int pos_y = sf->scale_value_y(orig_pos_y, sf);
+    int pos_x = sf->scale_value_x(orig_pos_x, sf);
+    pos_x += SCALE_EXTRA_OFF;
+    pos_y += SCALE_EXTRA_OFF;
+
+    const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
+    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                       << SCALE_SUBPEL_BITS;
+    const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
+    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
+    pos_y = clamp(pos_y, top, bottom);
+    pos_x = clamp(pos_x, left, right);
+
+    pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+          (pos_x >> SCALE_SUBPEL_BITS);
+    subpel_x = pos_x & SCALE_SUBPEL_MASK;
+    subpel_y = pos_y & SCALE_SUBPEL_MASK;
     xs = sf->x_step_q4;
     ys = sf->y_step_q4;
   } else {
-    pre = pre_buf->buf + (y * pre_buf->stride + x);
-    scaled_mv.row = mv_q4.row;
-    scaled_mv.col = mv_q4.col;
-    xs = ys = 16;
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(
+        xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+    xs = ys = SCALE_SUBPEL_SHIFTS;
+    subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+    subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+    pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
+          (x + (mv_q4.col >> SUBPEL_BITS));
   }
 
-  subpel_x = scaled_mv.col & SUBPEL_MASK;
-  subpel_y = scaled_mv.row & SUBPEL_MASK;
-  pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
-         (scaled_mv.col >> SUBPEL_BITS);
-
   av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
                            subpel_y, sf, w, h, &conv_params,
                            mi->mbmi.interp_filter,
@@ -2989,6 +3172,10 @@ void av1_build_inter_predictors_for_planes_single_buf(
   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
   assert(bsize == BLOCK_8X8);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+  assert(has_second_ref(&xd->mi[0]->mbmi) ||
+         !is_inter_singleref_comp_mode(xd->mi[0]->mbmi.mode));
+#endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   for (y = 0; y < num_4x4_h; ++y)
     for (x = 0; x < num_4x4_w; ++x)
      build_inter_predictors_single_buf(
@@ -3026,7 +3213,12 @@ static void build_wedge_inter_predictor_from_buf(
     mbmi->interinter_compound_type
   };
 
+#if CONFIG_COMPOUND_SINGLEREF
+  if ((is_compound || is_inter_singleref_comp_mode(mbmi->mode)) &&
+      is_masked_compound_type(mbmi->interinter_compound_type)) {
+#else   // !CONFIG_COMPOUND_SINGLEREF
   if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type)) {
+#endif  // CONFIG_COMPOUND_SINGLEREF
 #if CONFIG_COMPOUND_SEGMENT
     if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
 #if CONFIG_HIGHBITDEPTH
@@ -3057,7 +3249,7 @@ static void build_wedge_inter_predictor_from_buf(
           dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
           ext_dst_stride1, &comp_data, mbmi->sb_type, wedge_offset_x,
           wedge_offset_y, h, w);
-#else
+#else   // !CONFIG_SUPERTX
 #if CONFIG_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
         build_masked_compound_highbd(