/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdbool.h>
#include <string.h>

#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/pickcdef.h"
#include "av1/encoder/mcomp.h"

// Get primary and secondary filter strength for the given strength index and
// search method
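// The strength index enumerates (primary, secondary) pairs:
//   strength_idx = pri_idx * tot_sec_filter + sec_idx.
// For example, with CDEF_FULL_SEARCH (tot_sec_filter == CDEF_SEC_STRENGTHS ==
// 4), strength_idx 9 decodes to primary strength 2 and secondary strength 1.
// The fast search levels additionally remap pri_idx/sec_idx through the
// priconv_*/secconv_* tables so that only a reduced subset of strengths is
// evaluated.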
static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
                                             int *pri_strength,
                                             int *sec_strength,
                                             int strength_idx) {
  const int tot_sec_filter =
      (pick_method == CDEF_FAST_SEARCH_LVL5)
          ? REDUCED_SEC_STRENGTHS_LVL5
          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
                                                    : CDEF_SEC_STRENGTHS);
  const int pri_idx = strength_idx / tot_sec_filter;
  const int sec_idx = strength_idx % tot_sec_filter;
  *pri_strength = pri_idx;
  *sec_strength = sec_idx;
  if (pick_method == CDEF_FULL_SEARCH) return;

  switch (pick_method) {
    case CDEF_FAST_SEARCH_LVL1:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
      *pri_strength = priconv_lvl1[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL2:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      *pri_strength = priconv_lvl2[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL3:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl2[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL4:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl4[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL5:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
      *pri_strength = priconv_lvl5[pri_idx];
      *sec_strength = secconv_lvl5[sec_idx];
      break;
    default: assert(0 && "Invalid CDEF search method");
  }
}

// Store CDEF filter strength calculated from strength index for given search
// method
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;        \
  } while (0)

/* Search for the best strength to add as an option, knowing we
   already selected nb_strengths options. */
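/* For each superblock, the MSE of the best already-selected strength is the
   baseline; a candidate strength j only lowers the total for superblock i if
   mse[i][j] beats that baseline. The candidate with the lowest total MSE over
   all superblocks is appended to lev[]. */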
static uint64_t search_one(int *lev, int nb_strengths,
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
                           CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS];
  const int total_strengths = nb_cdef_strengths[pick_method];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id = 0;
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      if (mse[i][lev[gi]] < best_mse) {
        best_mse = mse[i][lev[gi]];
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      uint64_t best = best_mse;
      if (mse[i][j] < best) best = mse[i][j];
      tot_mse[j] += best;
    }
  }
  for (j = 0; j < total_strengths; j++) {
    if (tot_mse[j] < best_tot_mse) {
      best_tot_mse = tot_mse[j];
      best_id = j;
    }
  }
  lev[nb_strengths] = best_id;
  return best_tot_mse;
}

/* Search for the best luma+chroma strength to add as an option, knowing we
   already selected nb_strengths options. */
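/* Same greedy step as search_one(), but the luma (lev0) and chroma (lev1)
   strengths are chosen jointly: tot_mse[j][k] accumulates the cost of adding
   the (luma j, chroma k) pair, so each step is quadratic in the number of
   candidate strengths. */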
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
                                CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id0 = 0;
  int best_id1 = 0;
  const int total_strengths = nb_cdef_strengths[pick_method];
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      uint64_t curr = mse[0][i][lev0[gi]];
      curr += mse[1][i][lev1[gi]];
      if (curr < best_mse) {
        best_mse = curr;
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      int k;
      for (k = 0; k < total_strengths; k++) {
        uint64_t best = best_mse;
        uint64_t curr = mse[0][i][j];
        curr += mse[1][i][k];
        if (curr < best) best = curr;
        tot_mse[j][k] += best;
      }
    }
  }
  for (j = 0; j < total_strengths; j++) {
    int k;
    for (k = 0; k < total_strengths; k++) {
      if (tot_mse[j][k] < best_tot_mse) {
        best_tot_mse = tot_mse[j][k];
        best_id0 = j;
        best_id1 = k;
      }
    }
  }
  lev0[nb_strengths] = best_id0;
  lev1[nb_strengths] = best_id1;
  return best_tot_mse;
}

/* Search for the set of strengths that minimizes mse. */
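/* The set is grown greedily, one strength at a time. For the full search, a
   refinement pass then repeatedly drops the oldest selected strength and
   re-runs the greedy step so that every already-selected option gets
   reconsidered. */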
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
                                      uint64_t mse[][TOTAL_STRENGTHS],
                                      int sb_count,
                                      CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
              pick_method <= CDEF_FAST_SEARCH_LVL5);
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
  }
  /* Trying to refine the greedy search by reconsidering each
     already-selected option. */
  if (!fast) {
    for (i = 0; i < 4 * nb_strengths; i++) {
      int j;
      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
      best_tot_mse =
          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
    }
  }
  return best_tot_mse;
}

/* Search for the set of luma+chroma strengths that minimizes mse. */
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
                                           int nb_strengths,
                                           uint64_t (**mse)[TOTAL_STRENGTHS],
                                           int sb_count,
                                           CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse =
        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
  }
  /* Trying to refine the greedy search by reconsidering each
     already-selected option. */
  for (i = 0; i < 4 * nb_strengths; i++) {
    int j;
    for (j = 0; j < nb_strengths - 1; j++) {
      best_lev0[j] = best_lev0[j + 1];
      best_lev1[j] = best_lev1[j + 1];
    }
    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
                                   sb_count, pick_method);
  }
  return best_tot_mse;
}

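// Note: the distortion helpers below treat the filtered output in `src` as
// packed per 8x8/4x4 block, so the source stride they use is the block width
// rather than a frame stride.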
static inline void init_src_params(int *src_stride, int *width, int *height,
                                   int *width_log2, int *height_log2,
                                   BLOCK_SIZE bsize) {
  *src_stride = block_size_wide[bsize];
  *width = block_size_wide[bsize];
  *height = block_size_high[bsize];
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
}
#if CONFIG_AV1_HIGHBITDEPTH
/* Compute MSE only on the blocks we filtered. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *dst_buff = &dst16[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);
  for (bi = 0; bi < cdef_count; bi++) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    sum += aom_mse_wxh_16bit_highbd(
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
        &src[bi << (height_log2 + width_log2)], src_stride, width, height);
  }
  return sum >> 2 * coeff_shift;
}
#endif

// Checks whether dual and quad block processing is applicable for block widths
// 8 and 4, respectively.
static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
                                             int cdef_count, int bi, int iter) {
  assert(width == 8 || width == 4);
  const int blk_offset = (width == 8) ? 1 : 3;
  if ((iter + blk_offset) >= cdef_count) return 0;

  if (dlist[bi].by == dlist[bi + blk_offset].by &&
      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
    return 1;

  return 0;
}

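// Computes the distortion between the 8-bit frame buffer and the 16-bit CDEF
// output over the filtered blocks (low-bitdepth counterpart of
// compute_cdef_dist_highbd). When two 8-wide or four 4-wide blocks are
// horizontally adjacent in dlist, their error is computed with a single
// 16-wide call (aom_mse_16xh_16bit) instead of one call per block.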
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  int iter = 0;
  int inc = 1;
  uint8_t *dst8 = (uint8_t *)dst;
  uint8_t *dst_buff = &dst8[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  const int num_blks = 16 / width;
  for (bi = 0; bi < cdef_count; bi += inc) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
    uint8_t *dst_tmp =
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];

    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
      iter += num_blks;
      inc = num_blks;
    } else {
      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
                               height);
      iter += 1;
      inc = 1;
    }
  }

  return sum >> 2 * coeff_shift;
}

// Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
// region is outside the frame boundary.
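// The CDEF input buffer keeps a CDEF_HBORDER/CDEF_VBORDER ring of context
// pixels around the filter block. The parts of that ring that fall outside
// the frame are filled with the CDEF_VERY_LARGE sentinel so the filter can
// tell them apart from real pixels.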
static inline void fill_borders_for_fbs_on_frame_boundary(
    uint16_t *inbuf, int hfilt_size, int vfilt_size,
    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
    return;
  if (is_fb_on_frm_bottom_boundary) {
    // Fill bottom region of the block
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
    // Fill bottom-left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
    // Fill bottom-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary) {
    // Fill top region of the block
    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
    // Fill top-left region of the block
    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset = hfilt_size + CDEF_HBORDER;
    // Fill top-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_left_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_right_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill right region of the block
    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
}

// Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
// after CDEF filtering in a single function call.
static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
    int subsampling_y) {
  // TODO(Ranjit): Extend the optimization for 422
  if (subsampling_x != subsampling_y) return 1;

  // Combining more blocks seems to increase encode time due to increase in
  // control code
  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
     * logic if y co-ordinates match and x co-ordinates are
     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
    return 4;
  }
  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
     * logic if their y co-ordinates match and x co-ordinates are
     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
    return 2;
  }
  return 1;
}

// Returns the block error after CDEF filtering for a given strength
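// Three paths are taken here: (1) for low bitdepth with every 8x8/4x4 unit of
// the CDEF block filtered, the error is computed in one call over the whole
// block; (2) for low bitdepth with only some units filtered, the error is
// accumulated per unit (merging horizontally adjacent units where possible);
// (3) for high bitdepth, the error is computed by compute_cdef_dist_fn on the
// 16-bit filter output.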
static inline uint64_t get_filt_error(
    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
    int ref_stride, int row, int col, int pri_strength, int sec_strength,
    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
  uint64_t curr_sse = 0;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
  const int bw_log2 = 3 - pd->subsampling_x;
  const int bh_log2 = 3 - pd->subsampling_y;

  // TODO(Ranjit): Extend this optimization for HBD
  if (!cdef_search_ctx->use_highbitdepth) {
    // If all 8x8/4x4 blocks in the CDEF block need to be filtered, calculate
    // the error at the CDEF block level.
    const int tot_blk_count =
        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
        (bw_log2 + bh_log2);
    if (cdef_count == tot_blk_count) {
      // Calculate the offset in the buffer based on block position
      const FULLPEL_MV this_mv = { row, col };
      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
      if (pri_strength == 0 && sec_strength == 0) {
        // When the CDEF strength is zero, filtering is not applied. Hence the
        // error is calculated between the source and unfiltered pixels.
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride,
                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);

        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
                    block_size_high[plane_bsize]);
      }
    } else {
      // If only some of the 8x8/4x4 blocks in the CDEF block need to be
      // filtered, the filtering functions produce 8-bit output and the error
      // is calculated in the 8-bit domain.
      if (pri_strength == 0 && sec_strength == 0) {
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride,
              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          const int tmp_buf_offset =
              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
              (1 << MAX_SB_SIZE_LOG2),
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      }
    }
  } else {
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);

    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
                       sec_strength + (sec_strength == 3),
                       cdef_search_ctx->damping, coeff_shift);
    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
  }
  return curr_sse;
}

// Calculates MSE at block level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   fbr: Row index in units of 64x64 block
//   fbc: Column index in units of 64x64 block
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
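// Note: the two chroma planes share a single MSE array; the V-plane error is
// accumulated on top of the U-plane error in mse[1], so chroma strengths are
// chosen for U and V jointly.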
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                             struct aom_internal_error_info *error_info,
                             int fbr, int fbc, int sb_count) {
  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
  // in future to handle error propagation.
  (void)error_info;
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
  const int coeff_shift = cdef_search_ctx->coeff_shift;
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;

  // Declare and initialize the temporary buffers.
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
  int hb_step = 1, vb_step = 1;
  BLOCK_SIZE bs;

  const MB_MODE_INFO *const mbmi =
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
                              MI_SIZE_64X64 * fbc];

  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
                                        ref->v_buffer };
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
                                   ref->uv_stride };

  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
      mbmi->bsize == BLOCK_64X128) {
    bs = mbmi->bsize;
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
      hb_step = 2;
    }
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
      vb_step = 2;
    }
  } else {
    bs = BLOCK_64X64;
  }
  // Get the number of 8x8 blocks that are not skipped. CDEF processing is
  // applied only to 8x8 blocks that are not skipped.
  const int cdef_count = av1_cdef_compute_sb_list(
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
  const bool is_fb_on_frm_left_boundary = (fbc == 0);
  const bool is_fb_on_frm_right_boundary =
      (fbc + hb_step == cdef_search_ctx->nhfb);
  const bool is_fb_on_frm_top_boundary = (fbr == 0);
  const bool is_fb_on_frm_bottom_boundary =
      (fbr + vb_step == cdef_search_ctx->nvfb);
  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
  int dirinit = 0;
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    /* We avoid filtering the pixels for which some of the pixels to
       average are outside the frame. We could change the filter instead,
       but it would add special cases for any future vectorization. */
    const int hfilt_size = (nhb << mi_wide_l2[pli]);
    const int vfilt_size = (nvb << mi_high_l2[pli]);
    const int ysize =
        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
    const int xsize =
        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
                             ysize, xsize);
    fill_borders_for_fbs_on_frame_boundary(
        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
        is_fb_on_frm_bottom_boundary);
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
      int pri_strength, sec_strength;
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
                                &sec_strength, gi);
      const uint64_t curr_mse = get_filt_error(
          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
          pli, coeff_shift, bs);
      if (pli < 2)
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
      else
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    }
  }
  cdef_search_ctx->sb_index[sb_count] =
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
}

// MSE calculation at frame level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
                                struct aom_internal_error_info *error_info) {
  // Loop over each sb.
  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
      // Check if CDEF processing can be skipped for a particular sb.
      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
      // Calculate mse for each sb and store the relevant sb index.
      av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
                              cdef_search_ctx->sb_count);
      cdef_search_ctx->sb_count++;
    }
  }
}

// Allocates memory for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
  const int nvfb = cdef_search_ctx->nvfb;
  const int nhfb = cdef_search_ctx->nhfb;
  CHECK_MEM_ERROR(
      cm, cdef_search_ctx->sb_index,
      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
  cdef_search_ctx->sb_count = 0;
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
}

// Deallocates the memory allocated for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   Nothing will be returned.
void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
  if (cdef_search_ctx) {
    aom_free(cdef_search_ctx->mse[0]);
    cdef_search_ctx->mse[0] = NULL;
    aom_free(cdef_search_ctx->mse[1]);
    cdef_search_ctx->mse[1] = NULL;
    aom_free(cdef_search_ctx->sb_index);
    cdef_search_ctx->sb_index = NULL;
  }
}

// Initialize the parameters related to CDEF search context.
// Inputs:
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   pick_method: Search method used to select CDEF parameters
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
                                    const YV12_BUFFER_CONFIG *ref,
                                    AV1_COMMON *cm, MACROBLOCKD *xd,
                                    CdefSearchCtx *cdef_search_ctx,
                                    CDEF_PICK_METHOD pick_method) {
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int num_planes = av1_num_planes(cm);
  cdef_search_ctx->mi_params = &cm->mi_params;
  cdef_search_ctx->ref = ref;
  cdef_search_ctx->nvfb =
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->nhfb =
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
  cdef_search_ctx->num_planes = num_planes;
  cdef_search_ctx->pick_method = pick_method;
  cdef_search_ctx->sb_count = 0;
  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
                       num_planes);
  // Initialize plane wise information.
  for (int pli = 0; pli < num_planes; pli++) {
    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
    cdef_search_ctx->bsize[pli] =
        cdef_search_ctx->ydec[pli]
            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    cdef_search_ctx->mi_wide_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    cdef_search_ctx->mi_high_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    cdef_search_ctx->plane[pli] = xd->plane[pli];
  }
  // Function pointer initialization.
#if CONFIG_AV1_HIGHBITDEPTH
  if (cm->seq_params->use_highbitdepth) {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
  } else {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
  }
#else
  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
#endif
}

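// Pick CDEF strengths directly from the quantizer instead of searching: the
// luma/chroma primary and secondary strengths are predicted with quadratic
// models in q (separate sets of coefficients for screen content, inter frames
// and intra-only frames), then clamped to the valid signaling ranges.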
void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                           int is_screen_content) {
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;
  // Check the speed feature to avoid extra signaling.
  if (skip_cdef) {
    cdef_info->cdef_bits = 1;
    cdef_info->nb_cdef_strengths = 2;
  } else {
    cdef_info->cdef_bits = 0;
    cdef_info->nb_cdef_strengths = 1;
  }
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  int predicted_y_f1 = 0;
  int predicted_y_f2 = 0;
  int predicted_uv_f1 = 0;
  int predicted_uv_f2 = 0;
  if (is_screen_content) {
    predicted_y_f1 =
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
    predicted_y_f2 =
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
    predicted_uv_f1 =
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
    predicted_uv_f2 =
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
    predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
    predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
    predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
    predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
  } else {
    if (!frame_is_intra_only(cm)) {
      predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
                                         q * 0.0068615186f + 0.02709886f),
                             0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
                                         q * 0.0013993345f + 0.03831067f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
                                          q * 0.0034628846f + 0.00887099f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
                                          q * 0.00028223585f + 0.05576307f),
                              0, 3);
    } else {
      predicted_y_f1 = clamp(
          (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
          0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
                                         q * 0.0027798624f + 0.0079405f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
                                          q * 0.012892405f - 0.00748388f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
                                          q * 0.00035520183f + 0.00228092f),
                              0, 3);
    }
  }
  cdef_info->cdef_strengths[0] =
      predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
  cdef_info->cdef_uv_strengths[0] =
      predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  MB_MODE_INFO **mbmi = mi_params->mi_grid_base;
  // mbmi is NULL when the real-time rate control library is used.
  if (!mbmi) return;
  for (int r = 0; r < nvfb; ++r) {
    for (int c = 0; c < nhfb; ++c) {
      MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
      current_mbmi->cdef_strength = 0;
    }
    mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
  }
}

void av1_cdef_search(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;

  assert(cdef_control != CDEF_NONE);
  // For CDEF_ADAPTIVE, turning off CDEF around qindex 32 was best for still
  // pictures.
  if ((cdef_control == CDEF_REFERENCE &&
       cpi->ppi->rtc_ref.non_reference_frame) ||
      (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
       (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
       cpi->oxcf.rc_cfg.cq_level <= 32)) {
    CdefInfo *const cdef_info = &cm->cdef_info;
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  // Indicate if external RC is used for testing
  const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl;
  if (rtc_ext_rc) {
    av1_pick_cdef_from_qp(cm, 0, 0);
    return;
  }
  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
  if (pick_method == CDEF_PICK_FROM_Q) {
    const int use_screen_content_model =
        cm->quant_params.base_qindex >
            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
                   cpi->rc.best_quality + 5) &&
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
                          use_screen_content_model);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

  if (!cpi->cdef_search_ctx)
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;

  // Initialize parameters related to CDEF search context.
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
                   pick_method);
  // Allocate CDEF search context buffers.
  cdef_alloc_data(cm, cdef_search_ctx);
  // Frame level mse calculation.
  if (cpi->mt_info.num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cpi);
  } else {
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
  }

  /* Search for different numbers of signaling bits. */
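  /* Try cdef_bits = 0..3, i.e. 1, 2, 4 or 8 strength pairs. For each choice,
     the best set of strengths is selected greedily (jointly over luma and
     chroma when chroma planes are present), and the rate-distortion cost
     combines the per-SB index bits (sb_count * i) with the bits needed to
     signal the strengths themselves (CDEF_STRENGTH_BITS per strength, doubled
     when chroma is present). */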
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx->sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx->mse[0];
  mse[1] = cdef_search_ctx->mse[1];
  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  int rdmult = cpi->td.mb.rdmult;
  for (int i = 0; i <= 3; i++) {
    if (i > max_signaling_bits) break;
    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
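  // For each superblock, pick the entry of the selected strength set with the
  // lowest combined luma+chroma MSE; this per-SB index is what gets signaled
  // with cdef_bits bits.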
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
        best_gi;
  }
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  // For CDEF_ADAPTIVE, set primary and secondary CDEF at reduced strength for
  // qindexes 33 through 220.
  // Note 1: for odd strengths, the 0.5 discarded by ">> 1" is a significant
  // part of the strength when the strength is small, and because there are
  // few strength levels, odd strengths are reduced significantly more than a
  // half. This is intended behavior for reduced strength.
  // For example: a pri strength of 3 becomes 1, and a sec strength of 1
  // becomes 0.
  // Note 2: a (signaled) sec strength value of 3 is special as it results in
  // an actual sec strength of 4. We tried adding +1 to the sec strength 3 so
  // it maps to a reduced sec strength of 2. However, on Daala's subset1, the
  // resulting SSIMULACRA 2 scores were either exactly the same (at cpu-used
  // 6), or within noise level (at cpu-used 3). Given that there were no
  // discernible improvements, this special mapping was left out for reduced
  // strength.
  if (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
      (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
      cpi->oxcf.rc_cfg.cq_level <= 220) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];

      const int new_pri_luma_strength =
          (luma_strength / CDEF_SEC_STRENGTHS) >> 1;
      const int new_sec_luma_strength =
          (luma_strength % CDEF_SEC_STRENGTHS) >> 1;
      const int new_pri_chroma_strength =
          (chroma_strength / CDEF_SEC_STRENGTHS) >> 1;
      const int new_sec_chroma_strength =
          (chroma_strength % CDEF_SEC_STRENGTHS) >> 1;

      cdef_info->cdef_strengths[j] =
          new_pri_luma_strength * CDEF_SEC_STRENGTHS + new_sec_luma_strength;
      cdef_info->cdef_uv_strengths[j] =
          new_pri_chroma_strength * CDEF_SEC_STRENGTHS +
          new_sec_chroma_strength;
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  av1_cdef_dealloc_data(cdef_search_ctx);
}