213 lines
7.1 KiB
C++
213 lines
7.1 KiB
C++
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
|
//
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
#if defined(LIB_JPEGLI_ENTROPY_CODING_INL_H_) == defined(HWY_TARGET_TOGGLE)
|
|
#ifdef LIB_JPEGLI_ENTROPY_CODING_INL_H_
|
|
#undef LIB_JPEGLI_ENTROPY_CODING_INL_H_
|
|
#else
|
|
#define LIB_JPEGLI_ENTROPY_CODING_INL_H_
|
|
#endif
|
|
|
|
#include "lib/jxl/base/compiler_specific.h"
|
|
|
|
HWY_BEFORE_NAMESPACE();
|
|
namespace jpegli {
|
|
namespace HWY_NAMESPACE {
|
|
namespace {
|
|
|
|
// These templates are not found via ADL.
|
|
using hwy::HWY_NAMESPACE::Abs;
|
|
using hwy::HWY_NAMESPACE::Add;
|
|
using hwy::HWY_NAMESPACE::And;
|
|
using hwy::HWY_NAMESPACE::AndNot;
|
|
using hwy::HWY_NAMESPACE::Compress;
|
|
using hwy::HWY_NAMESPACE::CountTrue;
|
|
using hwy::HWY_NAMESPACE::Eq;
|
|
using hwy::HWY_NAMESPACE::GetLane;
|
|
using hwy::HWY_NAMESPACE::MaskFromVec;
|
|
using hwy::HWY_NAMESPACE::Max;
|
|
using hwy::HWY_NAMESPACE::Not;
|
|
using hwy::HWY_NAMESPACE::Or;
|
|
using hwy::HWY_NAMESPACE::ShiftRight;
|
|
using hwy::HWY_NAMESPACE::Shl;
|
|
using hwy::HWY_NAMESPACE::Sub;
|
|
|
|
using DI = HWY_FULL(int32_t);
|
|
constexpr DI di;
|
|
|
|
template <typename DI, class V>
|
|
JXL_INLINE V NumBits(DI di, const V x) {
|
|
// TODO(szabadka) Add faster implementations for some specific architectures.
|
|
const auto b1 = And(x, Set(di, 1));
|
|
const auto b2 = And(x, Set(di, 2));
|
|
const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1));
|
|
const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4));
|
|
const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11));
|
|
const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26));
|
|
const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57));
|
|
const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120));
|
|
const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247));
|
|
const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502));
|
|
const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013));
|
|
const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036));
|
|
return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))),
|
|
Max(Max(b9, b10), Max(b11, b12)));
|
|
}
|
|
|
|
// Coefficient indexes pre-multiplied by 16 for the symbol calculation.
|
|
HWY_ALIGN constexpr int32_t kIndexes[64] = {
|
|
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
|
|
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
|
|
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
|
|
624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816,
|
|
832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008,
|
|
};
|
|
|
|
JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block,
|
|
int32_t* JXL_RESTRICT nonzero_idx) {
|
|
const auto zero = Zero(di);
|
|
HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1};
|
|
const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes));
|
|
int num_nonzeros = 0;
|
|
int k = 0;
|
|
{
|
|
const auto coef = Load(di, block);
|
|
const auto idx = Load(di, kIndexes);
|
|
const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero)));
|
|
const auto nzero_coef = Compress(coef, nonzero_mask);
|
|
const auto nzero_idx = Compress(idx, nonzero_mask);
|
|
StoreU(nzero_coef, di, &block[num_nonzeros]);
|
|
StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
|
|
num_nonzeros += CountTrue(di, nonzero_mask);
|
|
k += Lanes(di);
|
|
}
|
|
for (; k < DCTSIZE2; k += Lanes(di)) {
|
|
const auto coef = Load(di, &block[k]);
|
|
const auto idx = Load(di, &kIndexes[k]);
|
|
const auto nonzero_mask = Not(Eq(coef, zero));
|
|
const auto nzero_coef = Compress(coef, nonzero_mask);
|
|
const auto nzero_idx = Compress(idx, nonzero_mask);
|
|
StoreU(nzero_coef, di, &block[num_nonzeros]);
|
|
StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
|
|
num_nonzeros += CountTrue(di, nonzero_mask);
|
|
}
|
|
return num_nonzeros;
|
|
}
|
|
|
|
JXL_INLINE void ComputeSymbols(const int num_nonzeros,
|
|
int32_t* JXL_RESTRICT nonzero_idx,
|
|
int32_t* JXL_RESTRICT block,
|
|
int32_t* JXL_RESTRICT symbols) {
|
|
nonzero_idx[-1] = -16;
|
|
const auto one = Set(di, 1);
|
|
const auto offset = Set(di, 16);
|
|
for (int i = 0; i < num_nonzeros; i += Lanes(di)) {
|
|
const auto idx = Load(di, &nonzero_idx[i]);
|
|
const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]);
|
|
const auto coeff = Load(di, &block[i]);
|
|
const auto nbits = NumBits(di, Abs(coeff));
|
|
const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff);
|
|
const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one));
|
|
const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset));
|
|
Store(symbol, di, symbols + i);
|
|
Store(bits, di, block + i);
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
int NumNonZero8x8ExceptDC(const T* block) {
|
|
const HWY_CAPPED(T, 8) di;
|
|
|
|
const auto zero = Zero(di);
|
|
// Add FFFF for every zero coefficient, negate to get #zeros.
|
|
auto neg_sum_zero = zero;
|
|
{
|
|
// First row has DC, so mask
|
|
const size_t y = 0;
|
|
HWY_ALIGN const T dc_mask_lanes[8] = {-1};
|
|
|
|
for (size_t x = 0; x < 8; x += Lanes(di)) {
|
|
const auto dc_mask = Load(di, dc_mask_lanes + x);
|
|
|
|
// DC counts as zero so we don't include it in nzeros.
|
|
const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x]));
|
|
|
|
neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
|
|
}
|
|
}
|
|
// Remaining rows: no mask
|
|
for (size_t y = 1; y < 8; y++) {
|
|
for (size_t x = 0; x < 8; x += Lanes(di)) {
|
|
const auto coef = Load(di, &block[y * 8 + x]);
|
|
neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
|
|
}
|
|
}
|
|
|
|
// We want 64 - sum_zero, add because neg_sum_zero is already negated.
|
|
return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero));
|
|
}
|
|
|
|
template <typename T, bool zig_zag_order>
|
|
void ComputeTokensForBlock(const T* block, int last_dc, int dc_ctx, int ac_ctx,
|
|
Token** tokens_ptr) {
|
|
Token* next_token = *tokens_ptr;
|
|
coeff_t temp2;
|
|
coeff_t temp;
|
|
temp = block[0] - last_dc;
|
|
if (temp == 0) {
|
|
*next_token++ = Token(dc_ctx, 0, 0);
|
|
} else {
|
|
temp2 = temp;
|
|
if (temp < 0) {
|
|
temp = -temp;
|
|
temp2--;
|
|
}
|
|
int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
|
|
int dc_mask = (1 << dc_nbits) - 1;
|
|
*next_token++ = Token(dc_ctx, dc_nbits, temp2 & dc_mask);
|
|
}
|
|
int num_nonzeros = NumNonZero8x8ExceptDC(block);
|
|
for (int k = 1; k < 64; ++k) {
|
|
if (num_nonzeros == 0) {
|
|
*next_token++ = Token(ac_ctx, 0, 0);
|
|
break;
|
|
}
|
|
int r = 0;
|
|
if (zig_zag_order) {
|
|
while ((temp = block[k]) == 0) {
|
|
r++;
|
|
k++;
|
|
}
|
|
} else {
|
|
while ((temp = block[kJPEGNaturalOrder[k]]) == 0) {
|
|
r++;
|
|
k++;
|
|
}
|
|
}
|
|
--num_nonzeros;
|
|
if (temp < 0) {
|
|
temp = -temp;
|
|
temp2 = ~temp;
|
|
} else {
|
|
temp2 = temp;
|
|
}
|
|
while (r > 15) {
|
|
*next_token++ = Token(ac_ctx, 0xf0, 0);
|
|
r -= 16;
|
|
}
|
|
int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
|
|
int ac_mask = (1 << ac_nbits) - 1;
|
|
int symbol = (r << 4u) + ac_nbits;
|
|
*next_token++ = Token(ac_ctx, symbol, temp2 & ac_mask);
|
|
}
|
|
*tokens_ptr = next_token;
|
|
}
|
|
|
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
} // namespace
|
|
} // namespace HWY_NAMESPACE
|
|
} // namespace jpegli
|
|
HWY_AFTER_NAMESPACE();
|
|
#endif // LIB_JPEGLI_ENTROPY_CODING_INL_H_
|