209 lines
8.1 KiB
C++
209 lines
8.1 KiB
C++
// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
#include <stddef.h>
|
|
|
|
#undef HWY_TARGET_INCLUDE
|
|
#define HWY_TARGET_INCLUDE "tests/table_test.cc"
|
|
#include "hwy/foreach_target.h" // IWYU pragma: keep
|
|
#include "hwy/highway.h"
|
|
#include "hwy/tests/test_util-inl.h"
|
|
|
|
HWY_BEFORE_NAMESPACE();
|
|
namespace hwy {
|
|
namespace HWY_NAMESPACE {
|
|
|
|
struct TestTableLookupLanes {
|
|
template <class T, class D>
|
|
HWY_NOINLINE void operator()(T /*unused*/, D d) {
|
|
const RebindToSigned<D> di;
|
|
using TI = TFromD<decltype(di)>;
|
|
#if HWY_TARGET != HWY_SCALAR
|
|
const size_t N = Lanes(d);
|
|
auto idx = AllocateAligned<TI>(N);
|
|
auto expected = AllocateAligned<T>(N);
|
|
HWY_ASSERT(idx && expected);
|
|
ZeroBytes(idx.get(), N * sizeof(TI));
|
|
const auto v = Iota(d, 1);
|
|
|
|
if (N <= 8) { // Test all permutations
|
|
for (size_t i0 = 0; i0 < N; ++i0) {
|
|
idx[0] = static_cast<TI>(i0);
|
|
|
|
for (size_t i1 = 0; i1 < N; ++i1) {
|
|
if (N >= 2) idx[1] = static_cast<TI>(i1);
|
|
for (size_t i2 = 0; i2 < N; ++i2) {
|
|
if (N >= 4) idx[2] = static_cast<TI>(i2);
|
|
for (size_t i3 = 0; i3 < N; ++i3) {
|
|
if (N >= 4) idx[3] = static_cast<TI>(i3);
|
|
|
|
for (size_t i = 0; i < N; ++i) {
|
|
expected[i] = ConvertScalarTo<T>(idx[i] + 1); // == v[idx[i]]
|
|
}
|
|
|
|
const auto opaque1 = IndicesFromVec(d, Load(di, idx.get()));
|
|
const auto actual1 = TableLookupLanes(v, opaque1);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1);
|
|
|
|
const auto opaque2 = SetTableIndices(d, idx.get());
|
|
const auto actual2 = TableLookupLanes(v, opaque2);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Too many permutations to test exhaustively; choose one with repeated
|
|
// and cross-block indices and ensure indices do not exceed #lanes.
|
|
// For larger vectors, upper lanes will be zero.
|
|
HWY_ALIGN TI idx_source[16] = {1, 3, 2, 2, 8, 1, 7, 6,
|
|
15, 14, 14, 15, 4, 9, 8, 5};
|
|
for (size_t i = 0; i < N; ++i) {
|
|
idx[i] = (i < 16) ? idx_source[i] : 0;
|
|
// Avoid undefined results / asan error for scalar by capping indices.
|
|
if (idx[i] >= static_cast<TI>(N)) {
|
|
idx[i] = static_cast<TI>(N - 1);
|
|
}
|
|
expected[i] = ConvertScalarTo<T>(idx[i] + 1); // == v[idx[i]]
|
|
}
|
|
|
|
const auto opaque1 = IndicesFromVec(d, Load(di, idx.get()));
|
|
const auto actual1 = TableLookupLanes(v, opaque1);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1);
|
|
|
|
const auto opaque2 = SetTableIndices(d, idx.get());
|
|
const auto actual2 = TableLookupLanes(v, opaque2);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2);
|
|
}
|
|
#else
|
|
const TI index = 0;
|
|
const auto v = Set(d, 1);
|
|
const auto opaque1 = SetTableIndices(d, &index);
|
|
HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque1));
|
|
const auto opaque2 = IndicesFromVec(d, Zero(di));
|
|
HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque2));
|
|
#endif
|
|
}
|
|
};
|
|
|
|
// Entry point registered with the test suite: wraps TestTableLookupLanes in
// the ForPartialVectors adapter and hands it to ForAllTypes. Both helpers are
// defined outside this file.
HWY_NOINLINE void TestAllTableLookupLanes() {
  const ForPartialVectors<TestTableLookupLanes> table_lookup_test{};
  ForAllTypes(table_lookup_test);
}
|
|
|
|
struct TestTwoTablesLookupLanes {
|
|
template <class T, class D>
|
|
HWY_NOINLINE void operator()(T /*unused*/, D d) {
|
|
const RebindToUnsigned<D> du;
|
|
using TU = TFromD<decltype(du)>;
|
|
|
|
const size_t N = Lanes(d);
|
|
const size_t twiceN = N * 2;
|
|
auto idx = AllocateAligned<TU>(twiceN);
|
|
auto expected = AllocateAligned<T>(twiceN);
|
|
HWY_ASSERT(idx && expected);
|
|
ZeroBytes(idx.get(), twiceN * sizeof(TU));
|
|
const auto a = Iota(d, 1);
|
|
const auto b = Add(a, Set(d, ConvertScalarTo<T>(N)));
|
|
|
|
if (twiceN <= 8) { // Test all permutations
|
|
for (size_t i0 = 0; i0 < twiceN; ++i0) {
|
|
idx[0] = static_cast<TU>(i0);
|
|
|
|
for (size_t i1 = 0; i1 < twiceN; ++i1) {
|
|
if (twiceN >= 2) idx[1] = static_cast<TU>(i1);
|
|
for (size_t i2 = 0; i2 < twiceN; ++i2) {
|
|
if (twiceN >= 4) idx[2] = static_cast<TU>(i2);
|
|
for (size_t i3 = 0; i3 < twiceN; ++i3) {
|
|
if (twiceN >= 4) idx[3] = static_cast<TU>(i3);
|
|
|
|
for (size_t i = 0; i < twiceN; ++i) {
|
|
expected[i] = ConvertScalarTo<T>(idx[i] + 1); // == v[idx[i]]
|
|
}
|
|
|
|
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
|
|
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
|
|
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
|
|
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
|
|
|
|
const auto opaque2_a = SetTableIndices(d, idx.get());
|
|
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
|
|
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
|
|
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
constexpr size_t kLanesPerBlock = 16 / sizeof(T);
|
|
constexpr size_t kMaxBlockIdx = static_cast<size_t>(LimitsMax<TU>()) >> 1;
|
|
static_assert(kMaxBlockIdx > 0, "kMaxBlockIdx > 0 must be true");
|
|
|
|
const size_t num_of_blocks_per_vect = HWY_MAX(N / kLanesPerBlock, 1);
|
|
const size_t num_of_blocks_to_check =
|
|
HWY_MIN(num_of_blocks_per_vect * 2, kMaxBlockIdx);
|
|
|
|
for (size_t i = 0; i < num_of_blocks_to_check; i++) {
|
|
// Too many permutations to test exhaustively; choose one with repeated
|
|
// and cross-block indices and ensure indices do not exceed #lanes.
|
|
// For larger vectors, upper lanes will be zero.
|
|
HWY_ALIGN TU idx_source[16] = {1, 3, 2, 2, 8, 1, 7, 6,
|
|
15, 14, 14, 15, 4, 9, 8, 5};
|
|
for (size_t j = 0; j < twiceN; ++j) {
|
|
idx[j] = static_cast<TU>((i * kLanesPerBlock + idx_source[j & 15] +
|
|
(j & static_cast<size_t>(-16))) &
|
|
(twiceN - 1));
|
|
expected[j] = ConvertScalarTo<T>(idx[j] + 1); // == v[idx[j]]
|
|
}
|
|
|
|
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
|
|
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
|
|
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
|
|
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
|
|
|
|
const auto opaque2_a = SetTableIndices(d, idx.get());
|
|
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
|
|
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
|
|
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
|
|
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// Entry point registered with the test suite: wraps TestTwoTablesLookupLanes
// in the ForPartialVectors adapter and hands it to ForAllTypes. Both helpers
// are defined outside this file.
HWY_NOINLINE void TestAllTwoTablesLookupLanes() {
  const ForPartialVectors<TestTwoTablesLookupLanes> two_tables_test{};
  ForAllTypes(two_tables_test);
}
|
|
|
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
} // namespace HWY_NAMESPACE
|
|
} // namespace hwy
|
|
HWY_AFTER_NAMESPACE();
|
|
|
|
// NOTE(review): HWY_ONCE presumably holds for only one of the per-target
// passes driven by foreach_target.h, so the suite is registered once --
// confirm against the Highway headers.
#if HWY_ONCE

namespace hwy {
// Registers the two entry points defined above under the HwyTableTest suite.
HWY_BEFORE_TEST(HwyTableTest);
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes);
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes);
}  // namespace hwy

#endif  // HWY_ONCE
|