65 lines
2.5 KiB
C++
65 lines
2.5 KiB
C++
// Copyright 2020 Google LLC
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Demo of functions that might be called from multiple SIMD modules (either
|
|
// other -inl.h files, or a .cc file between begin/end_target-inl). This is
|
|
// optional - all SIMD code can reside in .cc files. However, this allows
|
|
// splitting code into different files while still inlining instead of requiring
|
|
// calling through function pointers.
|
|
|
|
// Per-target include guard. This is only required when using dynamic dispatch,
|
|
// i.e. including foreach_target.h. For static dispatch, a normal include
|
|
// guard would be fine because the header is only compiled once.
|
|
#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == \
|
|
defined(HWY_TARGET_TOGGLE)
|
|
#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
|
|
#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
|
|
#else
|
|
#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
|
|
#endif
|
|
|
|
// It is fine to #include normal or *-inl headers.
|
|
#include "hwy/highway.h"
|
|
|
|
HWY_BEFORE_NAMESPACE();
|
|
namespace skeleton {
|
|
namespace HWY_NAMESPACE {
|
|
|
|
// Highway ops reside here; ADL does not find templates nor builtins.
|
|
namespace hn = hwy::HWY_NAMESPACE;
|
|
|
|
// Example of a type-agnostic (caller-specified lane type) and width-agnostic
|
|
// (uses best available instruction set) function in a header.
|
|
//
|
|
// Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
|
|
template <class D, typename T>
|
|
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
|
|
const T* HWY_RESTRICT add_array,
|
|
const size_t size, T* HWY_RESTRICT x_array) {
|
|
for (size_t i = 0; i < size; i += hn::Lanes(d)) {
|
|
const auto mul = hn::Load(d, mul_array + i);
|
|
const auto add = hn::Load(d, add_array + i);
|
|
auto x = hn::Load(d, x_array + i);
|
|
x = hn::MulAdd(mul, x, add);
|
|
hn::Store(x, d, x_array + i);
|
|
}
|
|
}
|
|
|
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
} // namespace HWY_NAMESPACE
|
|
} // namespace skeleton
|
|
HWY_AFTER_NAMESPACE();
|
|
|
|
#endif // include guard
|