467 lines
17 KiB
C++
467 lines
17 KiB
C++
// Copyright 2020 Google LLC
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
|
|
#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
|
|
|
|
// SIMD/multicore-friendly planar image representation with row accessors.
|
|
|
|
#include <string.h>
|
|
|
|
#include <utility> // std::move
|
|
|
|
#include "hwy/aligned_allocator.h"
|
|
#include "hwy/base.h"
|
|
|
|
namespace hwy {
|
|
|
|
// Type-independent parts of Image<> - reduces code duplication and facilitates
|
|
// moving member function implementations to cc file.
|
|
struct HWY_CONTRIB_DLLEXPORT ImageBase {
|
|
// Returns required alignment in bytes for externally allocated memory.
|
|
static size_t VectorSize();
|
|
|
|
// Returns distance [bytes] between the start of two consecutive rows, a
|
|
// multiple of VectorSize but NOT kAlias (see implementation).
|
|
static size_t BytesPerRow(size_t xsize, size_t sizeof_t);
|
|
|
|
// No allocation (for output params or unused images)
|
|
ImageBase()
|
|
: xsize_(0),
|
|
ysize_(0),
|
|
bytes_per_row_(0),
|
|
bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
|
|
|
|
// Allocates memory (this is the common case)
|
|
ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
|
|
|
|
// References but does not take ownership of external memory. Useful for
|
|
// interoperability with other libraries. `aligned` must be aligned to a
|
|
// multiple of VectorSize() and `bytes_per_row` must also be a multiple of
|
|
// VectorSize() or preferably equal to BytesPerRow().
|
|
ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
|
|
|
|
// Copy construction/assignment is forbidden to avoid inadvertent copies,
|
|
// which can be very expensive. Use CopyImageTo() instead.
|
|
ImageBase(const ImageBase& other) = delete;
|
|
ImageBase& operator=(const ImageBase& other) = delete;
|
|
|
|
// Move constructor (required for returning Image from function)
|
|
ImageBase(ImageBase&& other) noexcept = default;
|
|
|
|
// Move assignment (required for std::vector)
|
|
ImageBase& operator=(ImageBase&& other) noexcept = default;
|
|
|
|
void Swap(ImageBase& other);
|
|
|
|
// Useful for pre-allocating image with some padding for alignment purposes
|
|
// and later reporting the actual valid dimensions. Caller is responsible
|
|
// for ensuring xsize/ysize are <= the original dimensions.
|
|
void ShrinkTo(const size_t xsize, const size_t ysize) {
|
|
xsize_ = static_cast<uint32_t>(xsize);
|
|
ysize_ = static_cast<uint32_t>(ysize);
|
|
// NOTE: we can't recompute bytes_per_row for more compact storage and
|
|
// better locality because that would invalidate the image contents.
|
|
}
|
|
|
|
// How many pixels.
|
|
HWY_INLINE size_t xsize() const { return xsize_; }
|
|
HWY_INLINE size_t ysize() const { return ysize_; }
|
|
|
|
// NOTE: do not use this for copying rows - the valid xsize may be much less.
|
|
HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
|
|
|
|
// Raw access to byte contents, for interfacing with other libraries.
|
|
// Unsigned char instead of char to avoid surprises (sign extension).
|
|
HWY_INLINE uint8_t* bytes() {
|
|
void* p = bytes_.get();
|
|
return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
|
|
}
|
|
HWY_INLINE const uint8_t* bytes() const {
|
|
const void* p = bytes_.get();
|
|
return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
|
|
}
|
|
|
|
protected:
|
|
// Returns pointer to the start of a row.
|
|
HWY_INLINE void* VoidRow(const size_t y) const {
|
|
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
|
|
if (y >= ysize_) {
|
|
HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
|
|
}
|
|
#endif
|
|
|
|
void* row = bytes_.get() + y * bytes_per_row_;
|
|
return HWY_ASSUME_ALIGNED(row, 64);
|
|
}
|
|
|
|
enum class Padding {
|
|
// Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
|
|
kRoundUp,
|
|
// Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
|
|
// vector to be initialized. If done by default, this would suppress
|
|
// legitimate msan warnings. We therefore require users to explicitly call
|
|
// InitializePadding before using unaligned loads (e.g. convolution).
|
|
kUnaligned
|
|
};
|
|
|
|
// Initializes the minimum bytes required to suppress msan warnings from
|
|
// legitimate (according to Padding mode) vector loads/stores on the right
|
|
// border, where some lanes are uninitialized and assumed to be unused.
|
|
void InitializePadding(size_t sizeof_t, Padding padding);
|
|
|
|
// (Members are non-const to enable assignment during move-assignment.)
|
|
uint32_t xsize_; // In valid pixels, not including any padding.
|
|
uint32_t ysize_;
|
|
size_t bytes_per_row_; // Includes padding.
|
|
AlignedFreeUniquePtr<uint8_t[]> bytes_;
|
|
};
|
|
|
|
// Single channel, aligned rows separated by padding. T must be POD.
|
|
//
|
|
// 'Single channel' (one 2D array per channel) simplifies vectorization
|
|
// (repeating the same operation on multiple adjacent components) without the
|
|
// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
|
|
// can easily iterate over all components in a row and Image requires no
|
|
// knowledge of the pixel format beyond the component type "T".
|
|
//
|
|
// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
|
|
// false sharing between two threads operating on adjacent rows.
|
|
//
|
|
// 'Padding' is still relevant because vectors could potentially be larger than
|
|
// a cache line. By rounding up row sizes to the vector size, we allow
|
|
// reading/writing ALIGNED vectors whose first lane is a valid sample. This
|
|
// avoids needing a separate loop to handle remaining unaligned lanes.
|
|
//
|
|
// This image layout could also be achieved with a vector and a row accessor
|
|
// function, but a class wrapper with support for "deleter" allows wrapping
|
|
// existing memory allocated by clients without copying the pixels. It also
|
|
// provides convenient accessors for xsize/ysize, which shortens function
|
|
// argument lists. Supports move-construction so it can be stored in containers.
|
|
template <typename ComponentType>
|
|
class Image : public ImageBase {
|
|
public:
|
|
using T = ComponentType;
|
|
|
|
Image() = default;
|
|
Image(const size_t xsize, const size_t ysize)
|
|
: ImageBase(xsize, ysize, sizeof(T)) {}
|
|
Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
|
|
void* aligned)
|
|
: ImageBase(xsize, ysize, bytes_per_row, aligned) {}
|
|
|
|
void InitializePaddingForUnalignedAccesses() {
|
|
InitializePadding(sizeof(T), Padding::kUnaligned);
|
|
}
|
|
|
|
HWY_INLINE const T* ConstRow(const size_t y) const {
|
|
return static_cast<const T*>(VoidRow(y));
|
|
}
|
|
HWY_INLINE const T* ConstRow(const size_t y) {
|
|
return static_cast<const T*>(VoidRow(y));
|
|
}
|
|
|
|
// Returns pointer to non-const. This allows passing const Image* parameters
|
|
// when the callee is only supposed to fill the pixels, as opposed to
|
|
// allocating or resizing the image.
|
|
HWY_INLINE T* MutableRow(const size_t y) const {
|
|
return static_cast<T*>(VoidRow(y));
|
|
}
|
|
HWY_INLINE T* MutableRow(const size_t y) {
|
|
return static_cast<T*>(VoidRow(y));
|
|
}
|
|
|
|
// Returns number of pixels (some of which are padding) per row. Useful for
|
|
// computing other rows via pointer arithmetic. WARNING: this must
|
|
// NOT be used to determine xsize.
|
|
HWY_INLINE intptr_t PixelsPerRow() const {
|
|
return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
|
|
}
|
|
};
|
|
|
|
using ImageF = Image<float>;
|
|
|
|
// A bundle of 3 same-sized images. To fill an existing Image3 using
|
|
// single-channel producers, we also need access to each const Image*. Const
|
|
// prevents breaking the same-size invariant, while still allowing pixels to be
|
|
// changed via MutableRow.
|
|
template <typename ComponentType>
|
|
class Image3 {
|
|
public:
|
|
using T = ComponentType;
|
|
using ImageT = Image<T>;
|
|
static constexpr size_t kNumPlanes = 3;
|
|
|
|
Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
|
|
|
|
Image3(const size_t xsize, const size_t ysize)
|
|
: planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
|
|
ImageT(xsize, ysize)} {}
|
|
|
|
Image3(Image3&& other) noexcept {
|
|
for (size_t i = 0; i < kNumPlanes; i++) {
|
|
planes_[i] = std::move(other.planes_[i]);
|
|
}
|
|
}
|
|
|
|
Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
|
|
if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
|
|
HWY_ABORT(
|
|
"Not same size: %d x %d, %d x %d, %d x %d\n",
|
|
static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
|
|
static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
|
|
static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
|
|
}
|
|
planes_[0] = std::move(plane0);
|
|
planes_[1] = std::move(plane1);
|
|
planes_[2] = std::move(plane2);
|
|
}
|
|
|
|
// Copy construction/assignment is forbidden to avoid inadvertent copies,
|
|
// which can be very expensive. Use CopyImageTo instead.
|
|
Image3(const Image3& other) = delete;
|
|
Image3& operator=(const Image3& other) = delete;
|
|
|
|
Image3& operator=(Image3&& other) noexcept {
|
|
for (size_t i = 0; i < kNumPlanes; i++) {
|
|
planes_[i] = std::move(other.planes_[i]);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
|
|
return static_cast<const T*>(VoidPlaneRow(c, y));
|
|
}
|
|
HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
|
|
return static_cast<const T*>(VoidPlaneRow(c, y));
|
|
}
|
|
|
|
HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
|
|
return static_cast<T*>(VoidPlaneRow(c, y));
|
|
}
|
|
HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
|
|
return static_cast<T*>(VoidPlaneRow(c, y));
|
|
}
|
|
|
|
HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
|
|
|
|
void Swap(Image3& other) {
|
|
for (size_t c = 0; c < 3; ++c) {
|
|
other.planes_[c].Swap(planes_[c]);
|
|
}
|
|
}
|
|
|
|
void ShrinkTo(const size_t xsize, const size_t ysize) {
|
|
for (ImageT& plane : planes_) {
|
|
plane.ShrinkTo(xsize, ysize);
|
|
}
|
|
}
|
|
|
|
// Sizes of all three images are guaranteed to be equal.
|
|
HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
|
|
HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
|
|
// Returns offset [bytes] from one row to the next row of the same plane.
|
|
// WARNING: this must NOT be used to determine xsize, nor for copying rows -
|
|
// the valid xsize may be much less.
|
|
HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
|
|
// Returns number of pixels (some of which are padding) per row. Useful for
|
|
// computing other rows via pointer arithmetic. WARNING: this must NOT be used
|
|
// to determine xsize.
|
|
HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
|
|
|
|
private:
|
|
// Returns pointer to the start of a row.
|
|
HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
|
|
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
|
|
if (c >= kNumPlanes || y >= ysize()) {
|
|
HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
|
|
static_cast<int>(y), static_cast<int>(ysize()));
|
|
}
|
|
#endif
|
|
// Use the first plane's stride because the compiler might not realize they
|
|
// are all equal. Thus we only need a single multiplication for all planes.
|
|
const size_t row_offset = y * planes_[0].bytes_per_row();
|
|
const void* row = planes_[c].bytes() + row_offset;
|
|
return static_cast<const T * HWY_RESTRICT>(
|
|
HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
|
|
}
|
|
|
|
private:
|
|
ImageT planes_[kNumPlanes];
|
|
};
|
|
|
|
using Image3F = Image3<float>;
|
|
|
|
// Rectangular region in image(s). Factoring this out of Image instead of
|
|
// shifting the pointer by x0/y0 allows this to apply to multiple images with
|
|
// different resolutions. Can compare size via SameSize(rect1, rect2).
|
|
class Rect {
|
|
public:
|
|
// Most windows are xsize_max * ysize_max, except those on the borders where
|
|
// begin + size_max > end.
|
|
constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
|
|
size_t ysize_max, size_t xend, size_t yend)
|
|
: x0_(xbegin),
|
|
y0_(ybegin),
|
|
xsize_(ClampedSize(xbegin, xsize_max, xend)),
|
|
ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
|
|
|
|
// Construct with origin and known size (typically from another Rect).
|
|
constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
|
|
: x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
|
|
|
|
// Construct a rect that covers a whole image.
|
|
template <typename Image>
|
|
explicit Rect(const Image& image)
|
|
: Rect(0, 0, image.xsize(), image.ysize()) {}
|
|
|
|
Rect() : Rect(0, 0, 0, 0) {}
|
|
|
|
Rect(const Rect&) = default;
|
|
Rect& operator=(const Rect&) = default;
|
|
|
|
Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
|
|
size_t ysize_max) {
|
|
return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
|
|
y0_ + ysize_);
|
|
}
|
|
|
|
template <typename T>
|
|
const T* ConstRow(const Image<T>* image, size_t y) const {
|
|
return image->ConstRow(y + y0_) + x0_;
|
|
}
|
|
|
|
template <typename T>
|
|
T* MutableRow(const Image<T>* image, size_t y) const {
|
|
return image->MutableRow(y + y0_) + x0_;
|
|
}
|
|
|
|
template <typename T>
|
|
const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
|
|
return image.ConstPlaneRow(c, y + y0_) + x0_;
|
|
}
|
|
|
|
template <typename T>
|
|
T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
|
|
return image->MutablePlaneRow(c, y + y0_) + x0_;
|
|
}
|
|
|
|
// Returns true if this Rect fully resides in the given image. ImageT could be
|
|
// Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
|
|
template <class ImageT>
|
|
bool IsInside(const ImageT& image) const {
|
|
return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
|
|
}
|
|
|
|
size_t x0() const { return x0_; }
|
|
size_t y0() const { return y0_; }
|
|
size_t xsize() const { return xsize_; }
|
|
size_t ysize() const { return ysize_; }
|
|
|
|
private:
|
|
// Returns size_max, or whatever is left in [begin, end).
|
|
static constexpr size_t ClampedSize(size_t begin, size_t size_max,
|
|
size_t end) {
|
|
return (begin + size_max <= end) ? size_max
|
|
: (end > begin ? end - begin : 0);
|
|
}
|
|
|
|
size_t x0_;
|
|
size_t y0_;
|
|
|
|
size_t xsize_;
|
|
size_t ysize_;
|
|
};
|
|
|
|
// Works for any image-like input type(s).
|
|
template <class Image1, class Image2>
|
|
HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
|
|
return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
|
|
}
|
|
|
|
// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
|
|
// We assume the radius (distance outside the image) is small compared to the
|
|
// image size, otherwise this might not terminate.
|
|
// The mirror is outside the last column (border pixel is also replicated).
|
|
static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
                                                 const int64_t xsize) {
  HWY_DASSERT(xsize != 0);

  // Reflect repeatedly until x lies within [0, xsize). Coordinates left of
  // the image map as -1 => 0, -2 => 1, ...; coordinates right of it map as
  // xsize => xsize - 1, xsize + 1 => xsize - 2, ...
  // TODO(janwas): replace with branchless version
  for (;;) {
    if (x < 0) {
      x = -x - 1;
    } else if (x >= xsize) {
      x = 2 * xsize - 1 - x;
    } else {
      break;
    }
  }
  return static_cast<size_t>(x);
}
|
|
|
|
// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
|
|
|
|
// Mirrors (repeating the edge pixel once). Useful for convolutions.
|
|
struct WrapMirror {
|
|
HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
|
|
return Mirror(coord, static_cast<int64_t>(size));
|
|
}
|
|
};
|
|
|
|
// Returns the same coordinate, for when we know "coord" is already valid (e.g.
|
|
// interior of an image).
|
|
struct WrapUnchanged {
|
|
HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
|
|
return static_cast<size_t>(coord);
|
|
}
|
|
};
|
|
|
|
// Similar to Wrap* but for row pointers (reduces Row() multiplications).
|
|
|
|
class WrapRowMirror {
|
|
public:
|
|
template <class View>
|
|
WrapRowMirror(const View& image, size_t ysize)
|
|
: first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
|
|
|
|
const float* operator()(const float* const HWY_RESTRICT row,
|
|
const int64_t stride) const {
|
|
if (row < first_row_) {
|
|
const int64_t num_before = first_row_ - row;
|
|
// Mirrored; one row before => row 0, two before = row 1, ...
|
|
return first_row_ + num_before - stride;
|
|
}
|
|
if (row > last_row_) {
|
|
const int64_t num_after = row - last_row_;
|
|
// Mirrored; one row after => last row, two after = last - 1, ...
|
|
return last_row_ - num_after + stride;
|
|
}
|
|
return row;
|
|
}
|
|
|
|
private:
|
|
const float* const HWY_RESTRICT first_row_;
|
|
const float* const HWY_RESTRICT last_row_;
|
|
};
|
|
|
|
struct WrapRowUnchanged {
|
|
HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
|
|
int64_t /*stride*/) const {
|
|
return row;
|
|
}
|
|
};
|
|
|
|
} // namespace hwy
|
|
|
|
#endif // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
|