216 lines
7.9 KiB
Rust
216 lines
7.9 KiB
Rust
use alloc::{boxed::Box, vec, vec::Vec};
|
|
|
|
/// A set of "packed pair" test seeds. Each seed serves as the base for the
|
|
/// generation of many other tests. In essence, the seed captures the pair of
|
|
/// bytes we used for a predicate and first byte among our needle. The tests
|
|
/// generated from each seed essentially vary the length of the needle and
|
|
/// haystack, while using the rare/first byte configuration from the seed.
|
|
///
|
|
/// The purpose of this is to test many different needle/haystack lengths.
|
|
/// In particular, some of the vector optimizations might only have bugs
|
|
/// in haystacks of a certain size.
|
|
const SEEDS: &[Seed] = &[
|
|
// Why not use different 'first' bytes? It seemed like a good idea to be
|
|
// able to configure it, but when I wrote the test generator below, it
|
|
// didn't seem necessary to use for reasons that I forget.
|
|
Seed { first: b'x', index1: b'y', index2: b'z' },
|
|
Seed { first: b'x', index1: b'x', index2: b'z' },
|
|
Seed { first: b'x', index1: b'y', index2: b'x' },
|
|
Seed { first: b'x', index1: b'x', index2: b'x' },
|
|
Seed { first: b'x', index1: b'y', index2: b'y' },
|
|
];
|
|
|
|
/// Runs a host of "packed pair" search tests.
|
|
///
|
|
/// These tests specifically look for the occurrence of a possible substring
|
|
/// match based on a pair of bytes matching at the right offsets.
|
|
pub(crate) struct Runner {
|
|
fwd: Option<
|
|
Box<
|
|
dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
|
|
>,
|
|
>,
|
|
}
|
|
|
|
impl Runner {
|
|
/// Create a new test runner for "packed pair" substring search.
|
|
pub(crate) fn new() -> Runner {
|
|
Runner { fwd: None }
|
|
}
|
|
|
|
/// Run all tests. This panics on the first failure.
|
|
///
|
|
/// If the implementation being tested returns `None` for a particular
|
|
/// haystack/needle combination, then that test is skipped.
|
|
///
|
|
/// This runs tests on both the forward and reverse implementations given.
|
|
/// If either (or both) are missing, then tests for that implementation are
|
|
/// skipped.
|
|
pub(crate) fn run(self) {
|
|
if let Some(mut fwd) = self.fwd {
|
|
for seed in SEEDS.iter() {
|
|
for t in seed.generate() {
|
|
match fwd(&t.haystack, &t.needle, t.index1, t.index2) {
|
|
None => continue,
|
|
Some(result) => {
|
|
assert_eq!(
|
|
t.fwd, result,
|
|
"FORWARD, needle: {:?}, haystack: {:?}, \
|
|
index1: {:?}, index2: {:?}",
|
|
t.needle, t.haystack, t.index1, t.index2,
|
|
)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Set the implementation for forward "packed pair" substring search.
|
|
///
|
|
/// If the closure returns `None`, then it is assumed that the given
|
|
/// test cannot be applied to the particular implementation and it is
|
|
/// skipped. For example, if a particular implementation only supports
|
|
/// needles or haystacks for some minimum length.
|
|
///
|
|
/// If this is not set, then forward "packed pair" search is not tested.
|
|
pub(crate) fn fwd(
|
|
mut self,
|
|
search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
|
|
) -> Runner {
|
|
self.fwd = Some(Box::new(search));
|
|
self
|
|
}
|
|
}
|
|
|
|
/// A test that represents the input and expected output to a "packed pair"
|
|
/// search function. The test should be able to run with any "packed pair"
|
|
/// implementation and get the expected output.
|
|
struct Test {
|
|
haystack: Vec<u8>,
|
|
needle: Vec<u8>,
|
|
index1: u8,
|
|
index2: u8,
|
|
fwd: Option<usize>,
|
|
}
|
|
|
|
impl Test {
|
|
/// Create a new "packed pair" test from a seed and some given offsets to
|
|
/// the pair of bytes to use as a predicate in the seed's needle.
|
|
///
|
|
/// If a valid test could not be constructed, then None is returned.
|
|
/// (Currently, we take the approach of massaging tests to be valid
|
|
/// instead of rejecting them outright.)
|
|
fn new(
|
|
seed: Seed,
|
|
index1: usize,
|
|
index2: usize,
|
|
haystack_len: usize,
|
|
needle_len: usize,
|
|
fwd: Option<usize>,
|
|
) -> Option<Test> {
|
|
let mut index1: u8 = index1.try_into().unwrap();
|
|
let mut index2: u8 = index2.try_into().unwrap();
|
|
// The '#' byte is never used in a haystack (unless we're expecting
|
|
// a match), while the '@' byte is never used in a needle.
|
|
let mut haystack = vec![b'@'; haystack_len];
|
|
let mut needle = vec![b'#'; needle_len];
|
|
needle[0] = seed.first;
|
|
needle[index1 as usize] = seed.index1;
|
|
needle[index2 as usize] = seed.index2;
|
|
// If we're expecting a match, then make sure the needle occurs
|
|
// in the haystack at the expected position.
|
|
if let Some(i) = fwd {
|
|
haystack[i..i + needle.len()].copy_from_slice(&needle);
|
|
}
|
|
// If the operations above lead to rare offsets pointing to the
|
|
// non-first occurrence of a byte, then adjust it. This might lead
|
|
// to redundant tests, but it's simpler than trying to change the
|
|
// generation process I think.
|
|
if let Some(i) = crate::memchr(seed.index1, &needle) {
|
|
index1 = u8::try_from(i).unwrap();
|
|
}
|
|
if let Some(i) = crate::memchr(seed.index2, &needle) {
|
|
index2 = u8::try_from(i).unwrap();
|
|
}
|
|
Some(Test { haystack, needle, index1, index2, fwd })
|
|
}
|
|
}
|
|
|
|
/// Data that describes a single prefilter test seed.
|
|
#[derive(Clone, Copy)]
|
|
struct Seed {
|
|
first: u8,
|
|
index1: u8,
|
|
index2: u8,
|
|
}
|
|
|
|
impl Seed {
|
|
const NEEDLE_LENGTH_LIMIT: usize = {
|
|
#[cfg(not(miri))]
|
|
{
|
|
33
|
|
}
|
|
#[cfg(miri)]
|
|
{
|
|
5
|
|
}
|
|
};
|
|
|
|
const HAYSTACK_LENGTH_LIMIT: usize = {
|
|
#[cfg(not(miri))]
|
|
{
|
|
65
|
|
}
|
|
#[cfg(miri)]
|
|
{
|
|
8
|
|
}
|
|
};
|
|
|
|
/// Generate a series of prefilter tests from this seed.
|
|
fn generate(self) -> impl Iterator<Item = Test> {
|
|
let len_start = 2;
|
|
// The iterator below generates *a lot* of tests. The number of
|
|
// tests was chosen somewhat empirically to be "bearable" when
|
|
// running the test suite.
|
|
//
|
|
// We use an iterator here because the collective haystacks of all
|
|
// these test cases add up to enough memory to OOM a conservative
|
|
// sandbox or a small laptop.
|
|
(len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| {
|
|
let index_start = len_start - 1;
|
|
(index_start..needle_len).flat_map(move |index1| {
|
|
(index1..needle_len).flat_map(move |index2| {
|
|
(needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
|
|
move |haystack_len| {
|
|
Test::new(
|
|
self,
|
|
index1,
|
|
index2,
|
|
haystack_len,
|
|
needle_len,
|
|
None,
|
|
)
|
|
.into_iter()
|
|
.chain(
|
|
(0..=(haystack_len - needle_len)).flat_map(
|
|
move |output| {
|
|
Test::new(
|
|
self,
|
|
index1,
|
|
index2,
|
|
haystack_len,
|
|
needle_len,
|
|
Some(output),
|
|
)
|
|
},
|
|
),
|
|
)
|
|
},
|
|
)
|
|
})
|
|
})
|
|
})
|
|
}
|
|
}
|