// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_collections::char16trie::{Char16Trie, TrieResult};
|
|
use zerovec::ZeroVec;
|
|
|
|
/// Loads the `empty.toml` fixture and checks that feeding `'h'` to a fresh
/// iterator is reported as `NoMatch`.
#[test]
fn empty() {
    let fixture = include_str!("data/char16trie/empty.toml");
    let parsed: TestFile = toml::from_str(fixture).unwrap();
    let units = parsed.ucharstrie.data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(units.as_slice()));

    assert_eq!(trie.iter().next('h'), TrieResult::NoMatch);
}
|
|
|
|
/// Exercises the `test_a.toml` fixture: `'h'` is rejected from the start,
/// while `'a'` yields the final value 1 and cannot be extended by another `'a'`.
#[test]
fn a() {
    let fixture = include_str!("data/char16trie/test_a.toml");
    let parsed: TestFile = toml::from_str(fixture).unwrap();
    let units = parsed.ucharstrie.data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(units.as_slice()));

    // First walk: a character that is expected not to match at all.
    let mut cursor = trie.iter();
    assert_eq!(cursor.next('h'), TrieResult::NoMatch);

    // Second walk, from a fresh iterator: "a" is final, a further 'a' fails.
    let mut cursor = trie.iter();
    for (chr, want) in [
        ('a', TrieResult::FinalValue(1)),
        ('a', TrieResult::NoMatch),
    ] {
        assert_eq!(cursor.next(chr), want);
    }
}
|
|
|
|
/// Exercises the `test_a_ab.toml` fixture: `'a'` carries the intermediate
/// value 1, `"ab"` ends with the final value 100, and neither `"aa"` nor
/// `"abb"` is accepted.
#[test]
fn a_b() {
    let fixture = include_str!("data/char16trie/test_a_ab.toml");
    let parsed: TestFile = toml::from_str(fixture).unwrap();
    let units = parsed.ucharstrie.data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(units.as_slice()));

    // Walk "aa": the second 'a' has nowhere to go.
    let mut cursor = trie.iter();
    for (chr, want) in [
        ('a', TrieResult::Intermediate(1)),
        ('a', TrieResult::NoMatch),
    ] {
        assert_eq!(cursor.next(chr), want);
    }

    // Walk "abb" from a fresh iterator: "ab" is final, the extra 'b' fails.
    let mut cursor = trie.iter();
    for (chr, want) in [
        ('a', TrieResult::Intermediate(1)),
        ('b', TrieResult::FinalValue(100)),
        ('b', TrieResult::NoMatch),
    ] {
        assert_eq!(cursor.next(chr), want);
    }
}
|
|
|
|
/// Exercises the `test_shortest_branch.toml` fixture: `'a'` and `'b'` are
/// each final on their own (values 1000 and 2000) and neither can be
/// followed by the other.
#[test]
fn shortest_branch() {
    let fixture = include_str!("data/char16trie/test_shortest_branch.toml");
    let parsed: TestFile = toml::from_str(fixture).unwrap();
    let units = parsed.ucharstrie.data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(units.as_slice()));

    // Walk "ab".
    let mut cursor = trie.iter();
    for (chr, want) in [
        ('a', TrieResult::FinalValue(1000)),
        ('b', TrieResult::NoMatch),
    ] {
        assert_eq!(cursor.next(chr), want);
    }

    // Walk "ba" from a fresh iterator.
    let mut cursor = trie.iter();
    for (chr, want) in [
        ('b', TrieResult::FinalValue(2000)),
        ('a', TrieResult::NoMatch),
    ] {
        assert_eq!(cursor.next(chr), want);
    }
}
|
|
|
|
/// Walks every key of the `test_branches.toml` fixture through the trie.
///
/// For each key, every character before the last must yield `NoValue`, and
/// the last character must yield the listed `FinalValue`.
#[test]
fn branches() {
    let trie_data = toml::from_str::<TestFile>(include_str!("data/char16trie/test_branches.toml"))
        .unwrap()
        .ucharstrie
        .data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(trie_data.as_slice()));

    for (query, expected) in [
        ("a", TrieResult::FinalValue(0x10)),
        ("cc", TrieResult::FinalValue(0x40)),
        ("e", TrieResult::FinalValue(0x100)),
        ("ggg", TrieResult::FinalValue(0x400)),
        ("i", TrieResult::FinalValue(0x1000)),
        ("kkkk", TrieResult::FinalValue(0x4000)),
        ("n", TrieResult::FinalValue(0x10000)),
        ("ppppp", TrieResult::FinalValue(0x40000)),
        ("r", TrieResult::FinalValue(0x100000)),
        ("sss", TrieResult::FinalValue(0x200000)),
        ("t", TrieResult::FinalValue(0x400000)),
        ("uu", TrieResult::FinalValue(0x800000)),
        ("vv", TrieResult::FinalValue(0x7fffffff)),
        ("zz", TrieResult::FinalValue(-2147483648)),
    ] {
        // `enumerate` below counts characters, so the "last position" check
        // must use a character count; `str::len` is a byte length and would
        // only agree for ASCII-only keys.
        let char_count = query.chars().count();
        let mut iter = trie.iter();
        for (i, chr) in query.chars().enumerate() {
            let res = iter.next(chr);
            if i + 1 == char_count {
                assert_eq!(res, expected, "query {:?}, char index {}", query, i);
            } else {
                assert_eq!(
                    res,
                    TrieResult::NoValue,
                    "query {:?}, char index {}",
                    query,
                    i
                );
            }
        }
    }
}
|
|
|
|
/// Walks the keys of the `test_long_sequence.toml` fixture through the trie.
///
/// The three keys are prefixes of one another: `"a"`, the 52-character
/// alphabet string, and that string repeated six times. Walking a longer key
/// must therefore report the shorter keys' values as `Intermediate` on the
/// way, and `NoValue` everywhere else.
#[test]
fn long_sequence() {
    let trie_data =
        toml::from_str::<TestFile>(include_str!("data/char16trie/test_long_sequence.toml"))
            .unwrap()
            .ucharstrie
            .data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(trie_data.as_slice()));

    for (query, expected) in [
        ("a", TrieResult::Intermediate(-1)),
        // sequence of linear-match nodes
        (
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
            TrieResult::Intermediate(-2),
        ),
        // more than 256 units
        (
            concat!(
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
            ),
            TrieResult::FinalValue(-3),
        ),
    ] {
        // `enumerate` below counts characters, so the "last position" check
        // must use a character count; `str::len` is a byte length and would
        // only agree for ASCII-only keys.
        let char_count = query.chars().count();
        let mut iter = trie.iter();
        for (i, chr) in query.chars().enumerate() {
            let res = iter.next(chr);
            if i + 1 == char_count {
                assert_eq!(res, expected, "query {:?}, char index {}", query, i);
            } else if i == 0 {
                // The first character completes the key "a".
                assert_eq!(
                    res,
                    TrieResult::Intermediate(-1),
                    "query {:?}, char index {}",
                    query,
                    i
                );
            } else if i == 51 {
                // Index 51 completes the 52-character alphabet key.
                assert_eq!(
                    res,
                    TrieResult::Intermediate(-2),
                    "query {:?}, char index {}",
                    query,
                    i
                );
            } else {
                assert_eq!(
                    res,
                    TrieResult::NoValue,
                    "query {:?}, char index {}",
                    query,
                    i
                );
            }
        }
    }
}
|
|
|
|
/// Walks every key of the `test_long_branch.toml` fixture through the trie.
///
/// The last character of each key must yield the listed result. Earlier
/// characters must yield `NoValue`, except where `"l234567890123"` passes
/// through the shorter key `"l234567890"` and reports its value as
/// `Intermediate`.
#[test]
fn long_branch() {
    let trie_data =
        toml::from_str::<TestFile>(include_str!("data/char16trie/test_long_branch.toml"))
            .unwrap()
            .ucharstrie
            .data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(trie_data.as_slice()));

    for (query, expected) in [
        ("a", TrieResult::FinalValue(-2)),
        ("b", TrieResult::FinalValue(-1)),
        ("c", TrieResult::FinalValue(0)),
        ("d2", TrieResult::FinalValue(1)),
        ("f", TrieResult::FinalValue(0x3f)),
        ("g", TrieResult::FinalValue(0x40)),
        ("h", TrieResult::FinalValue(0x41)),
        ("j23", TrieResult::FinalValue(0x1900)),
        ("j24", TrieResult::FinalValue(0x19ff)),
        ("j25", TrieResult::FinalValue(0x1a00)),
        ("k2", TrieResult::FinalValue(0x1a80)),
        ("k3", TrieResult::FinalValue(0x1aff)),
        ("l234567890", TrieResult::Intermediate(0x1b00)),
        ("l234567890123", TrieResult::FinalValue(0x1b01)),
        (
            "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn",
            TrieResult::FinalValue(0x10ffff),
        ),
        (
            "oooooooooooooooooooooooooooooooooooooooooooooooooooooo",
            TrieResult::FinalValue(0x110000),
        ),
        (
            "pppppppppppppppppppppppppppppppppppppppppppppppppppppp",
            TrieResult::FinalValue(0x120000),
        ),
        ("r", TrieResult::FinalValue(0x333333)),
        ("s2345", TrieResult::FinalValue(0x4444444)),
        ("t234567890", TrieResult::FinalValue(0x77777777)),
        ("z", TrieResult::FinalValue(-2147483647)),
    ] {
        // `enumerate` below counts characters, so the "last position" check
        // must use a character count; `str::len` is a byte length and would
        // only agree for ASCII-only keys.
        let char_count = query.chars().count();
        let mut iter = trie.iter();
        for (i, chr) in query.chars().enumerate() {
            let res = iter.next(chr);
            if i + 1 == char_count {
                assert_eq!(res, expected, "query {:?}, char index {}", query, i);
            } else if query == "l234567890123" && i == 9 {
                // Index 9 completes the shorter key "l234567890".
                assert_eq!(
                    res,
                    TrieResult::Intermediate(0x1b00),
                    "query {:?}, char index {}",
                    query,
                    i
                );
            } else {
                assert_eq!(
                    res,
                    TrieResult::NoValue,
                    "query {:?}, char index {}",
                    query,
                    i
                );
            }
        }
    }
}
|
|
|
|
/// Walks every key of the `test_compact.toml` fixture through the trie.
///
/// The bare signs `"+"` and `"-"` are keys with the intermediate value 0, so
/// any longer key starting with a sign reports `Intermediate(0)` on that
/// character. All other non-final characters must yield `NoValue`.
#[test]
fn compact() {
    let trie_data = toml::from_str::<TestFile>(include_str!("data/char16trie/test_compact.toml"))
        .unwrap()
        .ucharstrie
        .data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(trie_data.as_slice()));

    for (query, expected) in [
        ("+", TrieResult::Intermediate(0)),
        ("+august", TrieResult::FinalValue(8)),
        ("+december", TrieResult::FinalValue(12)),
        ("+july", TrieResult::FinalValue(7)),
        ("+june", TrieResult::FinalValue(6)),
        ("+november", TrieResult::FinalValue(11)),
        ("+october", TrieResult::FinalValue(10)),
        ("+september", TrieResult::FinalValue(9)),
        ("-", TrieResult::Intermediate(0)),
        ("-august", TrieResult::FinalValue(8)),
        ("-december", TrieResult::FinalValue(12)),
        ("-july", TrieResult::FinalValue(7)),
        ("-june", TrieResult::FinalValue(6)),
        ("-november", TrieResult::FinalValue(11)),
        ("-october", TrieResult::FinalValue(10)),
        ("-september", TrieResult::FinalValue(9)),
        ("xjuly", TrieResult::FinalValue(7)),
        ("xjune", TrieResult::FinalValue(6)),
    ] {
        // `enumerate` below counts characters, so the "last position" check
        // must use a character count; `str::len` is a byte length and would
        // only agree for ASCII-only keys.
        let char_count = query.chars().count();
        let mut iter = trie.iter();
        for (i, chr) in query.chars().enumerate() {
            let res = iter.next(chr);
            if i + 1 == char_count {
                assert_eq!(res, expected, "query {:?}, char index {}", query, i);
            } else if matches!(chr, '-' | '+') {
                assert_eq!(
                    res,
                    TrieResult::Intermediate(0),
                    "query {:?}, char index {}",
                    query,
                    i
                );
            } else {
                assert_eq!(
                    res,
                    TrieResult::NoValue,
                    "query {:?}, char index {}",
                    query,
                    i
                );
            }
        }
    }
}
|
|
|
|
/// Exercises the `months.toml` fixture with two walks, "june" and "july",
/// checking the result after every character and that a trailing `'h'` is
/// rejected once the final value has been reached.
#[test]
fn months() {
    let fixture = include_str!("data/char16trie/months.toml");
    let parsed: TestFile = toml::from_str(fixture).unwrap();
    let units = parsed.ucharstrie.data;
    let trie = Char16Trie::new(ZeroVec::from_slice_or_alloc(units.as_slice()));

    // "june": the prefix "jun" already carries a value, "june" is final.
    let june_steps = [
        TrieResult::NoValue,
        TrieResult::NoValue,
        TrieResult::Intermediate(6),
        TrieResult::FinalValue(6),
    ];
    let mut cursor = trie.iter();
    for (chr, want) in "june".chars().zip(june_steps) {
        assert_eq!(cursor.next(chr), want);
    }
    assert_eq!(cursor.next('h'), TrieResult::NoMatch);

    // "july": no value until the last character.
    let july_steps = [
        TrieResult::NoValue,
        TrieResult::NoValue,
        TrieResult::NoValue,
        TrieResult::FinalValue(7),
    ];
    let mut cursor = trie.iter();
    for (chr, want) in "july".chars().zip(july_steps) {
        assert_eq!(cursor.next(chr), want);
    }
    assert_eq!(cursor.next('h'), TrieResult::NoMatch);
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
pub struct TestFile {
|
|
ucharstrie: Char16TrieVec,
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
pub struct Char16TrieVec {
|
|
data: Vec<u16>,
|
|
}
|