trisquel-icecat/icecat/intl/icu_capi/cpp/examples/properties/test.cpp

251 lines
8.5 KiB
C++

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
#include "../../include/ICU4XCodePointSetData.hpp"
#include "../../include/ICU4XUnicodeSetData.hpp"
#include "../../include/ICU4XCodePointMapData16.hpp"
#include "../../include/ICU4XCodePointMapData8.hpp"
#include "../../include/ICU4XPropertyValueNameToEnumMapper.hpp"
#include "../../include/ICU4XGeneralCategoryNameToMaskMapper.hpp"
#include "../../include/ICU4XLogger.hpp"
#include <iostream>
// Checks that `data` contains the code point `included` and does not contain
// the code point `excluded`, reporting the outcome on stdout.
//
// Returns 0 when both expectations hold and 1 otherwise.
//
// Note: std::hex is left set on std::cout when this function returns.
int test_set_property(ICU4XCodePointSetData data, char32_t included, char32_t excluded) {
    const bool has_included = data.contains(included);
    const bool has_excluded = data.contains(excluded);

    // Print the numeric code point values. Streaming a char32_t directly only
    // works through integral promotion in C++17 and is a deleted overload from
    // C++20 on, so convert explicitly.
    const uint32_t included_cp = static_cast<uint32_t>(included);
    const uint32_t excluded_cp = static_cast<uint32_t>(excluded);

    std::cout << std::hex; // print hex for U+####
    if (has_included && !has_excluded) {
        std::cout << "Set correctly contains U+" << included_cp << " and not U+" << excluded_cp << std::endl;
        return 0;
    }
    std::cout << "Set returns wrong result on U+" << included_cp << " or U+" << excluded_cp << std::endl;
    return 1;
}
// Looks up the code point `sample` in the 16-bit code point map `data` and
// compares the value found with `expected`, reporting the outcome on stdout.
//
// Returns 0 when the mapped value equals `expected` and 1 otherwise.
//
// Note: std::hex is left set on std::cout when this function returns.
int test_map_16_property(ICU4XCodePointMapData16 data, char32_t sample, uint32_t expected) {
    const uint32_t actual = data.get(sample);

    // Print the numeric code point value. Streaming a char32_t directly only
    // works through integral promotion in C++17 and is a deleted overload from
    // C++20 on, so convert explicitly.
    const uint32_t sample_cp = static_cast<uint32_t>(sample);

    std::cout << std::hex; // print hex for U+####
    if (actual == expected) {
        std::cout << "Code point U+" << sample_cp << " correctly mapped to 0x" << actual << std::endl;
        return 0;
    }
    std::cout << "Code point U+" << sample_cp << " incorrectly mapped to 0x" << actual << std::endl;
    return 1;
}
// Looks up the code point `sample` in the 8-bit code point map `data` and
// compares the value found with `expected`, reporting the outcome on stdout.
//
// The looked-up value is widened to uint32_t so that it is printed as a number
// rather than as a character.
//
// Returns 0 when the mapped value equals `expected` and 1 otherwise.
//
// Note: std::hex is left set on std::cout when this function returns.
int test_map_8_property(ICU4XCodePointMapData8 data, char32_t sample, uint32_t expected) {
    const uint32_t actual = data.get(sample);

    // Print the numeric code point value. Streaming a char32_t directly only
    // works through integral promotion in C++17 and is a deleted overload from
    // C++20 on, so convert explicitly.
    const uint32_t sample_cp = static_cast<uint32_t>(sample);

    std::cout << std::hex; // print hex for U+####
    if (actual == expected) {
        std::cout << "Code point U+" << sample_cp << " correctly mapped to 0x" << actual << std::endl;
        return 0;
    }
    std::cout << "Code point U+" << sample_cp << " incorrectly mapped to 0x" << actual << std::endl;
    return 1;
}
// Runs the property API checks in sequence: binary/enumerated code point
// properties, the Basic_Emoji and Bangla exemplar string sets, the script
// name mapper, the general category mask mapper, and the range iterator.
//
// Each group prints a diagnostic for every failed expectation. The process
// exit code is 0 when everything passes and non-zero as soon as a group fails.
int main() {
    ICU4XLogger::init_simple_logger();
    ICU4XDataProvider dp = ICU4XDataProvider::create_compiled();
    int result;

    // --- Code point sets and maps -------------------------------------------
    // Non-ASCII sample characters are spelled as universal character names so
    // the checks do not depend on the encoding of this source file.
    result = test_set_property(
        ICU4XCodePointSetData::load_ascii_hex_digit(dp).ok().value(),
        u'3',
        u'\u0A69' // GURMUKHI DIGIT THREE: a digit, but not an ASCII hex digit
    );
    if (result != 0) {
        return result;
    }

    result = test_map_16_property(
        ICU4XCodePointMapData16::load_script(dp).ok().value(),
        u'\u6728', // CJK ideograph U+6728
        17 // Script::Han
    );
    if (result != 0) {
        return result;
    }

    result = test_map_8_property(
        ICU4XCodePointMapData8::load_general_category(dp).ok().value(),
        u'\u6728', // CJK ideograph U+6728
        5 // GeneralCategory::OtherLetter
    );
    if (result != 0) {
        return result;
    }

    result = test_map_8_property(
        ICU4XCodePointMapData8::load_bidi_class(dp).ok().value(),
        u'ع',
        13 // BidiClass::ArabicLetter
    );
    if (result != 0) {
        return result;
    }

    // --- Basic_Emoji string set ---------------------------------------------
    ICU4XUnicodeSetData basic_emoji = ICU4XUnicodeSetData::load_basic_emoji(dp).ok().value();
    if (!basic_emoji.contains_char(U'🔥')) {
        std::cout << "Character 🔥 not found in Basic_Emoji set" << std::endl;
        result = 1;
    }
    if (!basic_emoji.contains(u8"🗺️")) {
        std::cout << "String \"🗺️\" (U+1F5FA U+FE0F) not found in Basic_Emoji set" << std::endl;
        result = 1;
    }
    if (basic_emoji.contains_char(U'a')) {
        std::cout << "Character a found in Basic_Emoji set" << std::endl;
        result = 1;
    }
    if (basic_emoji.contains(u8"aa")) {
        std::cout << "String \"aa\" found in Basic_Emoji set" << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    }
    std::cout << "Basic_Emoji set contains appropriate characters" << std::endl;

    // --- Bangla exemplar characters -----------------------------------------
    ICU4XLocale locale = ICU4XLocale::create_from_string("bn").ok().value();
    ICU4XUnicodeSetData exemplars = ICU4XUnicodeSetData::load_exemplars_main(dp, locale).ok().value();
    if (!exemplars.contains_char(U'\u09AC')) { // BENGALI LETTER BA
        std::cout << "Character 'ব' not found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (!exemplars.contains(u8"ক্ষ")) {
        std::cout << "String \"ক্ষ\" (U+0995U+09CDU+09B7) not found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (exemplars.contains_char(U'a')) {
        std::cout << "Character a found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (exemplars.contains(u8"aa")) {
        // Reaching this branch means "aa" WAS (unexpectedly) in the set.
        std::cout << "String \"aa\" found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    }
    std::cout << "Bangla exemplar chars set contains appropriate characters" << std::endl;

    // --- Script name -> enum mapper -----------------------------------------
    // The property helpers above leave std::cout in hex mode; script
    // discriminants are reported in decimal, so switch back explicitly.
    std::cout << std::dec;
    ICU4XPropertyValueNameToEnumMapper mapper = ICU4XPropertyValueNameToEnumMapper::load_script(dp).ok().value();
    int32_t script = mapper.get_strict("Brah");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for script name `Brah`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_strict("Brahmi");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for script name `Brahmi`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_loose("brah");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for (loose matched) script name `brah`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_strict("Linear_Z");
    if (script != -1) {
        std::cout << "Expected no value for fake script name `Linear_Z`, found " << script << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    }
    std::cout << "Script name mapper returns correct values" << std::endl;

    // --- General category name -> mask mapper -------------------------------
    // Masks and code point ranges are reported in hex with a 0x prefix so the
    // "found" values read the same way as the expected values in the messages.
    std::cout << std::hex << std::showbase;
    ICU4XGeneralCategoryNameToMaskMapper mask_mapper = ICU4XGeneralCategoryNameToMaskMapper::load(dp).ok().value();
    int32_t mask = mask_mapper.get_strict("Lu");
    if (mask != 0x02) {
        std::cout << "Expected discriminant 0x02 for mask name `Lu`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("L");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `L`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("Letter");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `Letter`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_loose("l");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `l`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("letter");
    if (mask != 0) {
        std::cout << "Expected no mask for (strict matched) name `letter`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("EverythingLol");
    if (mask != 0) {
        std::cout << "Expected no mask for nonexistant name `EverythingLol`, found " << mask << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    }
    std::cout << "Mask name mapper returns correct values" << std::endl;

    // --- Range iterator -----------------------------------------------------
    // The first range of uppercase letters is expected to be 'A'..'Z'.
    mask = mask_mapper.get_strict("Lu");
    ICU4XCodePointMapData8 gc = ICU4XCodePointMapData8::load_general_category(dp).ok().value();
    auto ranges = gc.iter_ranges_for_mask(mask);
    auto next = ranges.next();
    if (next.done) {
        std::cout << "Got empty iterator!" << std::endl;
        result = 1;
    }
    if (next.start != U'A' || next.end != U'Z') {
        // Cast the char32_t literals: streaming char32_t is deleted in C++20.
        std::cout << "Expected range [" << static_cast<uint32_t>(U'A') << ", " << static_cast<uint32_t>(U'Z')
                  << "], got range [" << next.start << ", " << next.end << "]" << std::endl;
        result = 1;
    }

    // Test iteration to completion for a small set
    mask = mask_mapper.get_strict("Control");
    ranges = gc.iter_ranges_for_mask(mask);
    next = ranges.next();
    if (next.start != 0 || next.end != 0x1f) {
        std::cout << "Expected range [0, 0x1f], got range [" << next.start << ", " << next.end << "]" << std::endl;
        result = 1;
    }
    std::cout << "Found ranges for gc=Control:";
    while (!next.done) {
        std::cout << " [" << next.start << ", " << next.end << "]";
        next = ranges.next();
    }
    std::cout << std::endl;

    if (result != 0) {
        return result;
    }
    std::cout << "Ranges iterator works" << std::endl;
    return 0;
}