diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-25 19:04:49 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-25 19:04:49 +0100 |
commit | 93f5f4322298b23fc6a1a1ca55187956ce30bfe8 (patch) | |
tree | 329e7d18af6f4541a60ecb0343e233370a8885fc /src/test-unicode.cpp | |
parent | 216e03ce9e1b891cbbfaea942c4cd607d29b67f7 (diff) |
Test code reordering and formatting
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- | src/test-unicode.cpp | 347 |
1 files changed, 172 insertions, 175 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index a30be70..34ae13f 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -68,39 +68,185 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t { namespace std { #ifdef __cpp_char8_t -std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) -{ - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint8_t>(c)); - os << "]"; + std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) + { + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast<uint8_t>(c)); + os << "]"; - return os; -} + return os; + } #endif -std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) + std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) + { + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast<uint16_t>(c)); + os << "]"; + + return os; + } + + std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s) + { + os << "["; + for (auto& c: s) + os << " " << std::to_string(static_cast<uint32_t>(c)); + os << "]"; + + return os; + } + + // utility wrapper to adapt locale-bound facets for wstring/wbuffer convert + template<class Facet> + struct deletable_facet : Facet + { + template<class ...Args> + deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {} + ~deletable_facet() {} + }; + + // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?) 
+ std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16; + std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32; + + template<typename From, typename To> + std::basic_string<To> std_convert(const std::basic_string<From>& s); + + template<> + std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s) + { + std::string a{s.begin(), s.end()}; + a = conv32.to_bytes(conv32.from_bytes(a)); + return std::basic_string<utf8_t>{a.begin(), a.end()}; + } + + template<> + std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s) + { + std::string a{s.begin(), s.end()}; + return conv16.from_bytes(a); + } + + template<> + std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s) + { + std::string a{s.begin(), s.end()}; + return conv32.from_bytes(a); + } + + template<> + std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s) + { + auto result{conv16.to_bytes(s)}; + return std::basic_string<utf8_t>(result.begin(), result.end()); + } + + template<> + std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s) + { + return conv16.from_bytes(conv16.to_bytes(s)); + } + + template<> + std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s) + { + return conv32.from_bytes(conv16.to_bytes(s)); + } + + template<> + std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s) + { + auto result{conv32.to_bytes(s)}; + return std::basic_string<utf8_t>(result.begin(), result.end()); + } + + template<> + std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s) + { + return conv16.from_bytes(conv32.to_bytes(s)); + } + + template<> + std::basic_string<char32_t> std_convert<char32_t, char32_t>(const 
std::basic_string<char32_t>& s) + { + return conv32.from_bytes(conv32.to_bytes(s)); + } +} + +// check assumptions about environment +BOOST_AUTO_TEST_CASE(string_u8string) { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint16_t>(c)); - os << "]"; + std::string a{"\xc3\xa4"}; + + std::basic_string<utf8_t> b{a.begin(), a.end()}; + + BOOST_CHECK(b == std::basic_string<utf8_t>{u8"ä"}); + + a = std::string{b.begin(), b.end()}; + + BOOST_CHECK(a == std::string{"\xc3\xa4"}); - return os; + BOOST_CHECK(sizeof(size_t) == 4 || sizeof(size_t) == 8); + + std::cout << "Detected CPU Accu size: " << (sizeof(size_t) * 8) << std::endl; } -std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s) +// check environment: demonstrate how boost convert u8->u8 throws exception on invalid input +BOOST_AUTO_TEST_CASE(utf_to_utf_failure_boost_u8_u8) { - os << "["; - for (auto& c: s) - os << " " << std::to_string(static_cast<uint32_t>(c)); - os << "]"; + for (auto& s: failure_strings_char8_t) { + try { + auto result1{boost::locale::conv::utf_to_utf<utf8_t, utf8_t>(s, boost::locale::conv::stop)}; + BOOST_FAIL("Expected boost convert to fail"); + } catch(...) { + // expected + } + } +} - return os; +// check environment: demonstrate how boost convert u8->u16 throws exception on invalid input +BOOST_AUTO_TEST_CASE(utf_to_utf_failure_boost_u8_u16) +{ + for (auto& s: failure_strings_char8_t) { + try { + auto result1{boost::locale::conv::utf_to_utf<char16_t, utf8_t>(s, boost::locale::conv::stop)}; + BOOST_FAIL("Expected boost convert to fail"); + } catch(...) { + // expected + } + } } +// check environment: demonstrate how std u8->u8 throws exception on invalid input +BOOST_AUTO_TEST_CASE(utf_to_utf_failure_std_u8_u8) +{ + for (auto& s: failure_strings_char8_t) { + try { + auto result2{std_convert<utf8_t, utf8_t>(s)}; + BOOST_FAIL("Expected std_convert to fail"); + } catch(...) 
{ + // expected + } + + } } +// check environment: demonstrate how std u8->u16 throws exception on invalid input +BOOST_AUTO_TEST_CASE(utf_to_utf_failure_std_u8_u16) +{ + for (auto& s: failure_strings_char8_t) { + try { + auto result2{std_convert<utf8_t, char16_t>(s)}; + BOOST_FAIL("Expected std_convert to fail"); + } catch(...) { + // expected + } + + } +} template<size_t i = 0, size_t j = 0, typename... Ts> void test_utf_to_utf(std::tuple<Ts...>& t) { @@ -365,84 +511,6 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type } } -// utility wrapper to adapt locale-bound facets for wstring/wbuffer convert -template<class Facet> -struct deletable_facet : Facet -{ - template<class ...Args> - deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {} - ~deletable_facet() {} -}; - -namespace { - // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?) - std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16; - std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32; - - template<typename From, typename To> - std::basic_string<To> std_convert(const std::basic_string<From>& s); - - template<> - std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - a = conv32.to_bytes(conv32.from_bytes(a)); - return std::basic_string<utf8_t>{a.begin(), a.end()}; - } - - template<> - std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - return conv16.from_bytes(a); - } - - template<> - std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s) - { - std::string a{s.begin(), s.end()}; - return conv32.from_bytes(a); - } - - template<> - std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s) - { - auto 
result{conv16.to_bytes(s)}; - return std::basic_string<utf8_t>(result.begin(), result.end()); - } - - template<> - std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s) - { - return conv16.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s) - { - return conv32.from_bytes(conv16.to_bytes(s)); - } - - template<> - std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s) - { - auto result{conv32.to_bytes(s)}; - return std::basic_string<utf8_t>(result.begin(), result.end()); - } - - template<> - std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s) - { - return conv16.from_bytes(conv32.to_bytes(s)); - } - - template<> - std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s) - { - return conv32.from_bytes(conv32.to_bytes(s)); - } -} - class CPUTimer { public: @@ -513,18 +581,18 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) { - random_context rc; + random_context rc{127}; - test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings"); + test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings"); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) { - random_context rc{127}; + random_context rc; - test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings"); + 
test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings"); } // Test ISO and UTF encodings @@ -616,74 +684,3 @@ BOOST_AUTO_TEST_CASE(is_valid_utf) BOOST_CHECK(unicode::is_valid_utf<unicode::UTF_8>(u8"äöü")); } -// check assumptions about environment -BOOST_AUTO_TEST_CASE(string_u8string) -{ - std::string a{"\xc3\xa4"}; - - std::basic_string<utf8_t> b{a.begin(), a.end()}; - - BOOST_CHECK(b == std::basic_string<utf8_t>{u8"ä"}); - - a = std::string{b.begin(), b.end()}; - - BOOST_CHECK(a == std::string{"\xc3\xa4"}); - - BOOST_CHECK(sizeof(size_t) == 4 || sizeof(size_t) == 8); - - std::cout << "Detected CPU Accu size: " << (sizeof(size_t) * 8) << std::endl; -} - -// check environment: demonstrate how boost convert u8->u8 throws exception on invalid input -BOOST_AUTO_TEST_CASE(utf_to_utf_failure_boost_u8_u8) -{ - for (auto& s: failure_strings_char8_t) { - try { - auto result1{boost::locale::conv::utf_to_utf<utf8_t, utf8_t>(s, boost::locale::conv::stop)}; - BOOST_FAIL("Expected boost convert to fail"); - } catch(...) { - // expected - } - } -} - -// check environment: demonstrate how boost convert u8->u16 throws exception on invalid input -BOOST_AUTO_TEST_CASE(utf_to_utf_failure_boost_u8_u16) -{ - for (auto& s: failure_strings_char8_t) { - try { - auto result1{boost::locale::conv::utf_to_utf<char16_t, utf8_t>(s, boost::locale::conv::stop)}; - BOOST_FAIL("Expected boost convert to fail"); - } catch(...) { - // expected - } - } -} - -// check environment: demonstrate how std u8->u8 throws exception on invalid input -BOOST_AUTO_TEST_CASE(utf_to_utf_failure_std_u8_u8) -{ - for (auto& s: failure_strings_char8_t) { - try { - auto result2{std_convert<utf8_t, utf8_t>(s)}; - BOOST_FAIL("Expected std_convert to fail"); - } catch(...) 
{ - // expected - } - - } -} - -// check environment: demonstrate how std u8->u16 throws exception on invalid input -BOOST_AUTO_TEST_CASE(utf_to_utf_failure_std_u8_u16) -{ - for (auto& s: failure_strings_char8_t) { - try { - auto result2{std_convert<utf8_t, char16_t>(s)}; - BOOST_FAIL("Expected std_convert to fail"); - } catch(...) { - // expected - } - - } -} |