diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-12-21 15:36:48 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-12-21 15:36:48 +0100 |
commit | 3ca9f389084a2defe1fff2046dd3450e0b242e58 (patch) | |
tree | c6e8ad716db3d1cbadf33c421425803a2e89cd1b /src/test-unicode.cpp | |
parent | f3025691d12727bbab138c13680cc21a451626b6 (diff) |
Added comparison tests with boost::locale::conv and std::wstring_convert
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- | src/test-unicode.cpp | 205 |
1 files changed, 115 insertions, 90 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index d00a33d..c325f6c 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -5,12 +5,16 @@ #include <boost/test/data/monomorphic.hpp> #include <boost/test/data/test_case.hpp> +#include <boost/locale.hpp> + #include <array> #include <chrono> +#include <codecvt> #include <deque> #include <exception> #include <limits> #include <list> +#include <locale> #include <random> #include <string> #include <tuple> @@ -258,10 +262,11 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode) } struct random_context { + random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} std::random_device rd; // OS random number engine to seed RNG (below) std::mt19937 gen{rd()}; std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units - std::uniform_int_distribution<unsigned long> code_point_distribution{0, 0x10FFFF - 0x800}; + std::uniform_int_distribution<unsigned long> code_point_distribution; }; // generates valid and invalid strings of different type @@ -293,7 +298,7 @@ std::u32string generate_random_string(random_context& rc, size_t length) } template<typename From, typename ToTypesCollectionType, size_t i = 0> -void test_random(random_context& rc, size_t length) +void test_random_invalid(random_context& rc, size_t length) { //std::cerr << "LENGTH: " << length << std::endl; typedef typename std::tuple_element<i,ToTypesCollectionType>::type To; @@ -347,7 +352,7 @@ void test_random(random_context& rc, size_t length) // iterate over remaining To types if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value) - test_random<From, ToTypesCollectionType, i + 1>(rc, length); + test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length); } BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) @@ -355,134 +360,154 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type random_context rc; for (int i = 0; i < 10; i++) { - test_random<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); + test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen)); } } -BOOST_AUTO_TEST_CASE(random_sequences_valid) +// utility wrapper to adapt locale-bound facets for wstring/wbuffer convert +template<class Facet> +struct deletable_facet : Facet { - random_context rc; + template<class ...Args> + deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {} + ~deletable_facet() {} +}; - // Fill UTF-32 data list - std::vector<std::u32string> u32list; - std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); +namespace { + // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?) + std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16; + std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32; + + template<typename From, typename To> + std::basic_string<To> std_convert(const std::basic_string<From>& s); - // Fill UTF-16 data list - std::vector<std::u16string> u16list; - std::transform(u32list.begin(), u32list.end(), std::back_inserter(u16list), [](const std::u32string& s){return unicode::convert<unicode::UTF_32, unicode::UTF_16>(s);}); - - // Fill UTF-8 data list - std::vector<std::basic_string<utf8_t>> u8list; - std::transform(u32list.begin(), u32list.end(), std::back_inserter(u8list), [](const std::u32string& s){return unicode::convert<unicode::UTF_32, unicode::UTF_8>(s);}); - - for (const auto& i : u32list) { - std::u32string s32{unicode::convert<unicode::UTF_32, unicode::UTF_32>(i)}; - BOOST_CHECK(s32.size() == i.size()); - std::u16string s16{unicode::convert<unicode::UTF_32, unicode::UTF_16>(i)}; - BOOST_CHECK(s16.size() >= i.size()); - std::basic_string<utf8_t> s8{unicode::convert<unicode::UTF_32, unicode::UTF_8>(i)}; - BOOST_CHECK(s8.size() >= i.size()); + template<> + std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s) + { + return s; } - for (const auto& i : u16list) { - std::u32string s32{unicode::convert<unicode::UTF_16, unicode::UTF_32>(i)}; - BOOST_CHECK(s32.size() > 0 || i.size() == 0); - std::u16string s16{unicode::convert<unicode::UTF_16, unicode::UTF_16>(i)}; - BOOST_CHECK(s16.size() == i.size()); - std::basic_string<utf8_t> s8{unicode::convert<unicode::UTF_16, unicode::UTF_8>(i)}; - BOOST_CHECK(s8.size() >= i.size()); + template<> + std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s) + { + std::string a{s.begin(), s.end()}; + return conv16.from_bytes(a); } - for (const auto& i : u8list) { - std::u32string s32{unicode::convert<unicode::UTF_8, unicode::UTF_32>(i)}; - BOOST_CHECK(s32.size() > 0 || i.size() == 0); - std::u16string s16{unicode::convert<unicode::UTF_8, unicode::UTF_16>(i)}; - BOOST_CHECK(s16.size() > 0 || i.size() == 0); - std::basic_string<utf8_t> s8{unicode::convert<unicode::UTF_8, unicode::UTF_8>(i)}; - BOOST_CHECK(s8.size() == i.size()); + template<> + std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s) + { + std::string a{s.begin(), s.end()}; + return conv32.from_bytes(a); } + template<> + std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s) { - // Performance test UTF-32 -> UTF-32 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u32list) { - std::u32string s{unicode::convert<unicode::UTF_32, unicode::UTF_32>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + auto result{conv16.to_bytes(s)}; + return std::basic_string<utf8_t>(result.begin(), result.end()); } + template<> + std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s) { - // Performance test UTF-32 -> UTF-16 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u32list) { - std::u16string s{unicode::convert<unicode::UTF_32, unicode::UTF_16>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + return s; } + template<> + std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s) { - // Performance test UTF-32 -> UTF-8 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u32list) { - std::basic_string<utf8_t> s{unicode::convert<unicode::UTF_32, unicode::UTF_8>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + return conv32.from_bytes(conv16.to_bytes(s)); } + template<> + std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s) { - // Performance test UTF-16 -> UTF-32 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u16list) { - std::u32string s{unicode::convert<unicode::UTF_16, unicode::UTF_32>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + auto result{conv32.to_bytes(s)}; + return std::basic_string<utf8_t>(result.begin(), result.end()); } + template<> + std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s) { - // Performance test UTF-16 -> UTF-16 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u16list) { - std::u16string s{unicode::convert<unicode::UTF_16, unicode::UTF_16>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + return conv16.from_bytes(conv32.to_bytes(s)); } + template<> + std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s) { - // Performance test UTF-16 -> UTF-8 - auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u16list) { - std::basic_string<utf8_t> s{unicode::convert<unicode::UTF_16, unicode::UTF_8>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + return s; } +} +template<typename From, typename ToTypesCollectionType, size_t index = 0> +void test_random_valid(random_context& rc, size_t length, const std::string& description) +{ + typedef typename std::tuple_element<index,ToTypesCollectionType>::type To; + + // Fill UTF-32 data list: source for tests + std::vector<std::u32string> u32list; + std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); + + // Fill From data list + std::vector<From> list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ + return unicode::convert<unicode::UTF_32, typename unicode::Encoding<typename From::value_type>::Facet>(s); + }); + + for (int i = 0; i < list.size(); i++) { + BOOST_CHECK(list[i].size() >= u32list[i].size()); + To result{unicode::convert<typename unicode::Encoding<typename From::value_type>::Facet,typename unicode::Encoding<typename To::value_type>::Facet>(list[i])}; + BOOST_CHECK(result.size() >= u32list[i].size()); + } + { - // Performance test UTF-8 -> UTF-32 auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u8list) { - std::u32string s{unicode::convert<unicode::UTF_8, unicode::UTF_32>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-32: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + for (const auto& i: list) + To result{unicode::convert<typename unicode::Encoding<typename From::value_type>::Facet,typename unicode::Encoding<typename To::value_type>::Facet>(i)}; + std::cout << "Performance test for converting " << list.size() << + " " << description << + " from UTF-" << (sizeof(typename From::value_type) * 8) << + " to UTF-" << (sizeof(typename To::value_type) * 8) << ": " << + std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << "s" << + std::endl; } - + { - // Performance test UTF-8 -> UTF-16 auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u8list) { - std::u16string s{unicode::convert<unicode::UTF_8, unicode::UTF_16>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-16: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + for (const auto& i: list) + To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)}; + std::cout << " -> Compare to boost::locale::conv::utf_to_utf: " << + std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << "s" << + std::endl; } { - // Performance test UTF-8 -> UTF-8 auto t0{std::chrono::steady_clock::now()}; - for (const auto& i : u8list) { - std::basic_string<utf8_t> s{unicode::convert<unicode::UTF_8, unicode::UTF_8>(i)}; - } - std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-8: " << std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << std::endl; + for (const auto& i: list) + To result{std_convert<typename From::value_type, typename To::value_type>(i)}; + std::cout << " -> Compare to std::wstring_convert: " << + std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count() << "s" << + std::endl; } + // iterate over remaining To types + if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value) + test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) +{ + random_context rc; + + test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings"); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) +{ + random_context rc{127}; + + test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings"); } // Test ISO and UTF encodings |