From 7aff3a1a8439e1465e4e5ca99fa4d1e18fe3df38 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Fri, 17 Dec 2021 20:04:09 +0100 Subject: Added tests --- src/test-unicode.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 158 insertions(+), 17 deletions(-) diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index c793399..29e5c2e 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -53,7 +53,9 @@ std::vector> failure_strings_char16_t { }; std::vector> failure_strings_char32_t { + U"\xD800 and more text", // invalid unicode (surrogate half) U"blabla \xD801", // invalid unicode (surrogate half) + U"moreblabla \xDFFF", // invalid unicode (surrogate half) U"\x10000000", // invalid unicode (number too big) }; @@ -259,26 +261,44 @@ struct random_context { std::random_device rd; // OS random number engine to seed RNG (below) std::mt19937 gen{rd()}; std::uniform_int_distribution sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units + std::uniform_int_distribution code_point_distribution{0, 0x10FFFF - 0x800}; }; +// generates valid and invalid strings of different type template -T generate_random(random_context& rc, size_t length) +T generate_random_invalid(random_context& rc, size_t length) { // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC - std::uniform_int_distribution code_unit(std::numeric_limits::max()); // code unit value + std::uniform_int_distribution code_unit{0, std::numeric_limits::max()}; // code unit value T result; std::generate_n(std::back_inserter(result), length, [&](){return static_cast(code_unit(rc.gen));}); return result; } +char32_t generate_random_char(random_context& rc) +{ + auto result {rc.code_point_distribution(rc.gen)}; + if (result >= 0xD800) + result += 0x800; + return static_cast(result); +} + +std::u32string generate_random_string(random_context& rc, size_t length) +{ + std::u32string result; + std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); + + return result; +} + template void test_random(random_context& rc, size_t length) { //std::cerr << "LENGTH: " << length << std::endl; typedef typename std::tuple_element::type To; - From r {static_cast(generate_random(rc, length))}; + From r {static_cast(generate_random_invalid(rc, length))}; // base type interface try { @@ -330,28 +350,139 @@ void test_random(random_context& rc, size_t length) test_random(rc, length); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) { random_context rc; - int i{}; - // run for 1s (debug) 10s (release) = total time for all random_sequences types! -#ifdef _DEBUG - const auto timeout{1.0s}; -#else - const auto timeout{10.0s}; -#endif + for (int i = 0; i < 10; i++) { + test_random(rc, rc.sequence_length(rc.gen)); + } +} - auto timeout_stamp { std::chrono::steady_clock::now() + (timeout / std::tuple_size::value)}; +BOOST_AUTO_TEST_CASE(random_sequences_valid) +{ + random_context rc; - while (!(std::chrono::steady_clock::now() > timeout_stamp)) { - test_random(rc, rc.sequence_length(rc.gen)); - i++; + // Fill UTF-32 data list + std::vector u32list; + std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); + + // Fill UTF-16 data list + std::vector u16list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(u16list), [](const std::u32string& s){return unicode::convert(s);}); + + // Fill UTF-8 data list + std::vector u8list; + std::transform(u32list.begin(), u32list.end(), std::back_inserter(u8list), [](const std::u32string& s){return unicode::convert(s);}); + + for (const auto& i : u32list) { + std::u32string s32{unicode::convert(i)}; + BOOST_CHECK(s32.size() == i.size()); + std::u16string s16{unicode::convert(i)}; + BOOST_CHECK(s16.size() >= i.size()); + std::u8string s8{unicode::convert(i)}; + BOOST_CHECK(s8.size() >= i.size()); + } + + for (const auto& i : u16list) { + std::u32string s32{unicode::convert(i)}; + BOOST_CHECK(s32.size() > 0 || i.size() == 0); + std::u16string s16{unicode::convert(i)}; + BOOST_CHECK(s16.size() == i.size()); + std::u8string s8{unicode::convert(i)}; + BOOST_CHECK(s8.size() >= i.size()); + } + + for (const auto& i : u8list) { + std::u32string s32{unicode::convert(i)}; + BOOST_CHECK(s32.size() > 0 || i.size() == 0); + std::u16string s16{unicode::convert(i)}; + BOOST_CHECK(s16.size() > 0 || i.size() == 0); + std::u8string s8{unicode::convert(i)}; + BOOST_CHECK(s8.size() == i.size()); + } + + { + // Performance test UTF-32 -> UTF-32 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u32list) { + std::u32string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-32: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; } - BOOST_CHECK_MESSAGE(i > 1, "Not enough iterations done!"); + { + // Performance test UTF-32 -> UTF-16 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u32list) { + std::u16string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-16: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-32 -> UTF-8 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u32list) { + std::u8string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-32 to UTF-8: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-16 -> UTF-32 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u16list) { + std::u32string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-32: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-16 -> UTF-16 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u16list) { + std::u16string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-16: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-16 -> UTF-8 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u16list) { + std::u8string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-16 to UTF-8: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-8 -> UTF-32 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u8list) { + std::u32string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-32: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-8 -> UTF-16 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u8list) { + std::u16string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-16: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } + + { + // Performance test UTF-8 -> UTF-8 + auto t0{std::chrono::steady_clock::now()}; + for (const auto& i : u8list) { + std::u8string s{unicode::convert(i)}; + } + std::cout << "Performance test for converting 1M strings from UTF-8 to UTF-8: " << std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << std::endl; + } - std::cout << "random_sequences: Completed " << i << " iterations for long random code unit sequences for " << typeid(typename T::value_type).name() << std::endl; } // Test ISO and UTF encodings @@ -366,6 +497,15 @@ BOOST_AUTO_TEST_CASE(convert) BOOST_CHECK_THROW(((void)std::string{unicode::convert("\xa4")}), std::invalid_argument); // € not available in ISO-8859-1 + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u8"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(U"\u20ac")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u8"\u732b")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(u"\u732b")}), std::invalid_argument); + BOOST_CHECK_THROW(((void)std::string{unicode::convert(U"\u732b")}), std::invalid_argument); + + BOOST_CHECK_THROW((unicode::convert(std::u32string{(char32_t*)"\x00\xD8\x00\x00\x00\x00\x00\x00"})) , std::invalid_argument); + BOOST_CHECK((unicode::convert(u8"abc")) == std::u16string{u"abc"}); BOOST_CHECK((unicode::convert(U"abc")) == std::u16string{u"abc"}); @@ -431,6 +571,7 @@ BOOST_AUTO_TEST_CASE(is_valid_utf) BOOST_CHECK(unicode::is_valid_utf(u8"äöü")); } +// check assumptions about environment BOOST_AUTO_TEST_CASE(string_u8string) { std::string a{"\xc3\xa4"}; -- cgit v1.2.3