#define BOOST_TEST_MODULE unicode_test #include #include #include #include #include #include #if BOOST_VERSION > 106700 // CPU Timer in Debian 10 boost is broken, so leave it to std::chrono wall clock #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "test-helper.h" using namespace std::chrono_literals; using namespace std::string_literals; typedef std::tuple, std::basic_string, std::basic_string> types_collection_type; // LCG for generating deterministic mixed data, see also https://arxiv.org/pdf/2001.05304.pdf uint8_t generate_byte() { static uint64_t x{1}; const static uint32_t a{0x915f77f5}; const static uint32_t c{12345}; const static uint32_t m_mask{0xFFFFFFFF}; x = (x * a + c) & m_mask; return (x >> 16) & 0xFF; } // max is inclusive template T generate_value(T max = std::numeric_limits::max()) { uint64_t max_modulo{ static_cast(0x100000000ULL) - (0x100000000ULL % (max + 1))}; uint32_t value{}; do { for (int i = 0; i < sizeof(value); ++i) { value = (value << 8) | generate_byte(); } } while (static_cast(value) >= max_modulo); return static_cast(value % (max + 1)); } // generates valid and invalid strings of different type template T generate_string_invalid(size_t length) { T result; std::generate_n(std::back_inserter(result), length, [&](){return generate_value();}); return result; } char32_t generate_char(char32_t max = 0x10FFFF - 0x800) { char32_t result {generate_value(max)}; if (result >= 0xD800) result += 0x800; return static_cast(result); } std::u32string generate_string(char32_t max, size_t length) { std::u32string result; std::generate_n(std::back_inserter(result), length, [&](){return generate_char(max);}); return result; } template void test_string_invalid(size_t length) { //std::cerr << "LENGTH: " << length << std::endl; typedef typename std::tuple_element::type To; From r {static_cast(generate_string_invalid(length))}; // base type interface try { To result{unicode::convert(r)}; if (r.empty()) { BOOST_CHECK(result.empty()); } else { BOOST_CHECK(!result.empty()); } } catch (const std::invalid_argument&) { // OK: this is an expected exception for convert() on bad input } catch (const std::exception& ex) { BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } // container type interface try { To result{unicode::convert(r)}; if (r.empty()) { BOOST_CHECK(result.empty()); } else { BOOST_CHECK(!result.empty()); } } catch (const std::invalid_argument&) { // OK: this is an expected exception for convert() on bad input } catch (const std::exception& ex) { BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } // encoding interface try { To result{unicode::convert,typename unicode::Encoding_t>(r)}; if (r.empty()) { BOOST_CHECK(result.empty()); } else { BOOST_CHECK(!result.empty()); } } catch (const std::invalid_argument&) { // OK: this is an expected exception for convert() on bad input } catch (const std::exception& ex) { BOOST_ERROR("Unexpected error on convert(): " << ex.what()); } // iterate over remaining To types if constexpr (i + 1 < std::tuple_size::value) test_string_invalid(length); } BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_invalid, T, types_collection_type) { for (int i = 0; i < 10; i++) { test_string_invalid(generate_value(100000)); } } class CPUTimer { public: CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) { } ~CPUTimer() { #if BOOST_VERSION > 106700 auto elapsed_cpu{mCPUTimer.elapsed()}; #endif std::cout << mName << ": " << std::chrono::duration(std::chrono::steady_clock::now() - mWallTime0).count() << "s" << #if BOOST_VERSION > 106700 " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" << #endif std::endl; } private: std::string mName; std::chrono::time_point mWallTime0; #if BOOST_VERSION > 106700 boost::timer::cpu_timer mCPUTimer; #endif }; template void test_string_valid(char32_t max, size_t length, const std::string& description) { typedef typename std::tuple_element::type To; // Fill UTF-32 data list: source for tests std::vector u32list; std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_string(max, generate_value(100000));}); // Fill From data list std::vector list; std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ return unicode::convert>(s); }); for (size_t i = 0; i < list.size(); i++) { BOOST_CHECK(list[i].size() >= u32list[i].size()); To result{unicode::convert,typename unicode::Encoding_t>(list[i])}; BOOST_CHECK(result.size() >= u32list[i].size()); auto boost_result{boost::locale::conv::utf_to_utf(list[i])}; BOOST_CHECK_EQUAL(result, boost_result); } { CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) + " "s + description + " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); for (const auto& i: list) To result{unicode::convert,typename unicode::Encoding_t>(i)}; } { CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf"); for (const auto& i: list) To result{boost::locale::conv::utf_to_utf(i)}; } { CPUTimer timer(" -> Compare to std::wstring_convert"); for (const auto& i: list) To result{std_convert(i)}; } // iterate over remaining To types if constexpr (index + 1 < std::tuple_size::value) test_string_valid(max, length, description); } BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_valid_ascii, T, types_collection_type) { test_string_valid(127, generate_value(100000), "ASCII only strings"); } BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_valid_all_unicode, T, types_collection_type) { test_string_valid(0x10FFFF - 0x800, generate_value(100000), "All Unicode strings"); }