summaryrefslogtreecommitdiffhomepage
path: root/src/test-unicode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r--src/test-unicode.cpp313
1 files changed, 2 insertions, 311 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 6eb523e..1ea704b 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -27,6 +27,8 @@
#include <unicode.h>
+#include "test-helper.h"
+
using namespace std::chrono_literals;
using namespace std::string_literals;
@@ -95,123 +97,6 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t {
U"\x10000000", // invalid unicode (number too big)
};
-// output operators must be in same namespace as the type itself
-namespace std {
-
-#ifdef __cpp_char8_t
- std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint8_t>(c));
- os << "]";
-
- return os;
- }
-#endif
-
- std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint16_t>(c));
- os << "]";
-
- return os;
- }
-
- std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint32_t>(c));
- os << "]";
-
- return os;
- }
-
-} // namespace std
-
-namespace {
-
- // utility wrapper to adapt locale-bound facets for wstring/wbuffer convert
- template<class Facet>
- struct deletable_facet : Facet
- {
- template<class ...Args>
- deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {}
- ~deletable_facet() {}
- };
-
- // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?)
- std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16;
- std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32;
-
- template<typename From, typename To>
- std::basic_string<To> std_convert(const std::basic_string<From>& s);
-
- template<>
- std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- a = conv32.to_bytes(conv32.from_bytes(a));
- return std::basic_string<utf8_t>{a.begin(), a.end()};
- }
-
- template<>
- std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- return conv16.from_bytes(a);
- }
-
- template<>
- std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- return conv32.from_bytes(a);
- }
-
- template<>
- std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s)
- {
- auto result{conv16.to_bytes(s)};
- return std::basic_string<utf8_t>(result.begin(), result.end());
- }
-
- template<>
- std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s)
- {
- return conv16.from_bytes(conv16.to_bytes(s));
- }
-
- template<>
- std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s)
- {
- return conv32.from_bytes(conv16.to_bytes(s));
- }
-
- template<>
- std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s)
- {
- auto result{conv32.to_bytes(s)};
- return std::basic_string<utf8_t>(result.begin(), result.end());
- }
-
- template<>
- std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s)
- {
- return conv16.from_bytes(conv32.to_bytes(s));
- }
-
- template<>
- std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s)
- {
- return conv32.from_bytes(conv32.to_bytes(s));
- }
-
-} // namespace
-
// check assumptions about environment
BOOST_AUTO_TEST_CASE(string_u8string)
{
@@ -457,200 +342,6 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode)
BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF));
}
-struct random_context {
- random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {}
- std::random_device rd; // OS random number engine to seed RNG (below)
- std::mt19937 gen{rd()};
- std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units
- std::uniform_int_distribution<unsigned long> code_point_distribution;
-};
-
-// generates valid and invalid strings of different type
-template<typename T>
-T generate_random_invalid(random_context& rc, size_t length)
-{
- // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC
- std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value
- T result;
- std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));});
-
- return result;
-}
-
-char32_t generate_random_char(random_context& rc)
-{
- auto result {rc.code_point_distribution(rc.gen)};
- if (result >= 0xD800)
- result += 0x800;
- return static_cast<char32_t>(result);
-}
-
-std::u32string generate_random_string(random_context& rc, size_t length)
-{
- std::u32string result;
- std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);});
-
- return result;
-}
-
-template<typename From, typename ToTypesCollectionType, size_t i = 0>
-void test_random_invalid(random_context& rc, size_t length)
-{
- //std::cerr << "LENGTH: " << length << std::endl;
- typedef typename std::tuple_element<i,ToTypesCollectionType>::type To;
-
- From r {static_cast<From>(generate_random_invalid<From>(rc, length))};
-
- // base type interface
- try {
- To result{unicode::convert<typename From::value_type,typename To::value_type>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // container type interface
- try {
- To result{unicode::convert<From, To>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // encoding interface
- try {
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // iterate over remaining To types
- if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value)
- test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length);
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type)
-{
- random_context rc;
-
- for (int i = 0; i < 10; i++) {
- test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen));
- }
-}
-
-class CPUTimer
-{
-public:
- CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now())
- {
- }
-
- ~CPUTimer()
- {
-#if BOOST_VERSION > 106700
- auto elapsed_cpu{mCPUTimer.elapsed()};
-#endif
- std::cout << mName << ": " << std::chrono::duration<double>(std::chrono::steady_clock::now() - mWallTime0).count() <<
- "s" <<
-#if BOOST_VERSION > 106700
- " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" <<
-#endif
- std::endl;
- }
-
-private:
- std::string mName;
- std::chrono::time_point<std::chrono::steady_clock> mWallTime0;
-#if BOOST_VERSION > 106700
- boost::timer::cpu_timer mCPUTimer;
-#endif
-};
-
-template<typename From, typename ToTypesCollectionType, size_t index = 0>
-void test_random_valid(random_context& rc, size_t length, const std::string& description)
-{
- typedef typename std::tuple_element<index,ToTypesCollectionType>::type To;
-
- // Fill UTF-32 data list: source for tests
- std::vector<std::u32string> u32list;
- std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));});
-
- // Fill From data list
- std::vector<From> list;
- std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){
- return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s);
- });
-
- for (size_t i = 0; i < list.size(); i++) {
- BOOST_CHECK(list[i].size() >= u32list[i].size());
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])};
- BOOST_CHECK(result.size() >= u32list[i].size());
- auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(list[i])};
- BOOST_CHECK_EQUAL(result, boost_result);
- }
-
- {
- CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) +
- " "s + description +
- " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) +
- " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8));
- for (const auto& i: list)
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(i)};
- }
-
- {
- CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf");
- for (const auto& i: list)
- To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)};
- }
-
- {
- CPUTimer timer(" -> Compare to std::wstring_convert");
- for (const auto& i: list)
- To result{std_convert<typename From::value_type, typename To::value_type>(i)};
- }
-
- // iterate over remaining To types
- if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value)
- test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description);
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type)
-{
- random_context rc{127};
-
- test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings");
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type)
-{
- random_context rc;
-
- test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings");
-}
-
// Test ISO encodings
BOOST_AUTO_TEST_CASE(convert_iso)
{