From 563557be9c97496b7435bef4e64730a379e55037 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Tue, 28 Dec 2021 14:35:36 +0100 Subject: UTF and ISO are Encodings, not Facets --- include/unicode.h | 29 ++++++++++++++++------------- src/test-unicode.cpp | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/include/unicode.h b/include/unicode.h index be91d77..4e4c7eb 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -576,7 +576,7 @@ namespace unicode { typename iso_back_insert_iterator::string_type& s; }; - // Facet for convert() and ISO-8859-* + // Encoding for convert() and ISO-8859-* template struct ISO_8859 { @@ -599,7 +599,7 @@ namespace unicode { } }; - // Facet for convert() and UTF-* + // Encoding for convert() and UTF-* template struct UTF { @@ -622,7 +622,7 @@ namespace unicode { } }; - // Facet for convert() + // Encoding for convert() typedef ISO_8859, iso_back_insert_iterator<>> ISO_8859_1; typedef ISO_8859, iso_back_insert_iterator> ISO_8859_15; @@ -731,7 +731,7 @@ namespace unicode { }; // class ArchitectureOptimizer - // From and To are facets + // From and To are Encodings template::value, bool> = true> typename To::string_type convert_optimized(const typename From::string_type& s) { @@ -777,7 +777,7 @@ namespace unicode { return result; } - // From and To are facets + // From and To are Encodings template::value, bool> = true> typename To::string_type convert(const typename From::string_type& s) { @@ -799,7 +799,7 @@ namespace unicode { } } - // Helper to get correct Facet from char type, e.g. Encoding::Facet + // Helper to get correct Encoding from char type, e.g. Encoding::type or Encoding_t template struct Encoding { @@ -808,21 +808,24 @@ namespace unicode { template<> struct Encoding { - typedef UTF_8 Facet; + typedef UTF_8 type; }; template<> struct Encoding { - typedef UTF_16 Facet; + typedef UTF_16 type; }; template<> struct Encoding { - typedef UTF_32 Facet; + typedef UTF_32 type; }; + template + using Encoding_t = typename Encoding::type; + // From and To are from: utf8_t (i.e. char or char8_t (C++20)), char16_t and char32_t, char, wchar_t, uint8_t, uint16_t, uint32_t template, @@ -884,11 +887,11 @@ namespace unicode { return true; } - // Facet version - template::value, bool> = true> - bool is_valid_utf(const typename Facet::string_type& s) + // Encoding version + template::value, bool> = true> + bool is_valid_utf(const typename Encoding::string_type& s) { - return validate_utf(s); + return validate_utf(s); } } // namespace unicode diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 99e164b..70ec453 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -281,8 +281,8 @@ void test_utf_to_utf(std::tuple& t) BOOST_CHECK_MESSAGE(std::get(t) == result, "Container: From " << typeid(From).name() << "(" << i << ", " << std::get(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get(t) << "), got " << result); // test facet interface - result = unicode::convert::Facet, typename unicode::Encoding::Facet>(std::get(t)); - BOOST_CHECK_MESSAGE(std::get(t) == result, "Facet: From " << typeid(From).name() << "(" << i << ", " << std::get(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get(t) << "), got " << result); + result = unicode::convert, typename unicode::Encoding_t>(std::get(t)); + BOOST_CHECK_MESSAGE(std::get(t) == result, "Encoding: From " << typeid(From).name() << "(" << i << ", " << std::get(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get(t) << "), got " << result); // test actual results by comparing with boost::locale::conv results BOOST_CHECK_EQUAL(result, (boost::locale::conv::utf_to_utf(std::get(t)))); @@ -317,9 +317,9 @@ void test_is_valid_utf(std::tuple& t) result = unicode::is_valid_utf(std::get(t)); BOOST_CHECK_MESSAGE(result == true, "is_valid_utf w/ " << typeid(T).name() << "(" << i << ", " << std::get(t) << "), got " << result); - // test via Facet - result = unicode::is_valid_utf::Facet>(std::get(t)); - BOOST_CHECK_MESSAGE(result == true, "is_valid_utf w/ " << typeid(typename unicode::Encoding::Facet).name() << "(" << i << ", " << std::get(t) << "), got " << result); + // test via Encoding + result = unicode::is_valid_utf>(std::get(t)); + BOOST_CHECK_MESSAGE(result == true, "is_valid_utf w/ " << typeid(typename unicode::Encoding_t).name() << "(" << i << ", " << std::get(t) << "), got " << result); // iterate over other combinations if constexpr (i + 1 < std::tuple_size::type>::value) @@ -350,7 +350,7 @@ void test_utf_to_utf_failure(std::basic_string& s) // via container try { - (void) unicode::convert::Facet::string_type, typename unicode::Encoding::Facet::string_type>(s); + (void) unicode::convert::string_type, typename unicode::Encoding_t::string_type>(s); BOOST_ERROR("Container type: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); } catch (const std::invalid_argument&) { // OK: this is an expected exception for convert() on bad input @@ -360,8 +360,8 @@ void test_utf_to_utf_failure(std::basic_string& s) // via facet try { - (void) unicode::convert::Facet,typename unicode::Encoding::Facet>(s); - BOOST_ERROR("Facet: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); + (void) unicode::convert,typename unicode::Encoding_t>(s); + BOOST_ERROR("Encoding: Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name()); } catch (const std::invalid_argument&) { // OK: this is an expected exception for convert() on bad input } catch (const std::exception& ex) { @@ -393,7 +393,7 @@ void test_is_valid_utf_failure(std::basic_string& s) BOOST_CHECK_MESSAGE(unicode::is_valid_utf>(s) == false, "Expected bad UTF at index: " << index << ", " << typeid(T).name()); - BOOST_CHECK_MESSAGE(unicode::is_valid_utf::Facet>(s) == false, "Expected bad UTF at index: " << index << ", " << typeid(typename unicode::Encoding::Facet).name()); + BOOST_CHECK_MESSAGE(unicode::is_valid_utf>(s) == false, "Expected bad UTF at index: " << index << ", " << typeid(typename unicode::Encoding_t).name()); // iterate over remaining types if constexpr (index + 1 < std::tuple_size::value) @@ -506,7 +506,7 @@ void test_random_invalid(random_context& rc, size_t length) // facet interface try { - To result{unicode::convert::Facet,typename unicode::Encoding::Facet>(r)}; + To result{unicode::convert,typename unicode::Encoding_t>(r)}; if (r.empty()) { BOOST_CHECK(result.empty()); @@ -573,12 +573,12 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des // Fill From data list std::vector list; std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){ - return unicode::convert::Facet>(s); + return unicode::convert>(s); }); for (int i = 0; i < list.size(); i++) { BOOST_CHECK(list[i].size() >= u32list[i].size()); - To result{unicode::convert::Facet,typename unicode::Encoding::Facet>(list[i])}; + To result{unicode::convert,typename unicode::Encoding_t>(list[i])}; BOOST_CHECK(result.size() >= u32list[i].size()); auto boost_result{boost::locale::conv::utf_to_utf(list[i])}; BOOST_CHECK_EQUAL(result, boost_result); @@ -590,7 +590,7 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); for (const auto& i: list) - To result{unicode::convert::Facet,typename unicode::Encoding::Facet>(i)}; + To result{unicode::convert,typename unicode::Encoding_t>(i)}; } { -- cgit v1.2.3