summary refs log tree commit diff homepage
path: root/src/test-unicode.cpp
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2021-12-28 12:46:30 +0100
committer Roland Reichwein <mail@reichwein.it> 2021-12-28 12:46:30 +0100
commit 403c885d67f79c637ebcb303722adfd6a4b8195e (patch)
tree d8f40c674a5c65176e028a1c7bb9122baa2e7756 /src/test-unicode.cpp
parent 970ba4111160fbf78351b21a024c46c0978e0440 (diff)
Optimize UTF validation
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- src/test-unicode.cpp 77
1 files changed, 58 insertions, 19 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 2675989..99e164b 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -283,6 +283,9 @@ void test_utf_to_utf(std::tuple<Ts...>& t)
// test facet interface
result = unicode::convert<typename unicode::Encoding<typename From::value_type>::Facet, typename unicode::Encoding<typename To::value_type>::Facet>(std::get<i>(t));
BOOST_CHECK_MESSAGE(std::get<j>(t) == result, "Facet: From " << typeid(From).name() << "(" << i << ", " << std::get<i>(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get<j>(t) << "), got " << result);
+
+ // test actual results by comparing with boost::locale::conv results
+ BOOST_CHECK_EQUAL(result, (boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(std::get<i>(t))));
// iterate over other combinations
if constexpr (i + 1 < std::tuple_size<typename std::remove_reference<decltype(t)>::type>::value)
@@ -650,26 +653,10 @@ BOOST_AUTO_TEST_CASE(convert)
BOOST_CHECK((unicode::convert<char, char32_t>("äöü")) == std::u32string{U"äöü"});
-#ifdef _WIN32
- BOOST_CHECK(sizeof(wchar_t) == 2);
-#else // Unix like
- BOOST_CHECK(sizeof(wchar_t) == 4);
-#endif
-
- // For the following checks, wchar_t size and encoding is system dependent:
- // Windows: UTF-16
- // Linux: UTF-32
- BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"});
- BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"});
- BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"});
- BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"});
- BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::basic_string<utf8_t>{(utf8_t*)"\U0001F63A"});
+ // vector
+ BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<char16_t>>(std::vector<char>{})) == std::vector<char16_t>{});
+ BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<char16_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<char16_t>{u'ä', u'ö', u'ü'}));
- BOOST_CHECK((unicode::convert<std::string, std::wstring>(std::string{"äöü"})) == std::wstring{L"äöü"});
-
- BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{})) == std::vector<wchar_t>{});
- BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<wchar_t>{L'ä', L'ö', L'ü'}));
-
// deque
BOOST_CHECK((unicode::convert<std::deque<char>, std::deque<wchar_t>>(std::deque<char>{})) == std::deque<wchar_t>{});
BOOST_CHECK((unicode::convert<std::deque<char>, std::deque<wchar_t>>(std::deque<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::deque<wchar_t>{L'ä', L'ö', L'ü'}));
@@ -703,6 +690,58 @@ BOOST_AUTO_TEST_CASE(convert)
BOOST_CHECK((unicode::convert<std::array<uint8_t, 6>, std::list<uint16_t>>(std::array<uint8_t, 6>{0xc3, 0xa4, 0xc3, 0xb6, 0xc3, 0xbc})) == (std::list<uint16_t>{L'ä', L'ö', L'ü'}));
}
+// wchar_t-specific conversion tests, kept in their own test case because the expected wchar_t size and encoding are system dependent
+BOOST_AUTO_TEST_CASE(convert_wstring)
+{
+#ifdef _WIN32
+ BOOST_CHECK(sizeof(wchar_t) == 2); // expected wchar_t width when _WIN32 is defined
+#else // Unix like
+ BOOST_CHECK(sizeof(wchar_t) == 4); // expected wchar_t width on every other target
+#endif
+
+ // For the following checks, the wchar_t size and encoding are system dependent:
+ // Windows: UTF-16
+ // Linux: UTF-32
+ BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"}); // expected value is the compiler's own wide literal
+ BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"}); // U+732B: code point inside the BMP
+ BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"}); // U+1F63A: code point outside the BMP
+ BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"}); // wide -> char32_t
+ BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::basic_string<utf8_t>{(utf8_t*)"\U0001F63A"}); // wide -> utf8_t; the cast views the narrow literal as utf8_t (type declared elsewhere)
+
+ BOOST_CHECK((unicode::convert<std::string, std::wstring>(std::string{"äöü"})) == std::wstring{L"äöü"}); // same check, selecting the conversion by container type
+
+ BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{})) == std::vector<wchar_t>{}); // empty input yields empty output
+ BOOST_CHECK((unicode::convert<std::vector<char>, std::vector<wchar_t>>(std::vector<char>{'\xc3', '\xa4', '\xc3', '\xb6', '\xc3', '\xbc'})) == (std::vector<wchar_t>{L'ä', L'ö', L'ü'})); // input bytes are the UTF-8 encoding of "äöü"
+
+ std::u16string u16_value{u"\U0001F63A"}; // the same code point U+1F63A as a char16_t, ...
+ std::u32string u32_value{U"\U0001F63A"}; // ... char32_t ...
+ std::wstring w_value{L"\U0001F63A"}; // ... and wide string literal
+
+ std::u16string result_u16_value{unicode::convert<std::wstring, std::u16string>(w_value)}; // wide -> UTF-16
+ std::u32string result_u32_value{unicode::convert<std::wstring, std::u32string>(w_value)}; // wide -> UTF-32
+ std::wstring result_w_value_1{unicode::convert<std::u16string, std::wstring>(u16_value)}; // UTF-16 -> wide
+ std::wstring result_w_value_2{unicode::convert<std::u32string, std::wstring>(u32_value)}; // UTF-32 -> wide
+
+ BOOST_CHECK_EQUAL(u16_value.size(), 2); // U+1F63A takes two UTF-16 code units (surrogate pair)
+ BOOST_CHECK_EQUAL(u32_value.size(), 1); // and a single UTF-32 code unit
+ BOOST_CHECK_EQUAL(result_u16_value.size(), 2);
+ BOOST_CHECK_EQUAL(result_u32_value.size(), 1);
+ BOOST_CHECK_EQUAL(u16_value, result_u16_value); // converted results must match the literals exactly
+ BOOST_CHECK_EQUAL(u32_value, result_u32_value);
+ BOOST_CHECK(w_value == result_w_value_1);
+ BOOST_CHECK(w_value == result_w_value_2);
+#ifdef _WIN32
+ BOOST_CHECK_EQUAL(w_value.size(), 2); // with _WIN32 the wide strings are expected to hold two units, like UTF-16
+ BOOST_CHECK_EQUAL(result_w_value_1.size(), 2);
+ BOOST_CHECK_EQUAL(result_w_value_2.size(), 2);
+#else // Unix like
+ BOOST_CHECK_EQUAL(w_value.size(), 1); // otherwise a single unit is expected, like UTF-32
+ BOOST_CHECK_EQUAL(result_w_value_1.size(), 1);
+ BOOST_CHECK_EQUAL(result_w_value_2.size(), 1);
+#endif
+
+}
+
BOOST_AUTO_TEST_CASE(is_valid_utf)
{
BOOST_CHECK(unicode::is_valid_utf<char16_t>(u"äöü"));