summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2021-12-30 17:10:49 +0100
committerRoland Reichwein <mail@reichwein.it>2021-12-30 17:10:49 +0100
commitb9833fd7e6c6f3373978d56f7a7b00ebdb02a94b (patch)
treeb5d9071e9a7fe8b0b1c84ea44c02bd9942d32e89
parentc5b5aadd8ac8faeeb9e04076166df4d685633cf6 (diff)
Extended tests
-rw-r--r--src/test-unicode.cpp58
1 files changed, 53 insertions, 5 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 70ec453..7216cff 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -38,11 +38,37 @@ typedef std::tuple<std::basic_string<utf8_t>, std::basic_string<char16_t>, std::
// Success cases: convert string to all other types, respectively
std::vector<types_collection_type> success_sets {
SUCCESS_TUPLE(""),
+
+ // various strings
SUCCESS_TUPLE("ASCII string1"),
SUCCESS_TUPLE("Täst just looks like German"),
SUCCESS_TUPLE("\u732b is chinese for cat"),
SUCCESS_TUPLE("\U0001F63A"),
SUCCESS_TUPLE("\U0001F63A is a smiling cat"),
+
+ // separators
+ SUCCESS_TUPLE("abc\r\ndef"),
+ SUCCESS_TUPLE("äöü\0\u20ac"),
+
+ // optimization relevant strings
+ SUCCESS_TUPLE("01234567\u20ac01234567"),
+ SUCCESS_TUPLE("0123456\u20ac01234567"),
+ SUCCESS_TUPLE("012345\u20ac01234567"),
+ SUCCESS_TUPLE("01234\u20ac01234567"),
+ SUCCESS_TUPLE("0123\u20ac01234567"),
+ SUCCESS_TUPLE("012\u20ac01234567"),
+ SUCCESS_TUPLE("01\u20ac01234567"),
+ SUCCESS_TUPLE("0\u20ac01234567"),
+ SUCCESS_TUPLE("\u20ac01234567"),
+ SUCCESS_TUPLE("0123456701234567\u20ac0123456701234567"),
+ SUCCESS_TUPLE("012345670123456\u20ac0123456701234567"),
+ SUCCESS_TUPLE("01234567012345\u20ac0123456701234567"),
+ SUCCESS_TUPLE("0123456701234\u20ac0123456701234567"),
+ SUCCESS_TUPLE("012345670123\u20ac0123456701234567"),
+ SUCCESS_TUPLE("01234567012\u20ac0123456701234567"),
+ SUCCESS_TUPLE("0123456701\u20ac0123456701234567"),
+ SUCCESS_TUPLE("012345670\u20ac0123456701234567"),
+ SUCCESS_TUPLE("01234567\u20ac0123456701234567"),
};
// Error cases: throwing upon convert to all other types
@@ -624,29 +650,51 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_colle
test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings");
}
-// Test ISO and UTF encodings
-BOOST_AUTO_TEST_CASE(convert)
+// Test ISO encodings
+BOOST_AUTO_TEST_CASE(convert_iso)
{
BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_1>({})}) == std::string{});
BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_1>("abc")}) == std::string{"abc"});
- BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_1>("äöü")}) == std::string{"äöü"});
+ BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_1>("\xe4\xf6\xfc")}) == std::string{"\xe4\xf6\xfc"}); // Latin-1 äöü
BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_1>("\xa4")}) == std::string{"\xa4"}); // ¤ (currency sign; 0xA4 is € only in ISO-8859-15)
- BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_15,unicode::ISO_8859_15>("\xa4")}) == std::string{"\xa4"}); // €
+ BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_15>({})}) == std::string{});
+ BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_15>("abc")}) == std::string{"abc"});
+ BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_15>("\xe4\xf6\xfc")}) == std::string{"\xe4\xf6\xfc"}); // Latin-1 äöü
+ BOOST_CHECK((std::string{unicode::convert<unicode::ISO_8859_15,unicode::ISO_8859_15>("\xa4")}) == std::string{"\xa4"}); // €
+ BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::ISO_8859_1,unicode::ISO_8859_15>("\xa4")}), std::invalid_argument);
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::ISO_8859_15,unicode::ISO_8859_1>("\xa4")}), std::invalid_argument); // € not available in ISO-8859-1
-
+}
+
+// Test conversion between ISO and UTF encodings
+BOOST_AUTO_TEST_CASE(convert_iso_utf)
+{
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_1>(u8"\u20ac")}), std::invalid_argument);
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_16,unicode::ISO_8859_1>(u"\u20ac")}), std::invalid_argument);
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_32,unicode::ISO_8859_1>(U"\u20ac")}), std::invalid_argument);
+
+ BOOST_CHECK((std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_15>(u8"\u20ac")}) == std::string{"\xa4"}); // €
+ BOOST_CHECK((std::string{unicode::convert<unicode::UTF_16,unicode::ISO_8859_15>(u"\u20ac")}) == std::string{"\xa4"}); // €
+ BOOST_CHECK((std::string{unicode::convert<unicode::UTF_32,unicode::ISO_8859_15>(U"\u20ac")}) == std::string{"\xa4"}); // €
+
+ BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_15>(u8"\u00A4")}), std::invalid_argument); // currency sign: available in ISO-8859-1, but not in ISO-8859-15
+ BOOST_CHECK((std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_1>(u8"\u00A4")}) == std::string{"\xa4"});
+
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_8,unicode::ISO_8859_15>(u8"\u732b")}), std::invalid_argument);
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_16,unicode::ISO_8859_15>(u"\u732b")}), std::invalid_argument);
BOOST_CHECK_THROW(((void)std::string{unicode::convert<unicode::UTF_32,unicode::ISO_8859_15>(U"\u732b")}), std::invalid_argument);
+}
+// Test UTF encodings
+BOOST_AUTO_TEST_CASE(convert_utf)
+{
BOOST_CHECK_THROW((unicode::convert<unicode::UTF_32,unicode::UTF_8>(std::u32string{(char32_t*)"\x00\xD8\x00\x00\x00\x00\x00\x00"})) , std::invalid_argument);
BOOST_CHECK((unicode::convert<unicode::UTF_8,unicode::UTF_16>(u8"abc")) == std::u16string{u"abc"});
BOOST_CHECK((unicode::convert<unicode::UTF_32,unicode::UTF_16>(U"abc")) == std::u16string{u"abc"});
+
+ BOOST_CHECK((unicode::convert<unicode::UTF_8,unicode::UTF_16>(u8"a\0bc")) == std::u16string{u"a\0bc"});
BOOST_CHECK((unicode::convert<utf8_t,char16_t>(u8"abc")) == std::u16string{u"abc"});
BOOST_CHECK((unicode::convert<char32_t,char16_t>(U"abc")) == std::u16string{u"abc"});