summary | refs | log | tree | commit | diff | homepage
path: root/src/test-unicode.cpp
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2021-02-04 15:21:11 +0100
committer Roland Reichwein <mail@reichwein.it> 2021-02-04 15:21:11 +0100
commit 6a12dddc641be34b323835a495399715790811e0 (patch)
tree 566278167dd803dca69b85794770d2cf4a46ab53 /src/test-unicode.cpp
parent aac210c3ac70679f2d805b3a7ab4d4ad9a6a6808 (diff)
Bugfixes
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- src/test-unicode.cpp 25
1 files changed, 18 insertions, 7 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index c169fc9..5529d2c 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -35,13 +35,11 @@ std::vector<types_collection_type> success_sets {
// Error cases: throwing upon convert to all other types
std::vector<std::basic_string<utf8_t>> failure_strings_char8_t {
- // Note: don't encode this as u8"" since MSVC will interpret \x80 as \u0080,
-// yet to be encoded to UTF-8 for execution encoding
- "\x80", // utf-8 continuation byte
- "\x81", // utf-8 continuation byte
- "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
- "\xF8\x80\x80\x80\x80", // overlong encoding
- "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
+ u8"\x80", // utf-8 continuation byte
+ u8"\x81", // utf-8 continuation byte
+ u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
+ u8"\xF8\x80\x80\x80\x80", // overlong encoding
+ u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
};
std::vector<std::basic_string<char16_t>> failure_strings_char16_t {
@@ -333,6 +331,19 @@ BOOST_AUTO_TEST_CASE(convert)
BOOST_CHECK((unicode::convert<char32_t,char16_t>(U"abc")) == std::u16string{u"abc"});
}
+BOOST_AUTO_TEST_CASE(string_u8string)
+{
+ std::string a{"\xc3\xa4"};
+
+ std::basic_string<utf8_t> b{a.begin(), a.end()};
+
+ BOOST_CHECK(b == std::basic_string<utf8_t>{u8"ä"});
+
+ a = std::string{b.begin(), b.end()};
+
+ BOOST_CHECK(a == std::string{"\xc3\xa4"});
+}
+
// TODO:
//
// char8_t, char16_t, char32_t, char, wchar_t (UTF-16 on Windows, UTF-32 on Linux)