diff options
author | Roland Reichwein <mail@reichwein.it> | 2021-02-04 15:21:11 +0100 |
---|---|---|
committer | Roland Reichwein <mail@reichwein.it> | 2021-02-04 15:21:11 +0100 |
commit | 6a12dddc641be34b323835a495399715790811e0 (patch) | |
tree | 566278167dd803dca69b85794770d2cf4a46ab53 /src/test-unicode.cpp | |
parent | aac210c3ac70679f2d805b3a7ab4d4ad9a6a6808 (diff) |
Bugfixes
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r-- | src/test-unicode.cpp | 25 |
1 file changed, 18 insertions, 7 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index c169fc9..5529d2c 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -35,13 +35,11 @@ std::vector<types_collection_type> success_sets {
 
 // Error cases: throwing upon convert to all other types
 std::vector<std::basic_string<utf8_t>> failure_strings_char8_t {
- // Note: don't encode this as u8"" since MSVC will interpret \x80 as \u0080,
-// yet to be encoded to UTF-8 for execution encoding
- "\x80", // utf-8 continuation byte
- "\x81", // utf-8 continuation byte
- "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
- "\xF8\x80\x80\x80\x80", // overlong encoding
- "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
+ u8"\x80", // utf-8 continuation byte
+ u8"\x81", // utf-8 continuation byte
+ u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
+ u8"\xF8\x80\x80\x80\x80", // overlong encoding
+ u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
 };
 
 std::vector<std::basic_string<char16_t>> failure_strings_char16_t {
@@ -333,6 +331,19 @@ BOOST_AUTO_TEST_CASE(convert)
  BOOST_CHECK((unicode::convert<char32_t,char16_t>(U"abc")) == std::u16string{u"abc"});
 }
 
+BOOST_AUTO_TEST_CASE(string_u8string)
+{
+ std::string a{"\xc3\xa4"};
+
+ std::basic_string<utf8_t> b{a.begin(), a.end()};
+
+ BOOST_CHECK(b == std::basic_string<utf8_t>{u8"ä"});
+
+ a = std::string{b.begin(), b.end()};
+
+ BOOST_CHECK(a == std::string{"\xc3\xa4"});
+}
+
 // TODO:
 //
 // char8_t, char16_t, char32_t, char, wchar_t (UTF-16 on Windows, UTF-32 on Linux) |