From 6a12dddc641be34b323835a495399715790811e0 Mon Sep 17 00:00:00 2001
From: Roland Reichwein
Date: Thu, 4 Feb 2021 15:21:11 +0100
Subject: Bugfixes

---
 Makefile | 3 ++-
 src/test-unicode.cpp | 25 ++++++++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 7ed522b..7ea075e 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ LIBS+=-fuse-ld=lld
 endif
 
 ifeq ($(COMPILER_SUITE),clang)
+# libc++ is buggy for C++20: std::hash not implemented. Fixed in LLVM-12
 CXXFLAGS+=-stdlib=libc++
 endif
 
@@ -55,7 +56,7 @@ LIBS+= \
 else
 LIBS+= \
 -lstdc++
-#-lstdc++fs
+-lstdc++fs
 endif
 
 SRC=\
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index c169fc9..5529d2c 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -35,13 +35,11 @@ std::vector success_sets {
 
 // Error cases: throwing upon convert to all other types
 std::vector> failure_strings_char8_t {
- // Note: don't encode this as u8"" since MSVC will interpret \x80 as \u0080,
-// yet to be encoded to UTF-8 for execution encoding
- "\x80", // utf-8 continuation byte
- "\x81", // utf-8 continuation byte
- "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
- "\xF8\x80\x80\x80\x80", // overlong encoding
- "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
+ u8"\x80", // utf-8 continuation byte
+ u8"\x81", // utf-8 continuation byte
+ u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
+ u8"\xF8\x80\x80\x80\x80", // overlong encoding
+ u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
 };
 
 std::vector> failure_strings_char16_t {
@@ -333,6 +331,19 @@ BOOST_AUTO_TEST_CASE(convert)
 BOOST_CHECK((unicode::convert(U"abc")) == std::u16string{u"abc"});
 }
 
+BOOST_AUTO_TEST_CASE(string_u8string)
+{
+ std::string a{"\xc3\xa4"};
+
+ std::basic_string b{a.begin(), a.end()};
+
+ BOOST_CHECK(b == std::basic_string{u8"ä"});
+
+ a = std::string{b.begin(), b.end()};
+
+ BOOST_CHECK(a == std::string{"\xc3\xa4"});
+}
+
 // TODO:
 //
 // char8_t, char16_t, char32_t, char, wchar_t (UTF-16 on Windows, UTF-32 on Linux)
-- 
cgit v1.2.3