summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2021-01-25 18:54:25 +0100
committer Roland Reichwein <mail@reichwein.it> 2021-01-25 18:54:25 +0100
commit 918d015302a004755ce0cf4968793cdf6a61bca8 (patch)
tree a5c962b33a46249dfebf2a8506d9acedefac523c /src
parent d8bddb9dc248bb3cc04116c97259ea6f5c13e6d0 (diff)
Add first working conversion UTF-8 -> UTF-16
Diffstat (limited to 'src')
-rw-r--r-- src/test-unicode.cpp 15
1 files changed, 12 insertions, 3 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 4576d06..41fcd20 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -4,14 +4,23 @@
#include <string>
-//#include <unicode.h>
+#include <unicode.h>
BOOST_AUTO_TEST_CASE(utf8_to_utf16)
{
std::u8string u8{u8"ascii string1"};
- //std::u16string u16{unicode::utf8_to_utf16(u8)};
+ std::u16string u16{unicode::utf8_to_utf16(u8)};
- //BOOST_CHECK_EQUAL(u16, u"ascii string1");
+ BOOST_CHECK(u16 == u"ascii string1");
}
+// TODO:
+// invalid bytes
+// an unexpected continuation byte
+// a non-continuation byte before the end of the character
+// the string ending before the end of the character (which can happen in simple string truncation)
+// an overlong encoding
+// a sequence that decodes to an invalid code point
+//
+// high and low surrogate halves used by UTF-16 (U+D800 through U+DFFF) and code points not encodable by UTF-16 (those after U+10FFFF)