summaryrefslogtreecommitdiffhomepage
path: root/include
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2021-01-24 18:48:42 +0100
committerRoland Reichwein <mail@reichwein.it>2021-01-24 18:48:42 +0100
commit5f7ae62649c79683597e33af673ae1dcf5267917 (patch)
treefd84b19959ec3880a171ad512910eab522060058 /include
Initial commit: Non working initial code
Diffstat (limited to 'include')
-rw-r--r--include/unicode.h123
1 files changed, 123 insertions, 0 deletions
diff --git a/include/unicode.h b/include/unicode.h
new file mode 100644
index 0000000..2969aa0
--- /dev/null
+++ b/include/unicode.h
@@ -0,0 +1,123 @@
+// libunicode
+// Copyright (C) 2021 Roland Reichwein
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <string>
+
+namespace {
+
+  // NOTE(review): an unnamed namespace inside a header gives every including
+  // translation unit its own private copy of these helpers. It is kept here
+  // so that existing name lookup keeps working; consider moving the helpers
+  // into a named detail namespace.
+
+  // Read-only input iterator over a UTF-8 code unit sequence. Dereferencing
+  // yields the Unicode code point (char32_t) that starts at the current
+  // position; incrementing skips all code units of that code point.
+  //
+  // Ill-formed input (invalid lead byte, stray or missing continuation byte,
+  // truncated sequence, overlong encoding, encoded surrogate, value above
+  // U+10FFFF) is reported as U+FFFD and consumes exactly one code unit, so
+  // iteration always makes progress and never reads or moves past
+  // end_iterator.
+  struct utf8_iterator
+  {
+    // Member types required by std::iterator_traits / standard algorithms.
+    typedef std::input_iterator_tag iterator_category;
+    typedef char32_t value_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef char32_t* pointer;
+    typedef char32_t& reference;
+
+    // Decodes the sequence starting at `iterator` without moving it.
+    // Stores the decoded code point (or U+FFFD on error) in `code_point` and
+    // returns the number of code units it occupies: 1..4 for data, 0 only
+    // when `iterator` already equals `end_iterator`.
+    size_t decode(char32_t& code_point) const
+    {
+      code_point = 0xFFFD;
+      if (iterator == end_iterator)
+        return 0;
+
+      const unsigned char lead = static_cast<unsigned char>(*iterator);
+      if (lead < 0x80) {
+        code_point = lead;
+        return 1;
+      }
+
+      size_t length = 0;
+      char32_t minimum = 0; // smallest code point that needs `length` units
+      char32_t result = 0;
+      if ((lead & 0xE0) == 0xC0) {
+        length = 2;
+        minimum = 0x80;
+        result = lead & 0x1F;
+      } else if ((lead & 0xF0) == 0xE0) {
+        length = 3;
+        minimum = 0x800;
+        result = lead & 0x0F;
+      } else if ((lead & 0xF8) == 0xF0) {
+        length = 4;
+        minimum = 0x10000;
+        result = lead & 0x07;
+      } else {
+        return 1; // stray continuation byte or invalid lead byte
+      }
+
+      if (static_cast<size_t>(end_iterator - iterator) < length)
+        return 1; // sequence is cut off by the end of the input
+
+      std::u8string::const_iterator position = iterator;
+      for (size_t i = 1; i < length; ++i) {
+        ++position;
+        const unsigned char unit = static_cast<unsigned char>(*position);
+        if ((unit & 0xC0) != 0x80)
+          return 1; // not a continuation byte
+        result = (result << 6) | (unit & 0x3F);
+      }
+
+      // Reject overlong forms, UTF-16 surrogates and out-of-range values.
+      if (result < minimum || result > 0x10FFFF ||
+          (result >= 0xD800 && result <= 0xDFFF))
+        return 1;
+
+      code_point = result;
+      return length;
+    }
+
+    // Updates `value` with the code point at the current position.
+    void get_value()
+    {
+      decode(value);
+    }
+
+    // Number of code units occupied by the code point at the current
+    // position (0 at the end of the input).
+    size_t get_number_of_utf8_bytes()
+    {
+      char32_t ignored{};
+      return decode(ignored);
+    }
+
+    // pre-increment: advance to the start of the next code point
+    utf8_iterator& operator++()
+    {
+      iterator += static_cast<difference_type>(get_number_of_utf8_bytes());
+      return *this;
+    }
+
+    // post-increment
+    utf8_iterator operator++(int)
+    {
+      utf8_iterator previous = *this;
+      ++*this;
+      return previous;
+    }
+
+    // Iterators compare by position only; `end_iterator` and `value` are
+    // not part of the comparison.
+    bool operator==(const utf8_iterator& other) const
+    {
+      return iterator == other.iterator;
+    }
+
+    bool operator!=(const utf8_iterator& other) const
+    {
+      return iterator != other.iterator;
+    }
+
+    // Returns a reference to the cached `value` member, so the reference is
+    // only valid while this iterator object is alive and not re-dereferenced.
+    reference operator*()
+    {
+      get_value();
+      return value;
+    }
+
+    // Current position; const_iterator so that const strings can be read.
+    std::u8string::const_iterator iterator;
+
+    // End of the underlying sequence; bounds every read and increment.
+    std::u8string::const_iterator end_iterator;
+    value_type value{};
+  };
+
+  // Output iterator that appends Unicode code points to a std::u16string,
+  // encoded as UTF-16 (one code unit, or a surrogate pair above U+FFFF).
+  struct utf16_back_insert_iterator
+  {
+    // Member types required by std::iterator_traits / standard algorithms.
+    typedef std::output_iterator_tag iterator_category;
+    typedef void value_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef void pointer;
+    typedef utf16_back_insert_iterator& reference;
+
+    utf16_back_insert_iterator(std::u16string& s): s(s) {}
+
+    // no-op: the position is always the end of the target string
+    utf16_back_insert_iterator& operator++()
+    {
+      return *this;
+    }
+
+    // no-op, see pre-increment
+    utf16_back_insert_iterator operator++(int)
+    {
+      return *this;
+    }
+
+    // support *x = value, together with operator=()
+    reference operator*()
+    {
+      return *this;
+    }
+
+    // append utf-16 word sequence
+    // Values that are not Unicode scalar values (surrogates, > U+10FFFF)
+    // are written as U+FFFD.
+    reference operator=(const char32_t& value)
+    {
+      if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
+        s.push_back(static_cast<char16_t>(0xFFFD));
+      } else if (value < 0x10000) {
+        s.push_back(static_cast<char16_t>(value));
+      } else {
+        const char32_t offset = value - 0x10000;
+        s.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));   // high surrogate
+        s.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF))); // low surrogate
+      }
+      return *this;
+    }
+
+    // Target string; must outlive the iterator.
+    std::u16string& s;
+  };
+
+  // Creates an output iterator appending UTF-16 to `s`.
+  utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
+  {
+    return utf16_back_insert_iterator(s);
+  }
+
+  // Iterator to the first code point of `s`. `s` must outlive the iterator.
+  utf8_iterator utf8_begin(const std::u8string& s)
+  {
+    return utf8_iterator{s.begin(), s.end()};
+  }
+
+  // Past-the-end iterator for `s`; compares equal to an iterator that has
+  // consumed all of `s`.
+  utf8_iterator utf8_end(const std::u8string& s)
+  {
+    return utf8_iterator{s.end(), s.end()};
+  }
+
+} // namespace
+
+namespace unicode {
+
+// returns number of bytes in UTF-8 byte sequence of first found code point,
+// if found. 0 if none found or sequence empty.
+//size_t utf8_start()
+//{
+//}
+
+// Converts a UTF-8 encoded string to a UTF-16 encoded string.
+//
+// Walks `s` from utf8_begin(s) to utf8_end(s) and writes every yielded code
+// point through utf16_back_inserter(result). Requires utf8_begin() and
+// utf8_end() to be callable with a const std::u8string.
+//
+// Declared inline because this definition lives in a header: without it,
+// including the header from more than one translation unit produces
+// multiple-definition link errors.
+inline std::u16string utf8_to_utf16(const std::u8string& s)
+{
+  std::u16string result;
+
+  std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));
+
+  return result;
+}
+
+//std::u8string utf16_to_utf8(const std::u16string& s)
+//{
+// std::u8string result;
+//
+// std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
+//
+// return result;
+//}
+
+} // namespace unicode
+