summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2021-01-24 18:48:42 +0100
committer Roland Reichwein <mail@reichwein.it> 2021-01-24 18:48:42 +0100
commit 5f7ae62649c79683597e33af673ae1dcf5267917 (patch)
tree fd84b19959ec3880a171ad512910eab522060058
Initial commit: Non working initial code
-rw-r--r-- Makefile 52
-rw-r--r-- debian/control 15
-rw-r--r-- include/unicode.h 123
-rw-r--r-- src/recode.cpp 4
-rw-r--r-- src/test-unicode.cpp 17
5 files changed, 211 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..90471a9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+# Build rules for libunicode: the recode tool, the unit test binary and the
+# installation of the public header.
+
+# Compiler selection; the clang build additionally switches to libc++ below.
+CXX=clang++-11
+#CXX=g++-10
+
+# Debug build by default; swap with the commented line for an optimized build.
+CXXFLAGS=-O0 -g -D_DEBUG
+#CXXFLAGS=-O2 -DNDEBUG
+
+CXXFLAGS+=-Wall -Iinclude -std=c++20
+
+ifeq ($(CXX),clang++-11)
+CXXFLAGS+=-stdlib=libc++
+# The link step has to select libc++ as well; otherwise clang links against
+# the platform's default C++ standard library and libc++ symbols referenced
+# by the object files stay unresolved.
+LDFLAGS+=-stdlib=libc++
+endif
+
+# No line continuation after the last entry, so the list cannot swallow
+# whatever line happens to follow it.
+LDLIBS+=\
+-lboost_context \
+-lboost_filesystem \
+-lboost_timer \
+-lboost_system
+
+SRC=\
+ src/recode.cpp \
+ src/test-unicode.cpp
+
+all: src/recode src/test-unicode
+
+# Build and run the unit tests.
+test: src/test-unicode
+	src/test-unicode
+
+# "dep" is phony; as a normal prerequisite it would force a relink on every
+# invocation. As an order-only prerequisite (after "|") it is still brought
+# up to date first but no longer makes the binaries out of date.
+src/recode: src/recode.o | dep
+	$(CXX) $(LDFLAGS) $< $(LDLIBS) $(LIBS) -o $@
+
+src/test-unicode: src/test-unicode.o | dep
+	$(CXX) $(LDFLAGS) $< $(LDLIBS) $(LIBS) -o $@
+
+# Generate the dependency files for all sources.
+dep: $(SRC:.cpp=.d)
+
+# One dependency file per source; -MT names the matching object file as the
+# rule target, -MP adds phony targets for the headers.
+%.d: %.cpp
+	$(CXX) $(CXXFLAGS) -MM -MP -MF $@ -MT $(*D)/$(*F).o -c $<
+
+%.o: %.cpp %.d
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+clean:
+	-rm -f src/recode src/test-unicode
+	-find . -name '*.o' -o -name '*.d' -o -name '*.gcno' -o -name '*.gcda' | xargs rm -f
+
+# Only the header is installed.
+install:
+	mkdir -p $(DESTDIR)/usr/include
+	cp include/unicode.h $(DESTDIR)/usr/include
+
+.PHONY: all test clean install dep
+
+# Pull in the generated dependency files that already exist.
+-include $(wildcard $(SRC:.cpp=.d))
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..a875755
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,15 @@
+# Source package stanza. Section is "libdevel" because the only binary
+# package built from it is a development (-dev) package.
+Source: libunicode
+Section: libdevel
+Priority: optional
+Maintainer: Roland Reichwein <mail@reichwein.it>
+Build-Depends: debhelper (>= 12), libboost-all-dev | libboost1.71-all-dev, clang | g++-9
+Standards-Version: 4.5.0
+Homepage: http://www.reichwein.it/libunicode/
+
+Package: libunicode-dev
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Homepage: http://www.reichwein.it/libunicode/
+Description: Unicode conversion library
+ libunicode is a locale independent library for conversion between Unicode encodings
+ UTF-8, UTF-16 and UTF-32.
diff --git a/include/unicode.h b/include/unicode.h
new file mode 100644
index 0000000..2969aa0
--- /dev/null
+++ b/include/unicode.h
@@ -0,0 +1,123 @@
+// libunicode
+// Copyright (C) 2021 Roland Reichwein
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <string>
+
+// Implementation helpers for the conversion functions.
+// NOTE(review): an unnamed namespace in a header gives these helpers internal
+// linkage in every including translation unit; it is kept here so that the
+// unqualified calls from namespace unicode keep resolving.
+namespace {
+
+ // Input iterator over a UTF-8 byte sequence that yields one Unicode code
+ // point (char32_t) per step. Malformed input (stray continuation bytes,
+ // truncated or broken sequences, overlong forms, surrogates, values above
+ // U+10FFFF) is reported as U+FFFD; broken sequences are skipped one byte at
+ // a time so that decoding resynchronizes at the next byte.
+ struct utf8_iterator
+ {
+  // Member types required by std::iterator_traits / standard algorithms
+  typedef char32_t value_type;
+  typedef char32_t& reference;
+  typedef char32_t* pointer;
+  typedef std::ptrdiff_t difference_type;
+  typedef std::input_iterator_tag iterator_category;
+
+  // Substituted for every piece of input that cannot be decoded
+  static constexpr char32_t replacement_character{0xFFFD};
+
+  // Sequence length announced by a lead byte. Returns 1 for every byte that
+  // does not start a multi-byte sequence (ASCII, continuation byte, invalid).
+  static std::size_t announced_length(char8_t lead)
+  {
+   if ((lead & 0xE0) == 0xC0)
+    return 2;
+   if ((lead & 0xF0) == 0xE0)
+    return 3;
+   if ((lead & 0xF8) == 0xF0)
+    return 4;
+   return 1;
+  }
+
+  // Decodes the sequence at the current position into value.
+  void get_value()
+  {
+   const std::size_t n{get_number_of_utf8_bytes()};
+
+   if (n == 0) { // at end: nothing to decode
+    value = replacement_character;
+    return;
+   }
+
+   const char8_t lead{*iterator};
+
+   if (n == 1) { // ASCII, or a single undecodable byte
+    value = lead < 0x80 ? static_cast<char32_t>(lead) : replacement_character;
+    return;
+   }
+
+   // Payload bits of the lead byte: 5, 4 or 3 bits for 2, 3 or 4 byte sequences
+   char32_t code_point{static_cast<char32_t>(lead & (0xFF >> (n + 1)))};
+
+   // Each continuation byte contributes its lower 6 bits
+   for (std::size_t i{1}; i < n; ++i)
+    code_point = (code_point << 6) | static_cast<char32_t>(iterator[static_cast<difference_type>(i)] & 0x3F);
+
+   // Smallest code point that legitimately needs n bytes, indexed by n;
+   // anything below is an overlong encoding
+   static constexpr char32_t minimum[]{0, 0, 0x80, 0x800, 0x10000};
+
+   const bool overlong{code_point < minimum[n]};
+   const bool surrogate{code_point >= 0xD800 && code_point <= 0xDFFF};
+   const bool out_of_range{code_point > 0x10FFFF};
+
+   value = (overlong || surrogate || out_of_range) ? replacement_character : code_point;
+  }
+
+  // Number of bytes consumed by the element at the current position:
+  // 0 at the end of input, the full sequence length for a structurally
+  // complete sequence, 1 otherwise.
+  std::size_t get_number_of_utf8_bytes()
+  {
+   if (iterator == end_iterator)
+    return 0;
+
+   const std::size_t expected{announced_length(*iterator)};
+   const std::size_t available{static_cast<std::size_t>(end_iterator - iterator)};
+
+   if (expected > available)
+    return 1; // truncated sequence: consume only the lead byte
+
+   for (std::size_t i{1}; i < expected; ++i) {
+    if ((iterator[static_cast<difference_type>(i)] & 0xC0) != 0x80)
+     return 1; // not a continuation byte: consume only the lead byte
+   }
+
+   return expected;
+  }
+
+  // pre-increment: advance to the next code point, never past end_iterator
+  utf8_iterator& operator++()
+  {
+   iterator += static_cast<difference_type>(get_number_of_utf8_bytes());
+   return *this;
+  }
+
+  // post-increment
+  utf8_iterator operator++(int)
+  {
+   utf8_iterator previous = *this;
+   ++*this;
+   return previous;
+  }
+
+  // Iterators compare by position only
+  bool operator==(const utf8_iterator& other) const
+  {
+   return iterator == other.iterator;
+  }
+
+  bool operator!=(const utf8_iterator& other) const
+  {
+   return iterator != other.iterator;
+  }
+
+  // Decodes on access; the returned reference refers to the cached value
+  reference operator*()
+  {
+   get_value();
+   return value;
+  }
+
+  // Current position; const_iterator so that const strings can be read
+  std::u8string::const_iterator iterator;
+
+  // End of the underlying string, used for bounds checking
+  std::u8string::const_iterator end_iterator;
+  value_type value{};
+ };
+
+ // Output iterator that appends each assigned code point to a UTF-16 string,
+ // as one code unit or as a surrogate pair.
+ struct utf16_back_insert_iterator
+ {
+  typedef utf16_back_insert_iterator& reference;
+
+  // Member types required by std::iterator_traits / standard algorithms
+  typedef void value_type;
+  typedef void pointer;
+  typedef std::ptrdiff_t difference_type;
+  typedef std::output_iterator_tag iterator_category;
+
+  utf16_back_insert_iterator(std::u16string& s): s(s) {}
+
+  // no-op
+  utf16_back_insert_iterator& operator++()
+  {
+   return *this;
+  }
+
+  // no-op
+  utf16_back_insert_iterator operator++(int)
+  {
+   return *this;
+  }
+
+  // support *x = value, together with operator=()
+  reference operator*()
+  {
+   return *this;
+  }
+
+  // append utf-16 word sequence
+  reference operator=(const char32_t& value)
+  {
+   const bool surrogate{value >= 0xD800 && value <= 0xDFFF};
+
+   if (surrogate || value > 0x10FFFF) {
+    // not a Unicode scalar value: cannot be represented in UTF-16
+    s.push_back(u'\uFFFD');
+   } else if (value < 0x10000) {
+    // Basic Multilingual Plane: a single code unit
+    s.push_back(static_cast<char16_t>(value));
+   } else {
+    // supplementary planes: 20 bits split over high and low surrogate
+    const char32_t offset{value - 0x10000};
+    s.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));
+    s.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF)));
+   }
+
+   return *this;
+  }
+
+  // Destination string; must outlive the iterator
+  std::u16string& s;
+ };
+
+ // Creates an output iterator appending to s
+ utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
+ {
+  return utf16_back_insert_iterator(s);
+ }
+
+ // Iterator to the first code point of s
+ utf8_iterator utf8_begin(const std::u8string& s)
+ {
+  return utf8_iterator{s.begin(), s.end()};
+ }
+
+ // Past-the-end iterator of s
+ utf8_iterator utf8_end(const std::u8string& s)
+ {
+  return utf8_iterator{s.end(), s.end()};
+ }
+
+} // namespace
+
+// Public conversion API of libunicode.
+namespace unicode {
+
+// returns number of bytes in UTF-8 byte sequence of first found code point,
+// if found. 0 if none found or sequence empty.
+//size_t utf8_start()
+//{
+//}
+
+// Converts the UTF-8 encoded string s to UTF-16 and returns the result.
+// The input is read through utf8_begin()/utf8_end() and every element is
+// appended through utf16_back_inserter().
+//
+// Declared inline because it is defined in a header: without it, including
+// this header from more than one translation unit produces multiple
+// definitions at link time.
+inline std::u16string utf8_to_utf16(const std::u8string& s)
+{
+ std::u16string result;
+
+ // A code point never takes more UTF-16 code units than UTF-8 bytes, so the
+ // byte count is an upper bound and avoids reallocations while appending.
+ result.reserve(s.size());
+
+ std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));
+
+ return result;
+}
+
+//std::u8string utf16_to_utf8(const std::u16string& s)
+//{
+// std::u8string result;
+//
+// std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
+//
+// return result;
+//}
+
+} // namespace unicode
+
diff --git a/src/recode.cpp b/src/recode.cpp
new file mode 100644
index 0000000..8927fe4
--- /dev/null
+++ b/src/recode.cpp
@@ -0,0 +1,4 @@
+// Entry point of the recode tool. Placeholder: it performs no work yet and
+// always reports success.
+int main(int argc, char* argv[])
+{
+ // Command line arguments are not evaluated yet
+ static_cast<void>(argc);
+ static_cast<void>(argv);
+
+ return 0;
+}
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
new file mode 100644
index 0000000..9d41e67
--- /dev/null
+++ b/src/test-unicode.cpp
@@ -0,0 +1,17 @@
+#define BOOST_TEST_MODULE unicode_test
+
+#include <boost/test/included/unit_test.hpp>
+
+#include <string>
+
+#include <unicode.h>
+
+// Converting a pure ASCII string from UTF-8 to UTF-16 must yield the same
+// characters.
+BOOST_AUTO_TEST_CASE(utf8_to_utf16)
+{
+ // u8 literal required: in C++20 std::u8string holds char8_t and cannot be
+ // initialized from a narrow (char) string literal
+ const std::u8string input{u8"ascii string1"};
+
+ const std::u16string converted{unicode::utf8_to_utf16(input)};
+
+ // Plain BOOST_CHECK on the comparison result: the value-printing check
+ // macros need a stream inserter, which std::u16string does not have
+ BOOST_CHECK(converted == u"ascii string1");
+}
+