From 5f7ae62649c79683597e33af673ae1dcf5267917 Mon Sep 17 00:00:00 2001
From: Roland Reichwein
Date: Sun, 24 Jan 2021 18:48:42 +0100
Subject: Initial commit: Non working initial code

---
Note: this copy was recovered from a web rendering of the patch in which
line breaks, indentation and angle-bracketed text had been lost. Line breaks
follow the '+' markers and agree with the hunk sizes below. Indentation, the
tab prefixes of the Makefile recipes and the five #include targets are
reconstructions and may differ from the committed blobs, so the blob ids on
the "index" lines are not expected to match. The addresses on the From: and
Maintainer: lines could not be recovered and are left out.

 Makefile             |  52 ++++++++++++++++++++++
 debian/control       |  15 +++++++
 include/unicode.h    | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/recode.cpp       |   4 ++
 src/test-unicode.cpp |  17 +++++++
 5 files changed, 211 insertions(+)
 create mode 100644 Makefile
 create mode 100644 debian/control
 create mode 100644 include/unicode.h
 create mode 100644 src/recode.cpp
 create mode 100644 src/test-unicode.cpp

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..90471a9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+CXX=clang++-11
+#CXX=g++-10
+
+CXXFLAGS=-O0 -g -D_DEBUG
+#CXXFLAGS=-O2 -DNDEBUG
+
+CXXFLAGS+=-Wall -Iinclude -std=c++20
+
+ifeq ($(CXX),clang++-11)
+CXXFLAGS+=-stdlib=libc++
+endif
+
+LDLIBS+=\
+-lboost_context \
+-lboost_filesystem \
+-lboost_timer \
+-lboost_system \
+
+SRC=\
+    src/recode.cpp \
+    src/test-unicode.cpp
+
+all: src/recode src/test-unicode
+
+test: src/test-unicode
+	src/test-unicode
+
+src/recode: src/recode.o dep
+	$(CXX) $(LDFLAGS) $< $(LDLIBS) $(LIBS) -o $@
+
+src/test-unicode: src/test-unicode.o dep
+	$(CXX) $(LDFLAGS) $< $(LDLIBS) $(LIBS) -o $@
+
+dep: $(SRC:.cpp=.d)
+
+%.d: %.cpp
+	$(CXX) $(CXXFLAGS) -MM -MP -MF $@ -MT $(*D)/$(*F).o -c $<
+
+%.o: %.cpp %.d
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+clean:
+	-rm -f src/recode src/test-unicode
+	-find . -name '*.o' -o -name '*.d' -o -name '*.gcno' -o -name '*.gcda' | xargs rm -f
+
+install:
+	mkdir -p $(DESTDIR)/usr/include
+	cp include/unicode.h $(DESTDIR)/usr/include
+
+.PHONY: all test clean install dep
+
+-include $(wildcard $(SRC:.cpp=.d))
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..a875755
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,15 @@
+Source: libunicode
+Section: httpd
+Priority: optional
+Maintainer: Roland Reichwein
+Build-Depends: debhelper (>= 12), libboost-all-dev | libboost1.71-all-dev, clang | g++-9
+Standards-Version: 4.5.0
+Homepage: http://www.reichwein.it/libunicode/
+
+Package: libunicode-dev
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Homepage: http://www.reichwein.it/libunicode/
+Description: Unicode conversion library
+ libunicode is a locale independent library for conversion between Unicode encodings
+ UTF-8, UTF-16 and UTF-32.
diff --git a/include/unicode.h b/include/unicode.h
new file mode 100644
index 0000000..2969aa0
--- /dev/null
+++ b/include/unicode.h
@@ -0,0 +1,123 @@
+// libunicode
+// Copyright (C) 2021 Roland Reichwein
+
+#pragma once
+
+#include <algorithm>
+#include <string>
+
+namespace {
+
+  struct utf8_iterator
+  {
+    typedef char32_t value_type;
+    typedef char32_t& reference;
+
+    void get_value()
+    {
+      // TODO: set value to current data in *iterator ...
+      value = 'X';
+    }
+
+    size_t get_number_of_utf8_bytes()
+    {
+      // TODO: how many bytes
+      return 1;
+    }
+
+    // pre-increment
+    utf8_iterator& operator++()
+    {
+      iterator += get_number_of_utf8_bytes();
+      return *this;
+    }
+
+    bool operator!=(const utf8_iterator& other) const
+    {
+      return iterator != other.iterator;
+    }
+
+    reference operator*()
+    {
+      get_value();
+      return value;
+    }
+
+    std::u8string::iterator iterator;
+
+    std::u8string::iterator end_iterator;
+    value_type value{};
+  };
+
+  struct utf16_back_insert_iterator
+  {
+    typedef utf16_back_insert_iterator& reference;
+
+    utf16_back_insert_iterator(std::u16string& s): s(s) {}
+
+    // no-op
+    utf16_back_insert_iterator& operator++()
+    {
+      return *this;
+    }
+
+    // support *x = value, together with operator=()
+    reference operator*()
+    {
+      return *this;
+    }
+
+    // append utf-16 word sequence
+    reference operator=(const char32_t& value)
+    {
+      s.push_back(0); // TODO
+    }
+
+    std::u16string& s;
+  };
+
+  utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
+  {
+    return utf16_back_insert_iterator(s);
+  }
+
+  utf8_iterator utf8_begin(std::u8string& s)
+  {
+    return utf8_iterator{s.begin(), s.end()};
+  }
+
+  utf8_iterator utf8_end(std::u8string& s)
+  {
+    return utf8_iterator{s.end(), s.end()};
+  }
+
+} // namespace
+
+namespace unicode {
+
+// returns number of bytes in UTF-8 byte sequence of first found code point,
+// if found. 0 if none found or sequence empty.
+//size_t utf8_start()
+//{
+//}
+
+std::u16string utf8_to_utf16(const std::u8string& s)
+{
+  std::u16string result;
+
+  std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));
+
+  return result;
+}
+
+//std::u8string utf16_to_utf8(const std::u16string& s)
+//{
+//  std::u8string result;
+//
+//  std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
+//
+//  return result;
+//}
+
+} // namespace unicode
+
diff --git a/src/recode.cpp b/src/recode.cpp
new file mode 100644
index 0000000..8927fe4
--- /dev/null
+++ b/src/recode.cpp
@@ -0,0 +1,4 @@
+int main(int argc, char* argv[])
+{
+  return 0;
+}
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
new file mode 100644
index 0000000..9d41e67
--- /dev/null
+++ b/src/test-unicode.cpp
@@ -0,0 +1,17 @@
+#define BOOST_TEST_MODULE unicode_test
+
+#include <boost/test/included/unit_test.hpp>
+
+#include <string>
+
+#include <unicode.h>
+
+BOOST_AUTO_TEST_CASE(utf8_to_utf16)
+{
+  std::u8string u8{"ascii string1"};
+
+  std::u16string u16{unicode::utf8_to_utf16(u8)};
+
+  BOOST_CHECK_EQ(u16, u"ascii string1");
+}
+
-- 
cgit v1.2.3