summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2022-01-01 21:02:15 +0100
committerRoland Reichwein <mail@reichwein.it>2022-01-01 21:02:15 +0100
commitc969cddf87a2c6d2eb74353f3115a70d166136e5 (patch)
tree2f1aa414cd37a41de064faf6e4121107648d66b2
parent52d4375b10d920a59f1309c272a2e525feb1c25d (diff)
Use own type traits
-rw-r--r--Makefile6
-rw-r--r--include/unicode.h30
-rw-r--r--include/unicode/type_traits.h2
-rw-r--r--include/unicode/utf.h29
-rw-r--r--include/unicode/utf_fwd.h23
5 files changed, 61 insertions, 29 deletions
diff --git a/Makefile b/Makefile
index 36c503d..02498b3 100644
--- a/Makefile
+++ b/Makefile
@@ -139,7 +139,13 @@ DISTFILES= \
src/file.h \
Makefile \
include/unicode.h \
+ include/unicode/endian.h \
+ include/unicode/iso.h \
+ include/unicode/predicate.h \
+ include/unicode/types.h \
include/unicode/type_traits.h \
+ include/unicode/utf.h \
+ include/unicode/utf_fwd.h \
debian/control \
debian/compat \
debian/copyright \
diff --git a/include/unicode.h b/include/unicode.h
index a50f525..eb872ec 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -77,15 +77,15 @@ namespace unicode {
{
if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) {
s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), accu_size / sizeof(input_value_type));
- } else if constexpr(sizeof(input_value_type) == 1) {
+ } else if constexpr(is_utf_8_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0]),
static_cast<typename output_string_type::value_type>(addr[1]),
static_cast<typename output_string_type::value_type>(addr[2]),
static_cast<typename output_string_type::value_type>(addr[3])});
- } else if constexpr(sizeof(input_value_type) == 2) {
+ } else if constexpr(is_utf_16_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0]),
static_cast<typename output_string_type::value_type>(addr[1])});
- } else if constexpr(sizeof(input_value_type) == 4) {
+ } else if constexpr(is_utf_32_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0])});
}
}
@@ -105,7 +105,7 @@ namespace unicode {
{
if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) {
s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), accu_size / sizeof(input_value_type));
- } else if constexpr(sizeof(input_value_type) == 1) {
+ } else if constexpr(is_utf_8_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0]),
static_cast<typename output_string_type::value_type>(addr[1]),
static_cast<typename output_string_type::value_type>(addr[2]),
@@ -114,12 +114,12 @@ namespace unicode {
static_cast<typename output_string_type::value_type>(addr[5]),
static_cast<typename output_string_type::value_type>(addr[6]),
static_cast<typename output_string_type::value_type>(addr[7])});
- } else if constexpr(sizeof(input_value_type) == 2) {
+ } else if constexpr(is_utf_16_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0]),
static_cast<typename output_string_type::value_type>(addr[1]),
static_cast<typename output_string_type::value_type>(addr[2]),
static_cast<typename output_string_type::value_type>(addr[3])});
- } else if constexpr(sizeof(input_value_type) == 4) {
+ } else if constexpr(is_utf_32_v<input_value_type>) {
s.append({static_cast<typename output_string_type::value_type>(addr[0]),
static_cast<typename output_string_type::value_type>(addr[1])});
}
@@ -174,7 +174,7 @@ namespace unicode {
return result;
}
- template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 1), bool> = true>
+ template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_8_v<To>, bool> = true>
inline void append_utf(std::basic_string<To>& result, const char32_t& value)
{
using From = char32_t;
@@ -190,7 +190,7 @@ namespace unicode {
}
}
- template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 2), bool> = true>
+ template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_16_v<To>, bool> = true>
inline void append_utf(std::basic_string<To>& result, const char32_t& value)
{
if (bits_to_compare <= 16 || value <= 0xFFFF) { // expect value to be already valid Unicode values
@@ -201,7 +201,7 @@ namespace unicode {
}
}
- template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<(sizeof(To) == 4), bool> = true>
+ template<size_t bits_to_compare = 32, typename To, typename std::enable_if_t<is_utf_32_v<To>, bool> = true>
inline void append_utf(std::basic_string<To>& result, const char32_t& value)
{
// expect value to be already valid Unicode values (checked in input iterator)
@@ -211,7 +211,7 @@ namespace unicode {
// Little Endian optimized version for UTF-8
// In block_mode, at least 4 bytes are in accu. On first call, even 8.
// otherwise, at least one code unit is in accu
- template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<(sizeof(From) == 1), bool> = true>
+ template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<is_utf_8_v<From>, bool> = true>
inline static void append_accu(std::basic_string<To>& result, uint64_t& accu, int& bytes_in_accu)
{
#if 1
@@ -265,7 +265,7 @@ namespace unicode {
// Little Endian optimized version for UTF-16
// In block_mode, at least 4 bytes are in accu. On first call, even 8.
// otherwise, at least one code unit is in accu
- template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<(sizeof(From) == 2), bool> = true>
+ template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<is_utf_16_v<From>, bool> = true>
inline static void append_accu(std::basic_string<To>& result, uint64_t& accu, int& bytes_in_accu)
{
#if 1
@@ -282,7 +282,7 @@ namespace unicode {
if ((accu & 0xFC00FC00FC00FC00) == 0xDC00D800DC00D800) {
// found 4 code units forming 3 code points in UTF-16;
// by definition of UTF-16, we have valid unicode values at this point
- if constexpr(sizeof(To) == 4) {
+ if constexpr(is_utf_32_v<To>) {
//result.resize(result.size() + 2);
//*reinterpret_cast<uint64_t*>(&result[result.size() - 2]) = (((accu & 0x03FF000003FF) << 10) | ((accu >> 16) & 0x03FF000003FF)) + 0x0001000000010000;
result.append({
@@ -316,7 +316,7 @@ namespace unicode {
typename To::string_type convert_optimized_utf(const typename From::string_type& s)
{
typename To::string_type result;
- if constexpr(sizeof(typename From::value_type) == 4) {
+ if constexpr(is_utf_32_v<typename From::value_type>) {
for (const auto value: s) {
if (is_valid_unicode(value))
append_utf(result, value);
@@ -324,7 +324,7 @@ namespace unicode {
throw std::invalid_argument("Invalid Unicode character in UTF-32");
}
#if 0
- } else if constexpr(sizeof(typename From::value_type) == 2) {
+ } else if constexpr(is_utf_16_v<typename From::value_type>) {
for (int i = 0; i < s.size(); i++) {
typename From::value_type unit0{s[i]};
if (is_valid_unicode(unit0)) {
@@ -388,7 +388,7 @@ namespace unicode {
} else {
throw std::invalid_argument("Invalid UTF input");
}
- } else if constexpr(accu_size == 8 && is_little_endian() && sizeof(typename From::value_type) == 1 &&
+ } else if constexpr(accu_size == 8 && is_little_endian() && is_utf_8_v<typename From::value_type> &&
is_utf_encoding_v<From> && is_utf_encoding_v<To>) { // endian specific optimization
return convert_optimized_utf<From, To>(s);
} else if constexpr(accu_size == 4 || accu_size == 8) { // accu size specific optimization with speedup for 7bit input
diff --git a/include/unicode/type_traits.h b/include/unicode/type_traits.h
index 3ee1d82..c3507e7 100644
--- a/include/unicode/type_traits.h
+++ b/include/unicode/type_traits.h
@@ -1,6 +1,6 @@
#pragma once
-#include "utf.h"
+#include "utf_fwd.h"
#include <string>
#include <type_traits>
diff --git a/include/unicode/utf.h b/include/unicode/utf.h
index dd504a7..81e8f2b 100644
--- a/include/unicode/utf.h
+++ b/include/unicode/utf.h
@@ -1,5 +1,8 @@
#pragma once
+#include "utf_fwd.h"
+#include "type_traits.h"
+
#include <list>
#include <string>
#include <stdexcept>
@@ -37,7 +40,7 @@ namespace unicode::detail {
(... && is_utf8_followup_byte(bytes)); // left fold for linear evaluation from left to right
}
- template<typename T, typename std::enable_if_t<(sizeof(T) == 1), bool> = true>
+ template<typename T, typename std::enable_if_t<is_utf_8_v<T>, bool> = true>
inline bool validate_utf(const std::basic_string<T>& s)
{
int i{};
@@ -78,7 +81,7 @@ namespace unicode::detail {
}
}
- template<typename T, typename std::enable_if_t<(sizeof(T) == 2), bool> = true>
+ template<typename T, typename std::enable_if_t<is_utf_16_v<T>, bool> = true>
inline bool validate_utf(const std::basic_string<T>& s)
{
int i{};
@@ -95,7 +98,7 @@ namespace unicode::detail {
return true;
}
- template<typename T, typename std::enable_if_t<(sizeof(T) == 4), bool> = true>
+ template<typename T, typename std::enable_if_t<is_utf_32_v<T>, bool> = true>
inline bool validate_utf(const std::basic_string<T>& s)
{
for (auto i: s)
@@ -135,10 +138,10 @@ namespace unicode::detail {
return decode_utf8_leading_byte<sequence_length>(b) | decode_utf8_followup_byte(bytes...);
}
- template<typename T, typename Container=std::basic_string<T>>
+ template<typename T, typename Container>
struct utf_iterator
{
- static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4);
+ static_assert(is_utf_8_v<T> || is_utf_16_v<T> || is_utf_32_v<T>);
typedef T value_type;
typedef char32_t internal_type;
@@ -199,13 +202,13 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_8_v<X>, bool> = true>
inline internal_type calculate_value()
{
return calculate_utf8_value(static_cast<utf8_t>(get_code_unit<0>()));
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 2), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_16_v<X>, bool> = true>
inline internal_type calculate_value()
{
char16_t unit0 {static_cast<char16_t>(get_code_unit<0>())};
@@ -226,7 +229,7 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 4), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_32_v<X>, bool> = true>
inline internal_type calculate_value()
{
internal_type result {static_cast<internal_type>(get_code_unit<0>())};
@@ -296,10 +299,10 @@ namespace unicode::detail {
return utf8_trailing_byte<m - n - 1, From, To>(value);
}
- template<typename T, typename Container=std::basic_string<T>>
+ template<typename T, typename Container>
struct utf_back_insert_iterator
{
- static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4);
+ static_assert(is_utf_8_v<T> || is_utf_16_v<T> || is_utf_32_v<T>);
typedef T value_type;
typedef char32_t internal_type;
@@ -341,7 +344,7 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_8_v<X>, bool> = true>
inline void append_utf(const internal_type& value)
{
using Y = internal_type;
@@ -357,7 +360,7 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 2), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_16_v<X>, bool> = true>
inline void append_utf(const internal_type& value)
{
if (value <= 0xFFFF) { // expect value to be already valid Unicode values (checked in input iterator)
@@ -368,7 +371,7 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 4), bool> = true>
+ template<class X = value_type, typename std::enable_if_t<is_utf_32_v<X>, bool> = true>
inline void append_utf(const internal_type& value)
{
// expect value to be already valid Unicode values (checked in input iterator)
diff --git a/include/unicode/utf_fwd.h b/include/unicode/utf_fwd.h
new file mode 100644
index 0000000..f3f6c52
--- /dev/null
+++ b/include/unicode/utf_fwd.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// Forward declarations
+
+#include <string>
+
+namespace unicode::detail {
+
+ template<typename T, typename Container=std::basic_string<T>>
+ struct utf_iterator;
+
+ template<typename T, typename Container=std::basic_string<T>>
+ struct utf_back_insert_iterator;
+
+} // namespace unicode::detail
+
+namespace unicode {
+
+ template<typename InputIt, typename OutputIt>
+ struct UTF;
+
+} // namespace unicode
+