summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it> 2021-12-28 19:56:30 +0100
committer: Roland Reichwein <mail@reichwein.it> 2021-12-28 19:56:30 +0100
commit: a5dc41291537d9518fbbd795d118a3b4bcb9764e (patch)
tree: 1ad98b5c9b6f7e750dbf9c624469f0b00ac0c8a4 /include
parent: 2b27deb54fec75ed529776f30be8eeb4ea239257 (diff)
Use fold expressions to simplify code
Diffstat (limited to 'include')
-rw-r--r-- include/unicode.h 90
1 files changed, 35 insertions, 55 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 2d7bf71..8ac9f55 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -27,13 +27,14 @@ typedef char iso_t;
namespace unicode {
- // usually, char32_t, uint32_t etc.
- template<typename T>
+ // bits_to_compare: limit bits to consider even further than defined by T
+ // T: usually, char32_t, uint32_t etc.
+ template<size_t bits_to_compare = 32, typename T>
static inline bool is_valid_unicode(const T& value) noexcept
{
- if constexpr(sizeof(T) == 1)
+ if constexpr(sizeof(T) == 1 || bits_to_compare <= 15)
return true;
- else if constexpr(sizeof(T) == 2)
+ else if constexpr(sizeof(T) == 2 || bits_to_compare <= 20)
//return value <= 0xD7FF || value >= 0xE000;
return (value & 0xF800) != 0xD800;
else
@@ -213,45 +214,35 @@ namespace unicode::detail {
}
}
- template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true>
- inline internal_type calculate_value()
+ template<typename... Tbytes>
+ inline internal_type calculate_utf8_value(Tbytes... bytes)
{
- utf8_t byte0 {static_cast<utf8_t>(get_code_unit<0>())};
- if (is_utf8_sequence(byte0)) { // 1 byte: 7 bit ASCII
- std::advance(iterator, 1);
- return decode_utf8_sequence(byte0);
- } else {
- internal_type value{};
- if (size_t remaining{remaining_code_units()}; remaining >= 2) {
- utf8_t byte1 {static_cast<utf8_t>(get_code_unit<1>())};
- if (is_utf8_sequence(byte0, byte1)) { // 2 bytes
- value = decode_utf8_sequence(byte0, byte1);
- std::advance(iterator, 2);
- } else if (remaining >= 3) {
- utf8_t byte2 {static_cast<utf8_t>(get_code_unit<2>())};
- if (is_utf8_sequence(byte0, byte1, byte2)) { // 3 bytes
- value = decode_utf8_sequence(byte0, byte1, byte2);
- std::advance(iterator, 3);
- } else if (remaining >= 4) {
- utf8_t byte3 {static_cast<utf8_t>(get_code_unit<3>())};
- if (is_utf8_sequence(byte0, byte1, byte2, byte3)) { // 4 bytes
- value = decode_utf8_sequence(byte0, byte1, byte2, byte3);
- std::advance(iterator, 4);
- } else
- throw std::invalid_argument("Bad input: Invalid 4 byte sequence");
- } else
- throw std::invalid_argument("Bad UTF-8 input: 4th byte expected, none found (end of sequence)");
- } else
- throw std::invalid_argument("Bad UTF-8 input: 3rd byte expected, none found (end of sequence)");
- } else
- throw std::invalid_argument("Bad UTF-8 input: 2nd byte expected, none found (end of sequence)");
-
- // check only for sequences >= 2 bytes (ASCII is always compliant)
- if (!unicode::is_valid_unicode(value))
- throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(value)));
+ size_t constexpr sequence_length{sizeof...(Tbytes)};
+ static_assert(sequence_length >= 1 && sequence_length <= 4);
- return value;
+ if constexpr(sequence_length > 1) {
+ if (remaining_code_units() < sequence_length)
+ throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence");
}
+
+ if (is_utf8_sequence(bytes...)) {
+ std::advance(iterator, sequence_length);
+ internal_type result{decode_utf8_sequence(bytes...)};
+ if (!unicode::is_valid_unicode<sequence_length * 6>(result))
+ throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast<uint32_t>(result)));
+ return result;
+ } else {
+ if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units
+ return calculate_utf8_value(bytes..., static_cast<utf8_t>(get_code_unit<sequence_length>()));
+ else
+ throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence");
+ }
+ }
+
+ template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 1), bool> = true>
+ inline internal_type calculate_value()
+ {
+ return calculate_utf8_value(static_cast<utf8_t>(get_code_unit<0>()));
}
template<class X = value_type, typename std::enable_if_t<(sizeof(X) == 2), bool> = true>
@@ -380,24 +371,13 @@ namespace unicode::detail {
return trailing_byte<m - n - 1>(value);
}
- template<typename Arg>
- inline void append(Arg&& arg)
- {
- if constexpr (std::is_same<Container, typename std::basic_string<value_type>>::value) {
- s.append({arg});
- } else {
- s.emplace_back(arg);
- }
- }
-
- template<typename Arg, typename... Args>
- inline void append(Arg&& arg, Args&&... args)
+ template<typename... Args>
+ inline void append(Args&&... args)
{
if constexpr (std::is_same<Container, typename std::basic_string<value_type>>::value) {
- s.append({arg, args...});
+ s.append({args...});
} else {
- s.emplace_back(arg);
- append(args...);
+ (s.emplace_back(args), ...);
}
}