From a5dc41291537d9518fbbd795d118a3b4bcb9764e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Tue, 28 Dec 2021 19:56:30 +0100 Subject: Use fold expressions to simplify code --- include/unicode.h | 90 ++++++++++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 55 deletions(-) diff --git a/include/unicode.h b/include/unicode.h index 2d7bf71..8ac9f55 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -27,13 +27,14 @@ typedef char iso_t; namespace unicode { - // usually, char32_t, uint32_t etc. - template + // bits_to_compare: limit bits to consider even further than defined by T + // T: usually, char32_t, uint32_t etc. + template static inline bool is_valid_unicode(const T& value) noexcept { - if constexpr(sizeof(T) == 1) + if constexpr(sizeof(T) == 1 || bits_to_compare <= 15) return true; - else if constexpr(sizeof(T) == 2) + else if constexpr(sizeof(T) == 2 || bits_to_compare <= 20) //return value <= 0xD7FF || value >= 0xE000; return (value & 0xF800) != 0xD800; else @@ -213,45 +214,35 @@ namespace unicode::detail { } } - template = true> - inline internal_type calculate_value() + template + inline internal_type calculate_utf8_value(Tbytes... bytes) { - utf8_t byte0 {static_cast(get_code_unit<0>())}; - if (is_utf8_sequence(byte0)) { // 1 byte: 7 bit ASCII - std::advance(iterator, 1); - return decode_utf8_sequence(byte0); - } else { - internal_type value{}; - if (size_t remaining{remaining_code_units()}; remaining >= 2) { - utf8_t byte1 {static_cast(get_code_unit<1>())}; - if (is_utf8_sequence(byte0, byte1)) { // 2 bytes - value = decode_utf8_sequence(byte0, byte1); - std::advance(iterator, 2); - } else if (remaining >= 3) { - utf8_t byte2 {static_cast(get_code_unit<2>())}; - if (is_utf8_sequence(byte0, byte1, byte2)) { // 3 bytes - value = decode_utf8_sequence(byte0, byte1, byte2); - std::advance(iterator, 3); - } else if (remaining >= 4) { - utf8_t byte3 {static_cast(get_code_unit<3>())}; - if (is_utf8_sequence(byte0, byte1, byte2, byte3)) { // 4 bytes - value = decode_utf8_sequence(byte0, byte1, byte2, byte3); - std::advance(iterator, 4); - } else - throw std::invalid_argument("Bad input: Invalid 4 byte sequence"); - } else - throw std::invalid_argument("Bad UTF-8 input: 4th byte expected, none found (end of sequence)"); - } else - throw std::invalid_argument("Bad UTF-8 input: 3rd byte expected, none found (end of sequence)"); - } else - throw std::invalid_argument("Bad UTF-8 input: 2nd byte expected, none found (end of sequence)"); - - // check only for sequences >= 2 bytes (ASCII is always compliant) - if (!unicode::is_valid_unicode(value)) - throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(value))); + size_t constexpr sequence_length{sizeof...(Tbytes)}; + static_assert(sequence_length >= 1 && sequence_length <= 4); - return value; + if constexpr(sequence_length > 1) { + if (remaining_code_units() < sequence_length) + throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence"); } + + if (is_utf8_sequence(bytes...)) { + std::advance(iterator, sequence_length); + internal_type result{decode_utf8_sequence(bytes...)}; + if (!unicode::is_valid_unicode(result)) + throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(result))); + return result; + } else { + if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units + return calculate_utf8_value(bytes..., static_cast(get_code_unit())); + else + throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence"); + } + } + + template = true> + inline internal_type calculate_value() + { + return calculate_utf8_value(static_cast(get_code_unit<0>())); } template = true> @@ -380,24 +371,13 @@ namespace unicode::detail { return trailing_byte(value); } - template - inline void append(Arg&& arg) - { - if constexpr (std::is_same>::value) { - s.append({arg}); - } else { - s.emplace_back(arg); - } - } - - template - inline void append(Arg&& arg, Args&&... args) + template + inline void append(Args&&... args) { if constexpr (std::is_same>::value) { - s.append({arg, args...}); + s.append({args...}); } else { - s.emplace_back(arg); - append(args...); + (s.emplace_back(args), ...); } } -- cgit v1.2.3