From 9caef0ea244504a189dfdd81c4db7792b193e14f Mon Sep 17 00:00:00 2001
From: Roland Reichwein
Date: Wed, 5 Jan 2022 20:44:48 +0100
Subject: Fix UTF-8 decoding: Guard against remaining bytes before iteration

---
 include/unicode/utf.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/unicode/utf.h b/include/unicode/utf.h
index 5db9cac..691d4ba 100644
--- a/include/unicode/utf.h
+++ b/include/unicode/utf.h
@@ -197,14 +197,6 @@ namespace unicode::detail {
    size_t constexpr sequence_length{sizeof...(Tbytes)};
    static_assert(sequence_length >= 1 && sequence_length <= 4);
 
-   if constexpr(sequence_length > 1) {
-    if (remaining_code_units() < sequence_length)
-#if __cplusplus >= 202002L
-     [[unlikely]]
-#endif
-     throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence");
-   }
-
    if (is_utf8_sequence(bytes...)) {
     std::advance(iterator, sequence_length);
     internal_type result{decode_utf8_sequence(bytes...)};
@@ -215,9 +207,15 @@ namespace unicode::detail {
      throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(result)));
     return result;
    } else {
-    if constexpr(sequence_length <= 3) // template recursion break condition: UTF-8 has 1..4 code units
+    if constexpr(sequence_length <= 3) { // template recursion break condition: UTF-8 has 1..4 code units
+     if (remaining_code_units() < sequence_length + 1)
+#if __cplusplus >= 202002L
+      [[unlikely]]
+#endif
+      throw std::invalid_argument("Bad input: Not enough bytes left for decoding UTF-8 sequence");
+
      return calculate_utf8_value(bytes..., static_cast(get_code_unit()));
-    else
+    } else
      throw std::invalid_argument("Bad UTF-8 input: Invalid 4 byte sequence");
    }
   }
-- 
cgit v1.2.3