summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it> 2022-01-05 20:43:41 +0100
committer: Roland Reichwein <mail@reichwein.it> 2022-01-05 20:43:41 +0100
commit: a138fe998b04693ca350cbc9cd144a4116b4400f (patch)
tree: ae6e4eb81d3b1a86cee47970bd1e0fcc1668b8d0
parent: d75cefda8a5ea08976d6bb512150d7c6891ac73e (diff)
Simplify UTF-8 decoding: 2 byte sequences always contain valid Unicode values
-rw-r--r-- include/unicode/optimization.h | 8
1 file changed, 1 insertion, 7 deletions
diff --git a/include/unicode/optimization.h b/include/unicode/optimization.h
index d7b054d..412c8ab 100644
--- a/include/unicode/optimization.h
+++ b/include/unicode/optimization.h
@@ -248,13 +248,7 @@ namespace unicode {
char32_t value {static_cast<char32_t>(((accu & 0x1F) << 6) | ((accu >> 8) & 0x3f))};
accu >>= 16;
bytes_in_accu -= 2;
- if (is_valid_unicode<11>(value))
- append_utf<11>(result, value);
- else
-#if __cplusplus >= 202002L
- [[unlikely]]
-#endif
- throw std::invalid_argument("Invalid Unicode character in 2 byte UTF-8 sequence");
+ append_utf<11>(result, value); // 11 bit Unicode values are always valid Unicode
} else if ((block_mode || bytes_in_accu >= 3) && (accu & 0xC0C0F0) == 0x8080E0) { // 3 byte sequence
char32_t value {static_cast<char32_t>(((accu & 0x0F) << 12) | ((accu >> 2) & 0x0FC0) | ((accu >> 16) & 0x3f))};
accu >>= 24;