summary | refs | log | tree | commit | diff | homepage
path: root/include/unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/unicode.h')
-rw-r--r-- include/unicode.h | 24
1 files changed, 22 insertions, 2 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 2bf17f4..34812df 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -144,7 +144,11 @@ namespace unicode {
if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) {
while (input_distance_bytes(begin, end) >= accu_size) {
typename arch_optimizer::accu_type data{*addr};
- if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) {
+ if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value)
+#if __cplusplus >= 202002L
+ [[likely]]
+#endif
+ {
arch_optimizer::template append(reinterpret_cast<const typename From::value_type*>(addr), result);
begin += accu_size / sizeof(typename From::value_type);
++addr;
@@ -214,7 +218,11 @@ namespace unicode {
template<typename From, typename To, bool block_mode = true, typename std::enable_if_t<is_utf_8_v<From>, bool> = true>
inline static void append_accu(std::basic_string<To>& result, uint64_t& accu, int& bytes_in_accu)
{
- if (block_mode && bytes_in_accu == 8 && (accu & 0x8080808080808080) == 0) {
+ if (block_mode && bytes_in_accu == 8 && (accu & 0x8080808080808080) == 0)
+#if __cplusplus >= 202002L
+ [[likely]]
+#endif
+ {
result.append({
static_cast<To>(accu & 0x7F),
static_cast<To>((accu >> 8) & 0x7F),
@@ -239,6 +247,9 @@ namespace unicode {
if (is_valid_unicode<11>(value))
append_utf<11>(result, value);
else
+#if __cplusplus >= 202002L
+ [[unlikely]]
+#endif
throw std::invalid_argument("Invalid Unicode character in 2 byte UTF-8 sequence");
} else if ((block_mode || bytes_in_accu >= 3) && (accu & 0xC0C0F0) == 0x8080E0) { // 3 byte sequence
char32_t value {static_cast<char32_t>(((accu & 0x0F) << 12) | ((accu >> 2) & 0x0FC0) | ((accu >> 16) & 0x3f))};
@@ -247,6 +258,9 @@ namespace unicode {
if (is_valid_unicode<16>(value))
append_utf<16>(result, value);
else
+#if __cplusplus >= 202002L
+ [[unlikely]]
+#endif
throw std::invalid_argument("Invalid Unicode character in 3 byte UTF-8 sequence");
} else if ((block_mode || bytes_in_accu >= 4) && (accu & 0xC0C0C0F8) == 0x808080F0) { // 4 byte sequence
char32_t value {static_cast<char32_t>(((accu & 0x07) << 18) | ((accu << 4) & 0x3f000) | ((accu >> 10) & 0xFC0) | ((accu >> 24) & 0x3f))};
@@ -255,8 +269,14 @@ namespace unicode {
if (is_valid_unicode<21>(value))
append_utf(result, value);
else
+#if __cplusplus >= 202002L
+ [[unlikely]]
+#endif
throw std::invalid_argument("Invalid Unicode character in 4 byte UTF-8 sequence");
} else
+#if __cplusplus >= 202002L
+ [[unlikely]]
+#endif
throw std::invalid_argument("Invalid UTF-8 byte sequence");
}