summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2021-12-26 16:58:04 +0100
committer Roland Reichwein <mail@reichwein.it> 2021-12-26 16:58:04 +0100
commit e0e5623b46fdaa0988faa76af506d5bc1035ee42 (patch)
tree 29255219e493122a3f0873d76b57338200b7f171
parent 23c31aef916dbfd17774a80258676a963426a698 (diff)
Optimization fixes
-rw-r--r-- include/unicode.h 29
1 files changed, 22 insertions, 7 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 3d6477c..395f172 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -31,6 +31,13 @@ namespace unicode {
template<typename T>
static inline bool is_valid_unicode(const T& value) noexcept
{
+ if constexpr(sizeof(T) == 1)
+ return true;
+ else if constexpr(sizeof(T) == 2)
+ //return value <= 0xD7FF || value >= 0xE000;
+ return (value & 0xF800) != 0xD800;
+ else
+ //return (value & 0xFFFFF800) != 0x0000D800 && (value >> 16) <= 0x10;
return value <= 0xD7FF || (value >= 0xE000 && value <= 0x10FFFF);
}
@@ -161,7 +168,7 @@ namespace unicode::detail {
{
char16_t unit0 {static_cast<char16_t>(get_code_unit<0>())};
- if (unit0 <= 0xD7FF || unit0 >= 0xE000) { // 1 unit (BMP Basic Multilingual Plane)
+ if (is_valid_unicode(unit0)) { // 1 unit (BMP Basic Multilingual Plane)
std::advance(iterator, 1);
return unit0;
} else {
@@ -547,10 +554,16 @@ namespace unicode {
// std::distance doesn't work here: it is based on "output" distance of iterators
template<class Iterator>
- size_t input_distance(const Iterator& it1, const Iterator& it2)
+ inline size_t input_distance(const Iterator& it1, const Iterator& it2)
{
return it2 - it1;
}
+
+ template<class Iterator>
+ inline size_t input_distance_bytes(const Iterator& it1, const Iterator& it2)
+ {
+ return input_distance(it1, it2) * sizeof(typename Iterator::value_type);
+ }
// Optimizations following:
static const size_t accu_size {sizeof(size_t)};
@@ -637,7 +650,8 @@ namespace unicode {
static_cast<typename output_string_type::value_type>(addr[1])});
}
}
- };
+
+ }; // class ArchitectureOptimizer
// From and To are facets
template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true>
@@ -653,16 +667,17 @@ namespace unicode {
auto end{From::end(s)};
auto back_inserter{To::back_inserter(result)};
auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])};
- while (input_distance(begin, end) >= accu_size) {
+ while (input_distance_bytes(begin, end) >= accu_size) {
if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) {
- while (input_distance(begin, end) >= accu_size) {
+ while (input_distance_bytes(begin, end) >= accu_size) {
typename arch_optimizer::accu_type data{*addr};
if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) {
- arch_optimizer::template append<typename From::value_type, typename To::string_type>(reinterpret_cast<const typename From::value_type*>(addr), result);
+ arch_optimizer::template append(reinterpret_cast<const typename From::value_type*>(addr), result);
begin += accu_size / sizeof(typename From::value_type);
++addr;
} else {
- // just advance one code unit for now
+ // just advance one code unit for now and break to trigger unoptimized
+ // version until next accu boundary
back_inserter = *begin;
++begin;
break;