summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it>, 2021-12-23 19:49:04 +0100
committer: Roland Reichwein <mail@reichwein.it>, 2021-12-23 19:49:04 +0100
commit: 98f9132997353bb3e750e8e2db99ebd474a8dbb6 (patch)
tree: d30f6c7a612337fc7e8f86e16e7a79eb355f0c9a
parent: 721064dc293d8915fbb33d83bd983a40dcca180f (diff)
Generalize optimization
-rw-r--r-- include/unicode.h 63
1 files changed, 54 insertions, 9 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 5774db7..8dedb19 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -548,6 +548,47 @@ namespace unicode {
return it2 - it1;
}
+ template<int value_size> // value_size: size in bytes of one input code unit; convert() passes sizeof(From::string_type::value_type)
+ struct ConvertInputOptimizer {}; // primary template has no members; the <1> specialization is the one that provides ascii_mask
+
+ template<> struct ConvertInputOptimizer<1> // input-side constants for 1-byte code units
+ {
+ static const uint32_t ascii_mask { 0x80808080 }; // bit 7 of each of the four bytes of a 32-bit word; ArchitectureOptimizer replicates it to its accumulator width
+ };
+
+ template<int value_size> // value_size: size in bytes of one output code unit; convert() passes sizeof(To::value_type)
+ struct ConvertOutputOptimizer {}; // primary template has no members; the <1> specialization is the one that provides append()
+
+ template<> struct ConvertOutputOptimizer<1> // output-side helper for 1-byte code units
+ {
+ template<typename input_value_type, class output_string_type, int code_units> // code_units: number of elements appended per call (compile-time constant)
+ inline static void append(const input_value_type* addr, output_string_type& s) // appends code_units elements starting at addr to the end of s
+ {
+ s.append(reinterpret_cast<const typename output_string_type::value_type*>(addr), code_units); // the input pointer is reinterpreted as the output value_type; no per-element conversion is performed
+ }
+ };
+
+ template<int AccuSize, class ConvertInputOptimizer, class ConvertOutputOptimizer> // AccuSize: accumulator width in bytes; convert() passes sizeof(size_t)
+ struct ArchitectureOptimizer {}; // primary template has no members; in the code visible here only AccuSize == 8 is specialized
+
+ template<class ConvertInputOptimizer, class ConvertOutputOptimizer> // note: these parameter names hide the class templates of the same name inside this scope
+ struct ArchitectureOptimizer<8, ConvertInputOptimizer, ConvertOutputOptimizer> // partial specialization for an 8-byte accumulator
+ {
+ typedef ConvertInputOptimizer input_optimizer; // must provide a static ascii_mask (read below)
+ typedef ConvertOutputOptimizer output_optimizer; // must provide a static template append<input, output, count>() (called below)
+ typedef uint64_t accu_type; // word type used by convert() to read accu_size bytes at once
+ static const size_t accu_size {8}; // number of bytes handled per accumulator read
+ static const accu_type addr_mask {accu_size - 1}; // low address bits (7); convert() takes the word-wise path only when (address & addr_mask) == 0
+ static const accu_type ascii_mask { ((accu_type)input_optimizer::ascii_mask) << 32 | (accu_type)input_optimizer::ascii_mask }; // the 32-bit input mask copied into both the upper and the lower half of the 64-bit word
+ static const accu_type ascii_value { 0ULL }; // value that (data & ascii_mask) is compared against in convert() before the word is appended unchanged
+
+ template<typename input_value_type, class output_string_type>
+ inline static void append(const input_value_type* addr, output_string_type& s) // appends accu_size elements starting at addr to s
+ {
+ output_optimizer::template append<input_value_type, output_string_type, accu_size>(addr, s); // forwards to the output optimizer with accu_size as the element count
+ }
+ };
+
// From and To are facets
template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true>
typename To::string_type convert(const typename From::string_type& s)
@@ -557,17 +598,21 @@ namespace unicode {
if constexpr(sizeof(typename From::string_type::value_type) == 1 &&
sizeof(typename To::value_type) == 1 &&
sizeof(size_t) >= 8) {
+ typedef ConvertInputOptimizer<sizeof(typename From::string_type::value_type)> input_optimizer;
+ typedef ConvertOutputOptimizer<sizeof(typename To::value_type)> output_optimizer;
+ typedef ArchitectureOptimizer<sizeof(size_t), input_optimizer, output_optimizer> arch_optimizer;
+
auto begin{From::begin(s)};
auto end{From::end(s)};
auto back_inserter{To::back_inserter(result)};
- auto addr{reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])};
- while (input_distance(begin, end) >= 8) {
- if (((uintptr_t)(void*)addr & 7) == 0) {
- while (input_distance(begin, end) >= 8) {
- uint64_t data{*addr};
- if ((data & 0x8080808080808080ULL) == 0ULL) {
- result.append(reinterpret_cast<const typename To::value_type*>(addr), 8);
- begin += 8;
+ auto addr{reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])};
+ while (input_distance(begin, end) >= arch_optimizer::accu_size) {
+ if (((uintptr_t)(void*)addr & arch_optimizer::addr_mask) == 0) {
+ while (input_distance(begin, end) >= arch_optimizer::accu_size) {
+ typename arch_optimizer::accu_type data{*addr};
+ if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) {
+ arch_optimizer::template append<typename From::string_type::value_type, typename To::string_type>(reinterpret_cast<const typename From::string_type::value_type*>(addr), result);
+ begin += arch_optimizer::accu_size;
++addr;
} else {
// just advance one code unit for now
@@ -579,7 +624,7 @@ namespace unicode {
}
// keep up after unaligned Non-ASCII code points
- while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])) & 7) {
+ while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const typename arch_optimizer::accu_type*>(&s.data()[s.size() - input_distance(begin, end)])) & arch_optimizer::addr_mask) {
back_inserter = *begin;
++begin;
}