summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it> 2021-12-23 13:27:34 +0100
committer: Roland Reichwein <mail@reichwein.it> 2021-12-23 13:27:34 +0100
commit: 721064dc293d8915fbb33d83bd983a40dcca180f (patch)
tree: 3259680f1b4d46ef98030aae5b0b3cc89ae49396 /include
parent: cd7e832e2f47fa35d36794808582118cb34eca3f (diff)
Speed optimization
Diffstat (limited to 'include')
-rw-r--r-- include/unicode.h 73
1 files changed, 71 insertions, 2 deletions
diff --git a/include/unicode.h b/include/unicode.h
index 43dc44e..5774db7 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -8,6 +8,7 @@
#pragma once
#include <algorithm>
+#include <cstdint>
#include <iterator>
#include <list>
#include <memory>
@@ -203,6 +204,17 @@ namespace unicode::detail {
return calculate_value();
}
+ utf_iterator& operator+=(size_t distance) // Moves the wrapped string const_iterator forward by `distance` string elements and returns *this.
+ {
+ std::advance(iterator, distance); // NOTE(review): no comparison against end_iterator here; presumably callers guarantee the range
+ return *this;
+ }
+
+ size_t operator-(const utf_iterator& other) const // Distance from `other` to *this, measured on the wrapped string iterators (string elements).
+ {
+ return iterator - other.iterator; // NOTE(review): signed iterator difference converted to size_t, so it wraps if `other` is ahead of *this; iso_iterator::operator- returns difference_type instead
+ }
+
private:
typename string_type::const_iterator iterator;
typename string_type::const_iterator end_iterator;
@@ -394,7 +406,7 @@ namespace unicode {
}
// return reference?
- value_type operator*()
+ value_type operator*() const
{
input_type value{*m_it};
@@ -407,6 +419,17 @@ namespace unicode {
return static_cast<value_type>(static_cast<uint8_t>(value));
}
+ iso_iterator& operator+=(size_t distance) // Moves the wrapped iterator m_it forward by `distance` elements and returns *this.
+ {
+ std::advance(m_it, distance); // NOTE(review): no end check is visible here; presumably callers guarantee the range
+ return *this;
+ }
+
+ difference_type operator-(const iso_iterator& other) const // Distance from `other` to *this, taken on the wrapped iterators.
+ {
+ return m_it - other.m_it; // result is negative when `other` is ahead of *this, assuming difference_type is signed — TODO confirm against the class typedef
+ }
+
private:
iterator m_it;
};
@@ -518,13 +541,59 @@ namespace unicode {
typedef UTF<utf_iterator<char16_t>, utf_back_insert_iterator<char16_t>> UTF_16;
typedef UTF<utf_iterator<char32_t>, utf_back_insert_iterator<char32_t>> UTF_32;
+ // std::distance doesn't work here: it is based on the "output" distance of the iterators; this helper uses the iterators' own operator- instead
+ template<class Iterator>
+ size_t input_distance(const Iterator& it1, const Iterator& it2) // Returns it2 - it1 as size_t; Iterator must provide a binary operator-.
+ {
+ return it2 - it1; // the difference is converted to size_t, so it1 must not be ahead of it2
+ }
+
// From and To are facets
template<typename From, typename To, std::enable_if_t<std::is_empty<From>::value, bool> = true> // Converts s from facet From to facet To; this overload participates only when From is an empty type.
typename To::string_type convert(const typename From::string_type& s) // Returns the converted text as a To::string_type.
{
typename To::string_type result;
- std::copy(From::begin(s), From::end(s), To::back_inserter(result));
+ if constexpr(sizeof(typename From::string_type::value_type) == 1 && // fast path: input elements are 1 byte wide,
+ sizeof(typename To::value_type) == 1 && // output value type is 1 byte wide,
+ sizeof(size_t) >= 8) { // and size_t has at least 8 bytes
+ auto begin{From::begin(s)};
+ auto end{From::end(s)};
+ auto back_inserter{To::back_inserter(result)};
+ auto addr{reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])}; // address of the input element input_distance(begin, end) positions before the end of s, typed as a 64-bit word pointer
+ while (input_distance(begin, end) >= 8) { // runs while at least 8 input elements remain
+ if (((uintptr_t)(void*)addr & 7) == 0) { // word-wise path only when addr is 8-byte aligned
+ while (input_distance(begin, end) >= 8) {
+ uint64_t data{*addr}; // NOTE(review): reads character storage through a uint64_t lvalue — formally a strict-aliasing violation; copying the 8 bytes with std::memcpy would avoid it
+ if ((data & 0x8080808080808080ULL) == 0ULL) { // true when none of the 8 bytes has its high bit set (all values < 0x80)
+ result.append(reinterpret_cast<const typename To::value_type*>(addr), 8); // append those 8 bytes to result unchanged
+ begin += 8; // keep the iterator and the word pointer in step
+ ++addr;
+ } else {
+ // just advance one code unit for now
+ back_inserter = *begin; // hand one dereferenced element of begin to To's back inserter
+ ++begin;
+ break; // leaves the word loop without updating addr; addr is reassigned in the loop condition below
+ }
+ }
+ }
+
+ // keep up after unaligned Non-ASCII code points
+ while (begin!= end && (uintptr_t)(void*)(addr = reinterpret_cast<const uint64_t*>(&s.data()[s.size() - input_distance(begin, end)])) & 7) { // reassigns addr from begin on each test; copies single elements until addr is 8-byte aligned or input ends
+ back_inserter = *begin;
+ ++begin;
+ }
+ }
+
+ // remainder < 8 bytes
+ while (begin != end) { // element-wise copy of whatever the loops above left over
+ back_inserter = *begin;
+ ++begin;
+ }
+
+ } else {
+ std::copy(From::begin(s), From::end(s), To::back_inserter(result)); // generic path: element-wise copy through the facet iterators
+ }
return result;
}