From 79dc9edc72c5b9fefb129fe36029d4781b1e969c Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Sat, 25 Dec 2021 14:38:46 +0100 Subject: Generalized type usage and optimizations --- Makefile | 4 +- include/unicode.h | 155 +++++++++++++++++++++++++++++++++------------------ src/test-unicode.cpp | 51 +++++++++++------ 3 files changed, 138 insertions(+), 72 deletions(-) diff --git a/Makefile b/Makefile index 6ed0e68..e037bc5 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ ONDEBIAN=no endif # On Ubuntu 2104 and 2110, dh_strip / debugedit is broken, therefore different Non-Debian options in the following -DISTROS=base debian10 debian11 ubuntu2004 ubuntu2010 ubuntu2104 ubuntu2110 ubuntu2204 +DISTROS=base base-i386 debian10 debian11 ubuntu2004 ubuntu2010 ubuntu2104 ubuntu2110 ubuntu2204 ifeq ($(wildcard $(shell which clang++-13)),) ifeq ($(wildcard $(shell which clang++-12)),) @@ -124,7 +124,7 @@ deb-src: $(DISTROS): deb-src sudo pbuilder build --basetgz /var/cache/pbuilder/$@.tgz --buildresult result/$@ ../$(PROJECTNAME)_$(VERSION).dsc - debsign result/$@/$(PROJECTNAME)_$(VERSION)_amd64.changes + -debsign result/$@/$(PROJECTNAME)_$(VERSION)_amd64.changes debs: $(DISTROS) diff --git a/include/unicode.h b/include/unicode.h index 8dedb19..c2d727a 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -45,8 +45,8 @@ namespace unicode::detail { { static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4); - typedef T input_type; - typedef char32_t value_type; + typedef T value_type; + typedef char32_t internal_type; typedef char32_t& reference; typedef char32_t* pointer; typedef size_t difference_type; @@ -67,9 +67,9 @@ namespace unicode::detail { } template - T get_code_unit() const noexcept + value_type get_code_unit() const noexcept { - if constexpr (std::is_same>::value) { + if constexpr (std::is_same>::value) { // std::list doesn't support it + n auto it{iterator}; std::advance(it, index); @@ -79,46 +79,46 @@ namespace unicode::detail { } } - inline static bool is_continuation_byte(T b) noexcept + inline static bool is_continuation_byte(value_type b) noexcept { return (b & 0b11000000) == 0b10000000; } template - inline static bool is_continuation_byte(T b, Targs... Fargs) noexcept + inline static bool is_continuation_byte(value_type b, Targs... Fargs) noexcept { return is_continuation_byte(b) && is_continuation_byte(Fargs...); } template - inline static bool is_byte0_of(T b) noexcept + inline static bool is_byte0_of(value_type b) noexcept { - return (b & static_cast(0xFF << (7 - n))) == static_cast(0xFF << (8 - n)); + return (b & static_cast(0xFF << (7 - n))) == static_cast(0xFF << (8 - n)); } - inline static char32_t continuation_value(T b) noexcept + inline static internal_type continuation_value(value_type b) noexcept { - return static_cast(b & 0b00111111); + return static_cast(b & 0b00111111); } template - inline static char32_t continuation_value(T b, Targs... Fargs) noexcept + inline static internal_type continuation_value(value_type b, Targs... Fargs) noexcept { return continuation_value(b) << (6 * sizeof...(Targs)) | continuation_value(Fargs...); } template - inline static char32_t value_byte0_of(T b) noexcept + inline static internal_type value_byte0_of(value_type b) noexcept { return static_cast(b & (0b1111111 >> n)) << ((n - 1) * 6); } - template::type = true> - inline value_type calculate_value() + template::type = true> + inline internal_type calculate_value() { utf8_t byte0 {static_cast(get_code_unit<0>())}; if (byte0 & 0x80) { // 2-4 bytes - value_type value{}; + internal_type value{}; if (size_t remaining{remaining_code_units()}; remaining >= 2) { utf8_t byte1 {static_cast(get_code_unit<1>())}; if (is_byte0_of<2>(byte0) && is_continuation_byte(byte1)) { // 2 bytes @@ -154,8 +154,8 @@ namespace unicode::detail { } } - template::type = true> - inline value_type calculate_value() + template::type = true> + inline internal_type calculate_value() { char16_t unit0 {static_cast(get_code_unit<0>())}; @@ -175,10 +175,10 @@ namespace unicode::detail { } } - template::type = true> - inline value_type calculate_value() + template::type = true> + inline internal_type calculate_value() { - value_type result {static_cast(get_code_unit<0>())}; + internal_type result {static_cast(get_code_unit<0>())}; if (!unicode::is_valid_unicode(result)) throw std::invalid_argument("Invalid Unicode character: "s + std::to_string(static_cast(result))); @@ -199,7 +199,7 @@ namespace unicode::detail { return std::distance(iterator, end_iterator) != std::distance(other.iterator, other.end_iterator); } - value_type operator*() + internal_type operator*() { return calculate_value(); } @@ -256,14 +256,14 @@ namespace unicode::detail { // n is number of UTF-8 bytes in sequence template - inline static T byte0_of(char32_t value) + inline static value_type byte0_of(char32_t value) { return (value >> 6 * (n - 1)) | (0xFF << (8 - n)); } // n is index of 6-bit groups, counting from bit 0 template - inline static T trailing_byte(char32_t value) + inline static value_type trailing_byte(char32_t value) { return ((value >> n * 6) & 0b111111) | 0b10000000; } @@ -271,7 +271,7 @@ namespace unicode::detail { // calculate UTF-8 sequence byte for m >= 2 bytes sequences (i.e. non-ASCII) // assume value to be valid Unicode value for given byte position template - inline static T byte_n_of_m(char32_t value) + inline static value_type byte_n_of_m(char32_t value) { if constexpr (n == 0) return byte0_of(value); @@ -282,7 +282,7 @@ namespace unicode::detail { template inline void append(Arg&& arg) { - if constexpr (std::is_same>::value) { + if constexpr (std::is_same>::value) { s.append({arg}); } else { s.emplace_back(arg); @@ -292,7 +292,7 @@ namespace unicode::detail { template inline void append(Arg&& arg, Args&&... args) { - if constexpr (std::is_same>::value) { + if constexpr (std::is_same>::value) { s.append({arg, args...}); } else { s.emplace_back(arg); @@ -300,7 +300,7 @@ namespace unicode::detail { } } - template::type = true> + template::type = true> inline void append_utf(const char32_t& value) { if (value < 0x80) { // 1 byte @@ -315,18 +315,18 @@ namespace unicode::detail { throw std::runtime_error("Invalid internal Unicode value: "s + std::to_string(static_cast(value))); } - template::type = true> + template::type = true> inline void append_utf(const char32_t& value) { if (value <= 0xFFFF) { // expect value to be already valid Unicode values (checked in input iterator) append(static_cast(value)); } else { char32_t value_reduced{value - 0x10000}; - append(static_cast((value_reduced >> 10) + 0xD800), static_cast((value_reduced & 0x3FF) + 0xDC00)); + append(static_cast((value_reduced >> 10) + 0xD800), static_cast((value_reduced & 0x3FF) + 0xDC00)); } } - template::type = true> + template::type = true> inline void append_utf(const char32_t& value) { // expect value to be already valid Unicode values (checked in input iterator) @@ -382,8 +382,8 @@ namespace unicode { template> struct iso_iterator { - typedef iso_t input_type; - typedef char32_t value_type; + typedef iso_t value_type; + typedef char32_t internal_type; typedef char32_t& reference; typedef char32_t* pointer; typedef size_t difference_type; @@ -406,9 +406,9 @@ namespace unicode { } // return reference? - value_type operator*() const + internal_type operator*() const { - input_type value{*m_it}; + value_type value{*m_it}; if constexpr(std::addressof(Map) != std::addressof(iso_8859_1_map)) // mapping of 128 <= x <= 255 needed { @@ -416,7 +416,7 @@ namespace unicode { if (it != Map.end()) return it->second; } - return static_cast(static_cast(value)); + return static_cast(static_cast(value)); } iso_iterator& operator+=(size_t distance) @@ -554,28 +554,61 @@ namespace unicode { template<> struct ConvertInputOptimizer<1> { static const uint32_t ascii_mask { 0x80808080 }; + // 00112233 + // 00112222 + // 00111122 + // 00111111 + // 00001122 + // 00001111 + // 00000011 }; - template - struct ConvertOutputOptimizer {}; + template<> struct ConvertInputOptimizer<2> + { + static const uint32_t ascii_mask { 0xFF80FF80 }; + }; + + template<> struct ConvertInputOptimizer<4> + { + static const uint32_t ascii_mask { 0xFFFFFF80 }; + }; + + template + struct ArchitectureOptimizer {}; - template<> struct ConvertOutputOptimizer<1> + template + struct ArchitectureOptimizer<4, ConvertInputOptimizer> { - template + typedef ConvertInputOptimizer input_optimizer; + typedef uint32_t accu_type; + static const size_t accu_size {4}; + static const accu_type addr_mask {accu_size - 1}; + static const accu_type ascii_mask { (accu_type)input_optimizer::ascii_mask }; + static const accu_type ascii_value { 0ULL }; + + template inline static void append(const input_value_type* addr, output_string_type& s) { - s.append(reinterpret_cast(addr), code_units); + if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) { + s.append(reinterpret_cast(addr), accu_size / sizeof(input_value_type)); + } else if constexpr(sizeof(input_value_type) == 1) { + s.append({static_cast(addr[0]), + static_cast(addr[1]), + static_cast(addr[2]), + static_cast(addr[3])}); + } else if constexpr(sizeof(input_value_type) == 2) { + s.append({static_cast(addr[0]), + static_cast(addr[1])}); + } else if constexpr(sizeof(input_value_type) == 4) { + s.append({static_cast(addr[0])}); + } } }; - - template - struct ArchitectureOptimizer {}; - template - struct ArchitectureOptimizer<8, ConvertInputOptimizer, ConvertOutputOptimizer> + template + struct ArchitectureOptimizer<8, ConvertInputOptimizer> { typedef ConvertInputOptimizer input_optimizer; - typedef ConvertOutputOptimizer output_optimizer; typedef uint64_t accu_type; static const size_t accu_size {8}; static const accu_type addr_mask {accu_size - 1}; @@ -585,7 +618,26 @@ namespace unicode { template inline static void append(const input_value_type* addr, output_string_type& s) { - output_optimizer::template append(addr, s); + if constexpr(sizeof(input_value_type) == sizeof(typename output_string_type::value_type)) { + s.append(reinterpret_cast(addr), accu_size / sizeof(input_value_type)); + } else if constexpr(sizeof(input_value_type) == 1) { + s.append({static_cast(addr[0]), + static_cast(addr[1]), + static_cast(addr[2]), + static_cast(addr[3]), + static_cast(addr[4]), + static_cast(addr[5]), + static_cast(addr[6]), + static_cast(addr[7])}); + } else if constexpr(sizeof(input_value_type) == 2) { + s.append({static_cast(addr[0]), + static_cast(addr[1]), + static_cast(addr[2]), + static_cast(addr[3])}); + } else if constexpr(sizeof(input_value_type) == 4) { + s.append({static_cast(addr[0]), + static_cast(addr[1])}); + } } }; @@ -595,12 +647,9 @@ namespace unicode { { typename To::string_type result; - if constexpr(sizeof(typename From::string_type::value_type) == 1 && - sizeof(typename To::value_type) == 1 && - sizeof(size_t) >= 8) { + if constexpr(sizeof(size_t) == 4 || sizeof(size_t) == 8) { typedef ConvertInputOptimizer input_optimizer; - typedef ConvertOutputOptimizer output_optimizer; - typedef ArchitectureOptimizer arch_optimizer; + typedef ArchitectureOptimizer arch_optimizer; auto begin{From::begin(s)}; auto end{From::end(s)}; @@ -612,7 +661,7 @@ namespace unicode { typename arch_optimizer::accu_type data{*addr}; if ((data & arch_optimizer::ascii_mask) == arch_optimizer::ascii_value) { arch_optimizer::template append(reinterpret_cast(addr), result); - begin += arch_optimizer::accu_size; + begin += arch_optimizer::accu_size / sizeof(typename From::string_type::value_type); ++addr; } else { // just advance one code unit for now diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index d638cbb..a30be70 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -1,11 +1,11 @@ #define BOOST_TEST_MODULE unicode_test +#include #include #include #include #include - -#include +#include #include #include @@ -24,6 +24,7 @@ #include using namespace std::chrono_literals; +using namespace std::string_literals; typedef std::tuple, std::basic_string, std::basic_string> types_collection_type; @@ -442,6 +443,27 @@ namespace { } } +class CPUTimer +{ +public: + CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) + { + } + + ~CPUTimer() + { + auto elapsed_cpu{mCPUTimer.elapsed()}; + std::cout << mName << ": " << std::chrono::duration(std::chrono::steady_clock::now() - mWallTime0).count() << + "s (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << + "s CPU)" << std::endl; + } + +private: + std::string mName; + std::chrono::time_point mWallTime0; + boost::timer::cpu_timer mCPUTimer; +}; + template void test_random_valid(random_context& rc, size_t length, const std::string& description) { @@ -466,33 +488,24 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des } { - auto t0{std::chrono::steady_clock::now()}; + CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) + + " "s + description + + " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + + " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8)); for (const auto& i: list) To result{unicode::convert::Facet,typename unicode::Encoding::Facet>(i)}; - std::cout << "Performance test for converting " << list.size() << - " " << description << - " from UTF-" << (sizeof(typename From::value_type) * 8) << - " to UTF-" << (sizeof(typename To::value_type) * 8) << ": " << - std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << "s" << - std::endl; } { - auto t0{std::chrono::steady_clock::now()}; + CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf"); for (const auto& i: list) To result{boost::locale::conv::utf_to_utf(i)}; - std::cout << " -> Compare to boost::locale::conv::utf_to_utf: " << - std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << "s" << - std::endl; } { - auto t0{std::chrono::steady_clock::now()}; + CPUTimer timer(" -> Compare to std::wstring_convert"); for (const auto& i: list) To result{std_convert(i)}; - std::cout << " -> Compare to std::wstring_convert: " << - std::chrono::duration(std::chrono::steady_clock::now() - t0).count() << "s" << - std::endl; } // iterate over remaining To types @@ -615,6 +628,10 @@ BOOST_AUTO_TEST_CASE(string_u8string) a = std::string{b.begin(), b.end()}; BOOST_CHECK(a == std::string{"\xc3\xa4"}); + + BOOST_CHECK(sizeof(size_t) == 4 || sizeof(size_t) == 8); + + std::cout << "Detected CPU Accu size: " << (sizeof(size_t) * 8) << std::endl; } // check environment: demonstrate how boost convert u8->u8 throws exception on invalid input -- cgit v1.2.3