From d75cefda8a5ea08976d6bb512150d7c6891ac73e Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Tue, 4 Jan 2022 19:45:01 +0100 Subject: Performance tests on deterministic data --- src/test-performance.cpp | 80 ++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/src/test-performance.cpp b/src/test-performance.cpp index 629aadd..64535c6 100644 --- a/src/test-performance.cpp +++ b/src/test-performance.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -34,49 +33,68 @@ using namespace std::string_literals; typedef std::tuple, std::basic_string, std::basic_string> types_collection_type; -struct random_context { - random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} - std::random_device rd; // OS random number engine to seed RNG (below) - std::mt19937 gen{rd()}; - std::uniform_int_distribution sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units - std::uniform_int_distribution code_point_distribution; -}; +// LCG for generating deterministic mixed data, see also https://arxiv.org/pdf/2001.05304.pdf +uint8_t generate_byte() +{ + static uint64_t x{1}; + const static uint32_t a{0x915f77f5}; + const static uint32_t c{12345}; + const static uint32_t m_mask{0xFFFFFFFF}; + + x = (x * a + c) & m_mask; + + return (x >> 16) & 0xFF; +} + +// max is inclusive +template +T generate_value(T max = std::numeric_limits::max()) +{ + uint64_t max_modulo{ static_cast(0x100000000ULL) - (0x100000000ULL % (max + 1))}; + + uint32_t value{}; + do { + for (int i = 0; i < sizeof(value); ++i) { + value = (value << 8) | generate_byte(); + } + } while (static_cast(value) >= max_modulo); + + return static_cast(value % (max + 1)); +} // generates valid and invalid strings of different type template -T generate_random_invalid(random_context& rc, size_t length) +T generate_string_invalid(size_t length) { - // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC - std::uniform_int_distribution code_unit{0, std::numeric_limits::max()}; // code unit value T result; - std::generate_n(std::back_inserter(result), length, [&](){return static_cast(code_unit(rc.gen));}); + std::generate_n(std::back_inserter(result), length, [&](){return generate_value();}); return result; } -char32_t generate_random_char(random_context& rc) +char32_t generate_char(char32_t max = 0x10FFFF - 0x800) { - auto result {rc.code_point_distribution(rc.gen)}; + char32_t result {generate_value(max)}; if (result >= 0xD800) result += 0x800; return static_cast(result); } -std::u32string generate_random_string(random_context& rc, size_t length) +std::u32string generate_string(char32_t max, size_t length) { std::u32string result; - std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); + std::generate_n(std::back_inserter(result), length, [&](){return generate_char(max);}); return result; } template -void test_random_invalid(random_context& rc, size_t length) +void test_string_invalid(size_t length) { //std::cerr << "LENGTH: " << length << std::endl; typedef typename std::tuple_element::type To; - From r {static_cast(generate_random_invalid(rc, length))}; + From r {static_cast(generate_string_invalid(length))}; // base type interface try { @@ -125,15 +143,13 @@ void test_random_invalid(random_context& rc, size_t length) // iterate over remaining To types if constexpr (i + 1 < std::tuple_size::value) - test_random_invalid(rc, length); + test_string_invalid(length); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_invalid, T, types_collection_type) { - random_context rc; - for (int i = 0; i < 10; i++) { - test_random_invalid(rc, rc.sequence_length(rc.gen)); + test_string_invalid(generate_value(100000)); } } @@ -166,13 +182,13 @@ private: }; template -void test_random_valid(random_context& rc, size_t length, const std::string& description) +void test_string_valid(char32_t max, size_t length, const std::string& description) { typedef typename std::tuple_element::type To; // Fill UTF-32 data list: source for tests std::vector u32list; - std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); + std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_string(max, generate_value(100000));}); // Fill From data list std::vector list; @@ -211,20 +227,16 @@ void test_random_valid(random_context& rc, size_t length, const std::string& des // iterate over remaining To types if constexpr (index + 1 < std::tuple_size::value) - test_random_valid(rc, length, description); + test_string_valid(max, length, description); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_valid_ascii, T, types_collection_type) { - random_context rc{127}; - - test_random_valid(rc, rc.sequence_length(rc.gen), "ASCII only strings"); + test_string_valid(127, generate_value(100000), "ASCII only strings"); } -BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) +BOOST_AUTO_TEST_CASE_TEMPLATE(sequences_valid_all_unicode, T, types_collection_type) { - random_context rc; - - test_random_valid(rc, rc.sequence_length(rc.gen), "All Unicode strings"); + test_string_valid(0x10FFFF - 0x800, generate_value(100000), "All Unicode strings"); } -- cgit v1.2.3