summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2022-01-02 19:02:38 +0100
committer Roland Reichwein <mail@reichwein.it> 2022-01-02 19:02:38 +0100
commit aa2edab739e3daac42cd5dbb44d10234ad880626 (patch)
tree a89ff831861346ffb4ece6b0ca770c8be8315770
parent c3124ce89b53e0b1ff3b666aeee9d1829b74229c (diff)
Separated unit tests and performance tests
-rw-r--r-- Makefile 17
-rw-r--r-- msbuild/test-performance.vcxproj 91
-rw-r--r-- msbuild/test-performance.vcxproj.filters 17
-rw-r--r-- msbuild/test-unicode.vcxproj 1
-rw-r--r-- msbuild/test-unicode.vcxproj.filters 5
-rw-r--r-- src/test-helper.cpp 131
-rw-r--r-- src/test-helper.h 39
-rw-r--r-- src/test-performance.cpp 230
-rw-r--r-- src/test-unicode.cpp 313
9 files changed, 527 insertions, 317 deletions
diff --git a/Makefile b/Makefile
index 02498b3..3d228a9 100644
--- a/Makefile
+++ b/Makefile
@@ -83,12 +83,15 @@ SRC=\
src/file.cpp \
src/recode.cpp \
src/validate.cpp \
+ src/test-helper.cpp \
+ src/test-performance.cpp \
src/test-unicode.cpp
-all: src/recode src/test-unicode src/validate
+all: src/recode src/test-unicode src/test-performance src/validate
-test: src/test-unicode
+test: src/test-unicode src/test-performance
src/test-unicode
+ src/test-performance
src/recode: src/recode.o src/file.o dep
$(CXX) $(LDFLAGS) src/recode.o src/file.o $(LDLIBS) $(LIBS) -o $@
@@ -96,8 +99,11 @@ src/recode: src/recode.o src/file.o dep
src/validate: src/validate.o src/file.o dep
$(CXX) $(LDFLAGS) src/validate.o src/file.o $(LDLIBS) $(LIBS) -o $@
-src/test-unicode: src/test-unicode.o dep
- $(CXX) $(LDFLAGS) src/test-unicode.o $(LDLIBS) $(LIBS) -o $@
+src/test-unicode: src/test-unicode.o src/test-helper.o dep
+ $(CXX) $(LDFLAGS) src/test-unicode.o src/test-helper.o $(LDLIBS) $(LIBS) -o $@
+
+src/test-performance: src/test-performance.o src/test-helper.o dep
+ $(CXX) $(LDFLAGS) src/test-performance.o src/test-helper.o $(LDLIBS) $(LIBS) -o $@
dep: $(SRC:.cpp=.d)
@@ -108,7 +114,7 @@ dep: $(SRC:.cpp=.d)
$(CXX) $(CXXFLAGS) -c $< -o $@
clean:
- -rm -f src/recode src/test-unicode src/validate src/*.tmp
+ -rm -f src/recode src/test-unicode src/test-performance src/validate src/*.tmp
-rm -rf result
-find . -name '*.o' -o -name '*.d' -o -name '*.gcno' -o -name '*.gcda' | xargs rm -f
@@ -137,6 +143,7 @@ debs: $(DISTROS)
DISTFILES= \
$(SRC) \
src/file.h \
+ src/test-helper.h \
Makefile \
include/unicode.h \
include/unicode/endian.h \
diff --git a/msbuild/test-performance.vcxproj b/msbuild/test-performance.vcxproj
new file mode 100644
index 0000000..769bc3a
--- /dev/null
+++ b/msbuild/test-performance.vcxproj
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project for the test-performance console application -->
+ <ItemGroup Label="ProjectConfigurations"> <!-- two configurations are defined: Debug|x64 and Release|x64 -->
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>16.0</VCProjectVersion>
+ <Keyword>Win32Proj</Keyword>
+ <ProjectGuid>{F1CDB899-0BD2-44F3-A364-F77D19457328}</ProjectGuid>
+ <RootNamespace>test-performance</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+ <ProjectName>test-performance</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v142</PlatformToolset>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v142</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="Shared">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <Import Project="compiler.props" /> <!-- settings imported from compiler.props, which is not part of this file -->
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <SDLCheck>true</SDLCheck>
+ <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions> <!-- /bigobj is set for Debug only; the Release group below does not set it -->
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <SDLCheck>true</SDLCheck>
+ <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup> <!-- translation units compiled into this executable -->
+ <ClCompile Include="..\src\test-helper.cpp" />
+ <ClCompile Include="..\src\test-performance.cpp" />
+ </ItemGroup>
+ <ItemGroup> <!-- project reference to libunicode.vcxproj -->
+ <ProjectReference Include="libunicode.vcxproj">
+ <Project>{c53e2afc-ea45-4798-be36-e959bb5c5a53}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
diff --git a/msbuild/test-performance.vcxproj.filters b/msbuild/test-performance.vcxproj.filters
new file mode 100644
index 0000000..b8488bd
--- /dev/null
+++ b/msbuild/test-performance.vcxproj.filters
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- filter assignments for the sources of test-performance.vcxproj -->
+ <ItemGroup> <!-- declares the single filter "Implementation" -->
+ <Filter Include="Implementation">
+ <UniqueIdentifier>{A03B3E05-0246-435D-8DE4-B3B8BB72CD12}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup> <!-- both source files are assigned to the "Implementation" filter -->
+ <ClCompile Include="..\src\test-helper.cpp">
+ <Filter>Implementation</Filter>
+ </ClCompile>
+ <ClCompile Include="..\src\test-performance.cpp">
+ <Filter>Implementation</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project>
diff --git a/msbuild/test-unicode.vcxproj b/msbuild/test-unicode.vcxproj
index 144635e..3cd66aa 100644
--- a/msbuild/test-unicode.vcxproj
+++ b/msbuild/test-unicode.vcxproj
@@ -77,6 +77,7 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
+ <ClCompile Include="..\src\test-helper.cpp" />
<ClCompile Include="..\src\test-unicode.cpp" />
</ItemGroup>
<ItemGroup>
diff --git a/msbuild/test-unicode.vcxproj.filters b/msbuild/test-unicode.vcxproj.filters
index ac23eb8..7d53cda 100644
--- a/msbuild/test-unicode.vcxproj.filters
+++ b/msbuild/test-unicode.vcxproj.filters
@@ -7,8 +7,11 @@
</Filter>
</ItemGroup>
<ItemGroup>
+ <ClCompile Include="..\src\test-helper.cpp">
+ <Filter>Implementation</Filter>
+ </ClCompile>
<ClCompile Include="..\src\test-unicode.cpp">
<Filter>Implementation</Filter>
</ClCompile>
</ItemGroup>
-</Project> \ No newline at end of file
+</Project>
diff --git a/src/test-helper.cpp b/src/test-helper.cpp
new file mode 100644
index 0000000..d441895
--- /dev/null
+++ b/src/test-helper.cpp
@@ -0,0 +1,131 @@
+#include "test-helper.h"
+
+#include <array>
+#include <chrono>
+#include <codecvt>
+#include <deque>
+#include <exception>
+#include <limits>
+#include <list>
+#include <locale>
+#include <ostream>
+#include <random>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+// Output operators for the UTF string types; they must be in the same namespace as the type itself (std)
+namespace std { // NOTE(review): the C++ standard restricts adding declarations to namespace std; confirm this is acceptable for test-only code
+
+#ifdef __cpp_char8_t // overload compiled only when the compiler reports char8_t support
+ std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s) // writes s as "[ n n ...]" and returns os
+ {
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint8_t>(c)); // each code unit as a decimal number, preceded by a space
+ os << "]";
+
+ return os;
+ }
+#endif
+
+ std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) // writes s as "[ n n ...]" and returns os
+ {
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint16_t>(c)); // each 16-bit code unit as a decimal number
+ os << "]";
+
+ return os;
+ }
+
+ std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s) // writes s as "[ n n ...]" and returns os
+ {
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint32_t>(c)); // each 32-bit code unit as a decimal number
+ os << "]";
+
+ return os;
+ }
+
+} // namespace std
+
+namespace { // file-local helpers: facet wrapper and the two converter objects used by std_convert
+
+ // Wrapper giving a facet a public destructor (std::codecvt's is protected) so it can be used with wstring/wbuffer convert
+ template<class Facet>
+ struct deletable_facet : Facet
+ {
+ template<class ...Args>
+ deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {} // forwards all constructor arguments to Facet
+ ~deletable_facet() {} // publicly accessible destructor
+ };
+
+ // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?)
+ std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16; // converts between char byte strings and char16_t strings; NOTE(review): std::wstring_convert is deprecated since C++17
+ std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32; // converts between char byte strings and char32_t strings
+
+} // namespace
+
+template<> // utf8_t -> utf8_t: passes the data through conv32 to char32_t and back
+std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s)
+{
+ std::string a{s.begin(), s.end()}; // copy the code units into a std::string, the byte string type conv32 accepts
+ a = conv32.to_bytes(conv32.from_bytes(a)); // decode to char32_t, then encode to bytes again
+ return std::basic_string<utf8_t>{a.begin(), a.end()}; // copy the bytes back into a utf8_t string
+}
+
+template<> // utf8_t -> char16_t via conv16
+std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s)
+{
+ std::string a{s.begin(), s.end()}; // copy the code units into a std::string, the byte string type conv16 accepts
+ return conv16.from_bytes(a);
+}
+
+template<> // utf8_t -> char32_t via conv32
+std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s)
+{
+ std::string a{s.begin(), s.end()}; // copy the code units into a std::string, the byte string type conv32 accepts
+ return conv32.from_bytes(a);
+}
+
+template<> // char16_t -> utf8_t via conv16
+std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s)
+{
+ auto result{conv16.to_bytes(s)}; // byte string produced by the converter
+ return std::basic_string<utf8_t>(result.begin(), result.end()); // copy the bytes into a utf8_t string
+}
+
+template<> // char16_t -> char16_t: passes the data through conv16 to bytes and back
+std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s)
+{
+ return conv16.from_bytes(conv16.to_bytes(s));
+}
+
+template<> // char16_t -> char32_t: conv16 produces bytes, conv32 decodes them
+std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s)
+{
+ return conv32.from_bytes(conv16.to_bytes(s));
+}
+
+template<> // char32_t -> utf8_t via conv32
+std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s)
+{
+ auto result{conv32.to_bytes(s)}; // byte string produced by the converter
+ return std::basic_string<utf8_t>(result.begin(), result.end()); // copy the bytes into a utf8_t string
+}
+
+template<> // char32_t -> char16_t: conv32 produces bytes, conv16 decodes them
+std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s)
+{
+ return conv16.from_bytes(conv32.to_bytes(s));
+}
+
+template<> // char32_t -> char32_t: passes the data through conv32 to bytes and back
+std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s)
+{
+ return conv32.from_bytes(conv32.to_bytes(s));
+}
+
diff --git a/src/test-helper.h b/src/test-helper.h
new file mode 100644
index 0000000..c3e2900
--- /dev/null
+++ b/src/test-helper.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <unicode.h>
+
+#include <string>
+
+// Declarations of the output operators for the UTF string types; they must be in the same namespace as the type itself (std)
+namespace std { // NOTE(review): std::ostream is used below but this header includes only <unicode.h> and <string>; consider including <iosfwd>
+
+#ifdef __cpp_char8_t // declared only when the compiler reports char8_t support
+ std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s);
+#endif
+
+ std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s);
+ std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s);
+
+} // namespace std
+
+template<typename From, typename To> // From / To: code unit types of the source and result strings
+std::basic_string<To> std_convert(const std::basic_string<From>& s); // declaration only; no generic definition is visible here, only explicit specializations
+
+template<> // utf8_t -> utf8_t
+std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s);
+template<> // utf8_t -> char16_t
+std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s);
+template<> // utf8_t -> char32_t
+std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s);
+template<> // char16_t -> utf8_t
+std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s);
+template<> // char16_t -> char16_t
+std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s);
+template<> // char16_t -> char32_t
+std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s);
+template<> // char32_t -> utf8_t
+std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s);
+template<> // char32_t -> char16_t
+std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s);
+template<> // char32_t -> char32_t
+std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s);
diff --git a/src/test-performance.cpp b/src/test-performance.cpp
new file mode 100644
index 0000000..629aadd
--- /dev/null
+++ b/src/test-performance.cpp
@@ -0,0 +1,230 @@
+#define BOOST_TEST_MODULE unicode_test
+
+#include <boost/locale.hpp>
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/data/dataset.hpp>
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/version.hpp>
+#if BOOST_VERSION > 106700
+// CPU Timer in Debian 10 boost is broken, so leave it to std::chrono wall clock
+#include <boost/timer/timer.hpp>
+#endif
+
+#include <array>
+#include <chrono>
+#include <codecvt>
+#include <deque>
+#include <exception>
+#include <limits>
+#include <list>
+#include <locale>
+#include <random>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include <unicode.h>
+
+#include "test-helper.h"
+
+using namespace std::chrono_literals;
+using namespace std::string_literals;
+
+typedef std::tuple<std::basic_string<utf8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> types_collection_type; // string types used both as the test-case template list and as the list of conversion targets
+
+struct random_context { // bundles the random engine and the distributions used by the generators in this file
+ random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {} // max_value: upper bound of the raw code point distribution; the default is 0x800 below 0x10FFFF because generate_random_char adds 0x800 to values >= 0xD800
+ std::random_device rd; // OS random number engine to seed RNG (below)
+ std::mt19937 gen{rd()}; // seeded with one value drawn from rd
+ std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units
+ std::uniform_int_distribution<unsigned long> code_point_distribution; // raw values 0 ... max_value
+};
+
+// Generates a string of type T with `length` uniformly random code units; the result may or may not be a valid sequence
+template<typename T>
+T generate_random_invalid(random_context& rc, size_t length)
+{
+ // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC
+ std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value; NOTE(review): if value_type is a signed char type, max() is 127 and only 7-bit units are produced - verify how utf8_t is defined
+ T result;
+ std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));}); // append `length` random code units drawn with rc.gen
+
+ return result;
+}
+
+char32_t generate_random_char(random_context& rc) // returns one random code point drawn from rc.code_point_distribution, never inside 0xD800 ... 0xDFFF
+{
+ auto result {rc.code_point_distribution(rc.gen)};
+ if (result >= 0xD800)
+ result += 0x800; // shift values past the 0x800-wide range starting at 0xD800 (the surrogate range)
+ return static_cast<char32_t>(result);
+}
+
+std::u32string generate_random_string(random_context& rc, size_t length) // returns a UTF-32 string of `length` code points from generate_random_char
+{
+ std::u32string result;
+ std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);}); // append `length` random code points
+
+ return result;
+}
+
+template<typename From, typename ToTypesCollectionType, size_t i = 0> // From: source string type; i: index of the target type inside ToTypesCollectionType
+void test_random_invalid(random_context& rc, size_t length) // feeds one random code unit sequence of `length` units to all three unicode::convert interfaces
+{
+ //std::cerr << "LENGTH: " << length << std::endl;
+ typedef typename std::tuple_element<i,ToTypesCollectionType>::type To; // target string type for this recursion step
+
+ From r {static_cast<From>(generate_random_invalid<From>(rc, length))}; // random input, possibly invalid
+
+ // base type interface
+ try {
+ To result{unicode::convert<typename From::value_type,typename To::value_type>(r)};
+
+ if (r.empty()) {
+ BOOST_CHECK(result.empty());
+ } else {
+ BOOST_CHECK(!result.empty()); // a successful conversion of non-empty input must yield non-empty output
+ }
+ } catch (const std::invalid_argument&) {
+ // OK: this is an expected exception for convert() on bad input
+ } catch (const std::exception& ex) {
+ BOOST_ERROR("Unexpected error on convert(): " << ex.what()); // any other std::exception is reported as a test error
+ }
+
+ // container type interface
+ try {
+ To result{unicode::convert<From, To>(r)};
+
+ if (r.empty()) {
+ BOOST_CHECK(result.empty());
+ } else {
+ BOOST_CHECK(!result.empty());
+ }
+ } catch (const std::invalid_argument&) {
+ // OK: this is an expected exception for convert() on bad input
+ } catch (const std::exception& ex) {
+ BOOST_ERROR("Unexpected error on convert(): " << ex.what());
+ }
+
+ // encoding interface
+ try {
+ To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)};
+
+ if (r.empty()) {
+ BOOST_CHECK(result.empty());
+ } else {
+ BOOST_CHECK(!result.empty());
+ }
+ } catch (const std::invalid_argument&) {
+ // OK: this is an expected exception for convert() on bad input
+ } catch (const std::exception& ex) {
+ BOOST_ERROR("Unexpected error on convert(): " << ex.what());
+ }
+
+ // iterate over remaining To types
+ if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value)
+ test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length); // same length, but a freshly generated input for the next target type
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type) // instantiated once per type in types_collection_type, used as the source type T
+{
+ random_context rc;
+
+ for (int i = 0; i < 10; i++) { // ten rounds, each with a newly drawn random length
+ test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen));
+ }
+}
+
+class CPUTimer // scope timer: records the start time on construction and prints the elapsed time to std::cout on destruction
+{
+public:
+ CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now()) // name: label printed in front of the measured times
+ {
+ }
+
+ ~CPUTimer()
+ {
+#if BOOST_VERSION > 106700
+ auto elapsed_cpu{mCPUTimer.elapsed()}; // CPU times are read only when the boost timer is compiled in
+#endif
+ std::cout << mName << ": " << std::chrono::duration<double>(std::chrono::steady_clock::now() - mWallTime0).count() << // wall clock time since construction, in seconds
+ "s" <<
+#if BOOST_VERSION > 106700
+ " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" << // user + system time divided by 1e9, printed as seconds
+#endif
+ std::endl;
+ }
+
+private:
+ std::string mName; // label for the output line
+ std::chrono::time_point<std::chrono::steady_clock> mWallTime0; // wall clock start time
+#if BOOST_VERSION > 106700
+ boost::timer::cpu_timer mCPUTimer; // presumably starts measuring when constructed - confirm against the Boost.Timer documentation
+#endif
+};
+
+template<typename From, typename ToTypesCollectionType, size_t index = 0> // From: source string type; index: position of the target type inside ToTypesCollectionType
+void test_random_valid(random_context& rc, size_t length, const std::string& description) // NOTE(review): `length` is only forwarded to the recursive call below and otherwise unused; string lengths are drawn from rc.sequence_length instead
+{
+ typedef typename std::tuple_element<index,ToTypesCollectionType>::type To; // target string type for this recursion step
+
+ // Fill UTF-32 data list: source for tests
+ std::vector<std::u32string> u32list;
+ std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));}); // 1000 strings, each with its own random length
+
+ // Fill From data list
+ std::vector<From> list;
+ std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){
+ return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s);
+ });
+
+ for (size_t i = 0; i < list.size(); i++) { // correctness pass before the timed runs
+ BOOST_CHECK(list[i].size() >= u32list[i].size()); // the From encoding has at least as many code units as the UTF-32 source
+ To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])};
+ BOOST_CHECK(result.size() >= u32list[i].size());
+ auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(list[i])};
+ BOOST_CHECK_EQUAL(result, boost_result); // the result must match what boost::locale produces for the same input
+ }
+
+ { // timed: unicode::convert over the whole list
+ CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) +
+ " "s + description +
+ " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) + // code unit width in bits, derived from sizeof
+ " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8));
+ for (const auto& i: list)
+ To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(i)};
+ }
+
+ { // timed: the same conversions with boost::locale
+ CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf");
+ for (const auto& i: list)
+ To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)};
+ }
+
+ { // timed: the same conversions with std_convert; its result is not compared against the others
+ CPUTimer timer(" -> Compare to std::wstring_convert");
+ for (const auto& i: list)
+ To result{std_convert<typename From::value_type, typename To::value_type>(i)};
+ }
+
+ // iterate over remaining To types
+ if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value)
+ test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description); // the next target type generates its own 1000 strings again
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type) // instantiated once per type in types_collection_type, used as the source type T
+{
+ random_context rc{127}; // restrict generated code points to 0 ... 127
+
+ test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings");
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type) // instantiated once per type in types_collection_type, used as the source type T
+{
+ random_context rc; // default code point range of random_context
+
+ test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings");
+}
+
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 6eb523e..1ea704b 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -27,6 +27,8 @@
#include <unicode.h>
+#include "test-helper.h"
+
using namespace std::chrono_literals;
using namespace std::string_literals;
@@ -95,123 +97,6 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t {
U"\x10000000", // invalid unicode (number too big)
};
-// output operators must be in same namespace as the type itself
-namespace std {
-
-#ifdef __cpp_char8_t
- std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint8_t>(c));
- os << "]";
-
- return os;
- }
-#endif
-
- std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint16_t>(c));
- os << "]";
-
- return os;
- }
-
- std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s)
- {
- os << "[";
- for (auto& c: s)
- os << " " << std::to_string(static_cast<uint32_t>(c));
- os << "]";
-
- return os;
- }
-
-} // namespace std
-
-namespace {
-
- // utility wrapper to adapt locale-bound facets for wstring/wbuffer convert
- template<class Facet>
- struct deletable_facet : Facet
- {
- template<class ...Args>
- deletable_facet(Args&& ...args) : Facet(std::forward<Args>(args)...) {}
- ~deletable_facet() {}
- };
-
- // char8_t instead of char doesn't work w/ clang++-13 + C++20 (yet?)
- std::wstring_convert<deletable_facet<std::codecvt<char16_t, char, std::mbstate_t>>, char16_t> conv16;
- std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv32;
-
- template<typename From, typename To>
- std::basic_string<To> std_convert(const std::basic_string<From>& s);
-
- template<>
- std::basic_string<utf8_t> std_convert<utf8_t, utf8_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- a = conv32.to_bytes(conv32.from_bytes(a));
- return std::basic_string<utf8_t>{a.begin(), a.end()};
- }
-
- template<>
- std::basic_string<char16_t> std_convert<utf8_t, char16_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- return conv16.from_bytes(a);
- }
-
- template<>
- std::basic_string<char32_t> std_convert<utf8_t, char32_t>(const std::basic_string<utf8_t>& s)
- {
- std::string a{s.begin(), s.end()};
- return conv32.from_bytes(a);
- }
-
- template<>
- std::basic_string<utf8_t> std_convert<char16_t, utf8_t>(const std::basic_string<char16_t>& s)
- {
- auto result{conv16.to_bytes(s)};
- return std::basic_string<utf8_t>(result.begin(), result.end());
- }
-
- template<>
- std::basic_string<char16_t> std_convert<char16_t, char16_t>(const std::basic_string<char16_t>& s)
- {
- return conv16.from_bytes(conv16.to_bytes(s));
- }
-
- template<>
- std::basic_string<char32_t> std_convert<char16_t, char32_t>(const std::basic_string<char16_t>& s)
- {
- return conv32.from_bytes(conv16.to_bytes(s));
- }
-
- template<>
- std::basic_string<utf8_t> std_convert<char32_t, utf8_t>(const std::basic_string<char32_t>& s)
- {
- auto result{conv32.to_bytes(s)};
- return std::basic_string<utf8_t>(result.begin(), result.end());
- }
-
- template<>
- std::basic_string<char16_t> std_convert<char32_t, char16_t>(const std::basic_string<char32_t>& s)
- {
- return conv16.from_bytes(conv32.to_bytes(s));
- }
-
- template<>
- std::basic_string<char32_t> std_convert<char32_t, char32_t>(const std::basic_string<char32_t>& s)
- {
- return conv32.from_bytes(conv32.to_bytes(s));
- }
-
-} // namespace
-
// check assumptions about environment
BOOST_AUTO_TEST_CASE(string_u8string)
{
@@ -457,200 +342,6 @@ BOOST_AUTO_TEST_CASE(is_valid_unicode)
BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF));
}
-struct random_context {
- random_context(int max_value = 0x10FFFF - 0x800): code_point_distribution(0, max_value) {}
- std::random_device rd; // OS random number engine to seed RNG (below)
- std::mt19937 gen{rd()};
- std::uniform_int_distribution<size_t> sequence_length{0, 100000}; // length of sequence: 0 ... 100000 code units
- std::uniform_int_distribution<unsigned long> code_point_distribution;
-};
-
-// generates valid and invalid strings of different type
-template<typename T>
-T generate_random_invalid(random_context& rc, size_t length)
-{
- // Using unsigned long for std::uniform_int_distribution<> because it needs to be basic type according to MSVC
- std::uniform_int_distribution<unsigned long> code_unit{0, std::numeric_limits<typename T::value_type>::max()}; // code unit value
- T result;
- std::generate_n(std::back_inserter(result), length, [&](){return static_cast<typename T::value_type>(code_unit(rc.gen));});
-
- return result;
-}
-
-char32_t generate_random_char(random_context& rc)
-{
- auto result {rc.code_point_distribution(rc.gen)};
- if (result >= 0xD800)
- result += 0x800;
- return static_cast<char32_t>(result);
-}
-
-std::u32string generate_random_string(random_context& rc, size_t length)
-{
- std::u32string result;
- std::generate_n(std::back_inserter(result), length, [&](){return generate_random_char(rc);});
-
- return result;
-}
-
-template<typename From, typename ToTypesCollectionType, size_t i = 0>
-void test_random_invalid(random_context& rc, size_t length)
-{
- //std::cerr << "LENGTH: " << length << std::endl;
- typedef typename std::tuple_element<i,ToTypesCollectionType>::type To;
-
- From r {static_cast<From>(generate_random_invalid<From>(rc, length))};
-
- // base type interface
- try {
- To result{unicode::convert<typename From::value_type,typename To::value_type>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // container type interface
- try {
- To result{unicode::convert<From, To>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // encoding interface
- try {
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(r)};
-
- if (r.empty()) {
- BOOST_CHECK(result.empty());
- } else {
- BOOST_CHECK(!result.empty());
- }
- } catch (const std::invalid_argument&) {
- // OK: this is an expected exception for convert() on bad input
- } catch (const std::exception& ex) {
- BOOST_ERROR("Unexpected error on convert(): " << ex.what());
- }
-
- // iterate over remaining To types
- if constexpr (i + 1 < std::tuple_size<ToTypesCollectionType>::value)
- test_random_invalid<From, ToTypesCollectionType, i + 1>(rc, length);
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_invalid, T, types_collection_type)
-{
- random_context rc;
-
- for (int i = 0; i < 10; i++) {
- test_random_invalid<T,types_collection_type>(rc, rc.sequence_length(rc.gen));
- }
-}
-
-class CPUTimer
-{
-public:
- CPUTimer(const std::string& name = "Timer"): mName(name), mWallTime0(std::chrono::steady_clock::now())
- {
- }
-
- ~CPUTimer()
- {
-#if BOOST_VERSION > 106700
- auto elapsed_cpu{mCPUTimer.elapsed()};
-#endif
- std::cout << mName << ": " << std::chrono::duration<double>(std::chrono::steady_clock::now() - mWallTime0).count() <<
- "s" <<
-#if BOOST_VERSION > 106700
- " (" << (double(elapsed_cpu.user + elapsed_cpu.system) / 1000000000) << "s CPU)" <<
-#endif
- std::endl;
- }
-
-private:
- std::string mName;
- std::chrono::time_point<std::chrono::steady_clock> mWallTime0;
-#if BOOST_VERSION > 106700
- boost::timer::cpu_timer mCPUTimer;
-#endif
-};
-
-template<typename From, typename ToTypesCollectionType, size_t index = 0>
-void test_random_valid(random_context& rc, size_t length, const std::string& description)
-{
- typedef typename std::tuple_element<index,ToTypesCollectionType>::type To;
-
- // Fill UTF-32 data list: source for tests
- std::vector<std::u32string> u32list;
- std::generate_n(std::back_inserter(u32list), 1000, [&](){return generate_random_string(rc, rc.sequence_length(rc.gen));});
-
- // Fill From data list
- std::vector<From> list;
- std::transform(u32list.begin(), u32list.end(), std::back_inserter(list), [](const std::u32string& s){
- return unicode::convert<unicode::UTF_32, typename unicode::Encoding_t<typename From::value_type>>(s);
- });
-
- for (size_t i = 0; i < list.size(); i++) {
- BOOST_CHECK(list[i].size() >= u32list[i].size());
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(list[i])};
- BOOST_CHECK(result.size() >= u32list[i].size());
- auto boost_result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(list[i])};
- BOOST_CHECK_EQUAL(result, boost_result);
- }
-
- {
- CPUTimer timer("Performance test for converting "s + std::to_string(list.size()) +
- " "s + description +
- " from UTF-"s + std::to_string(sizeof(typename From::value_type) * 8) +
- " to UTF-"s + std::to_string(sizeof(typename To::value_type) * 8));
- for (const auto& i: list)
- To result{unicode::convert<typename unicode::Encoding_t<typename From::value_type>,typename unicode::Encoding_t<typename To::value_type>>(i)};
- }
-
- {
- CPUTimer timer(" -> Compare to boost::locale::conv::utf_to_utf");
- for (const auto& i: list)
- To result{boost::locale::conv::utf_to_utf<typename To::value_type, typename From::value_type>(i)};
- }
-
- {
- CPUTimer timer(" -> Compare to std::wstring_convert");
- for (const auto& i: list)
- To result{std_convert<typename From::value_type, typename To::value_type>(i)};
- }
-
- // iterate over remaining To types
- if constexpr (index + 1 < std::tuple_size<ToTypesCollectionType>::value)
- test_random_valid<From, ToTypesCollectionType, index + 1>(rc, length, description);
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_ascii, T, types_collection_type)
-{
- random_context rc{127};
-
- test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "ASCII only strings");
-}
-
-BOOST_AUTO_TEST_CASE_TEMPLATE(random_sequences_valid_all_unicode, T, types_collection_type)
-{
- random_context rc;
-
- test_random_valid<T,types_collection_type>(rc, rc.sequence_length(rc.gen), "All Unicode strings");
-}
-
// Test ISO encodings
BOOST_AUTO_TEST_CASE(convert_iso)
{