summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Roland Reichwein <mail@reichwein.it> 2021-12-05 20:25:51 +0100
committer: Roland Reichwein <mail@reichwein.it> 2021-12-05 20:25:51 +0100
commit: 40526eb7f247fdfc9d08c39ed3eaa97844b3c448 (patch)
tree: a0dcb4af00ab2027fd0b7d3f6cac38bb55b6da55
parent: 6dcbe207ac96fcf1a73ad6504dd7d6046d8df0cc (diff)
Fix build on C++20, tests (v1.5)
-rw-r--r-- debian/changelog 6
-rw-r--r-- include/unicode.h 4
-rw-r--r-- src/recode.cpp 21
-rw-r--r-- src/test-unicode.cpp 22
4 files changed, 39 insertions, 14 deletions
diff --git a/debian/changelog b/debian/changelog
index 82b8c15..b6d362d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+unicode (1.5) unstable; urgency=medium
+
+ * Fixed C++20 version and tests
+
+ -- Roland Reichwein <mail@reichwein.it> Sun, 05 Dec 2021 20:20:35 +0100
+
unicode (1.3) unstable; urgency=medium
* Include msbuild project files
diff --git a/include/unicode.h b/include/unicode.h
index 33b3199..36fc6ae 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -351,8 +351,8 @@ namespace unicode::detail {
typename utf_back_insert_iterator::string_type& s;
};
- typedef std::unordered_map<utf8_t, char32_t> iso_map_type;
- typedef std::unordered_map<char32_t, utf8_t> iso_map_type_reverse;
+ typedef std::unordered_map<iso_t, char32_t> iso_map_type;
+ typedef std::unordered_map<char32_t, iso_t> iso_map_type_reverse;
// ISO-8859-1 is lower 8-bit of Unicode, so no exceptions necessary
static inline iso_map_type iso_8859_1_map;
diff --git a/src/recode.cpp b/src/recode.cpp
index 8145fb8..89bd69b 100644
--- a/src/recode.cpp
+++ b/src/recode.cpp
@@ -4,7 +4,9 @@
#include <boost/algorithm/string/predicate.hpp>
#include <boost/endian/conversion.hpp>
+#include <boost/version.hpp>
+#include <algorithm>
#include <filesystem>
#include <functional>
#include <iostream>
@@ -58,10 +60,27 @@ std::string get_id()
return get_id(std::string{typeid(From).name()}, typeid(To).name());
}
+// workaround for broken boost::endian::endian_reverse_inplace for C++20 in boost 1.74
+template<typename T> // T is the object type whose bytes get reversed
+void reverse_endian_inplace(T& c) // reverses the byte order of c in place
+{
+ size_t size{sizeof(T)}; // number of bytes occupied by one T
+ uint8_t* p{reinterpret_cast<uint8_t*>(&c)}; // address the storage of c byte by byte
+ for (int i = 0; i < size / 2; i++) { // NOTE(review): i is int, size is size_t (signed/unsigned comparison) - consider size_t i
+ std::swap(p[i], p[size - 1 - i]); // exchange byte i with its mirror position from the end
+ }
+}
+
template<typename T>
void reverse_endian(std::basic_string<T>& s)
{
- std::for_each(s.begin(), s.end(), [](T& c){boost::endian::endian_reverse_inplace(c);});
+ std::for_each(s.begin(), s.end(), [](T& c){ // reverse the byte order of every element of s, in place
+#if BOOST_VERSION > 107400 // newer than Boost 1.74.0: use the library routine
+ boost::endian::endian_reverse_inplace(c);
+#else // Boost 1.74.0 and older: use the local reverse_endian_inplace workaround
+ reverse_endian_inplace(c);
+#endif
+ });
}
std::unordered_map<std::string, std::function<std::string(const std::string&, bool, bool)>> convert_map {};
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 9c68c59..b5b48f0 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -37,13 +37,13 @@ std::vector<types_collection_type> success_sets {
};
// Error cases: throwing upon convert to all other types
-std::vector<std::basic_string<iso_t>> failure_strings_char8_t {
+std::vector<std::basic_string<utf8_t>> failure_strings_char8_t { // NOTE(review): entries now carry the u8 prefix although the comment below says it does not work on MSVC - confirm and update that comment
// using u8"" here doesn't work on MSVC
- "\x80", // utf-8 continuation byte
- "\x81", // utf-8 continuation byte
- "\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
- "\xF8\x80\x80\x80\x80", // overlong encoding
- "\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
+ u8"\x80", // utf-8 continuation byte
+ u8"\x81", // utf-8 continuation byte
+ u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
+ u8"\xF8\x80\x80\x80\x80", // overlong encoding
+ u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
};
std::vector<std::basic_string<char16_t>> failure_strings_char16_t {
@@ -367,10 +367,10 @@ BOOST_AUTO_TEST_CASE(convert)
BOOST_CHECK((unicode::convert<unicode::UTF_8,unicode::UTF_16>(u8"abc")) == std::u16string{u"abc"});
BOOST_CHECK((unicode::convert<unicode::UTF_32,unicode::UTF_16>(U"abc")) == std::u16string{u"abc"});
- BOOST_CHECK((unicode::convert<utf8_t,char16_t>("abc")) == std::u16string{u"abc"});
+ BOOST_CHECK((unicode::convert<utf8_t,char16_t>(u8"abc")) == std::u16string{u"abc"});
BOOST_CHECK((unicode::convert<char32_t,char16_t>(U"abc")) == std::u16string{u"abc"});
- BOOST_CHECK((unicode::convert<char, char32_t>(u8"äöü")) == std::u32string{U"äöü"});
+ BOOST_CHECK((unicode::convert<char, char32_t>("äöü")) == std::u32string{U"äöü"});
#ifdef _WIN32
BOOST_CHECK(sizeof(wchar_t) == 2);
@@ -381,9 +381,9 @@ BOOST_AUTO_TEST_CASE(convert)
// For the following checks, wchar_t size and encoding is system dependent:
// Windows: UTF-16
// Linux: UTF-32
- BOOST_CHECK((unicode::convert<char, wchar_t>(u8"äöü")) == std::wstring{L"äöü"});
- BOOST_CHECK((unicode::convert<char, wchar_t>(u8"\u732b")) == std::wstring{L"\u732b"});
- BOOST_CHECK((unicode::convert<char, wchar_t>(u8"\U0001F63A")) == std::wstring{L"\U0001F63A"});
+ BOOST_CHECK((unicode::convert<char, wchar_t>("äöü")) == std::wstring{L"äöü"});
+ BOOST_CHECK((unicode::convert<char, wchar_t>("\u732b")) == std::wstring{L"\u732b"});
+ BOOST_CHECK((unicode::convert<char, wchar_t>("\U0001F63A")) == std::wstring{L"\U0001F63A"});
BOOST_CHECK((unicode::convert<wchar_t, char32_t>(L"\U0001F63A")) == std::u32string{U"\U0001F63A"});
BOOST_CHECK((unicode::convert<wchar_t, utf8_t>(L"\U0001F63A")) == std::u8string{u8"\U0001F63A"});