summaryrefslogtreecommitdiffhomepage
path: root/src/test-unicode.cpp
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2021-01-27 22:21:04 +0100
committerRoland Reichwein <mail@reichwein.it>2021-01-27 22:21:04 +0100
commitcd4fad54c0be9fb7fca57e8e03228b8b649b5b51 (patch)
tree6b688a27597791bfea60d533f985061f1e6f9e06 /src/test-unicode.cpp
parentfad8b697dff7c7b47f034124ea6eef25e74bd7af (diff)
Bugfixes, tests
Diffstat (limited to 'src/test-unicode.cpp')
-rw-r--r--src/test-unicode.cpp129
1 files changed, 115 insertions, 14 deletions
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 0560c1b..2cc8393 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -1,17 +1,83 @@
#define BOOST_TEST_MODULE unicode_test
#include <boost/test/included/unit_test.hpp>
+#include <boost/test/data/dataset.hpp>
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <exception>
#include <string>
#include <tuple>
#include <type_traits>
+#include <vector>
#include <unicode.h>
-std::tuple<std::basic_string<char8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> t {
- u8"Täst", u"Täst", U"Täst"
+typedef std::tuple<std::basic_string<char8_t>, std::basic_string<char16_t>, std::basic_string<char32_t>> types_collection_type; // one string type per encoding: UTF-8, UTF-16, UTF-32
+
+// Build a braced initializer holding the same string literal as UTF-8 (u8""), UTF-16 (u"") and UTF-32 (U"") by pasting the encoding prefix onto x
+#define SUCCESS_TUPLE(x) {u8 ## x, u ## x, U ## x}
+
+// Success cases: each entry is the same string in all three encodings; converting any one to the other types must succeed
+std::vector<types_collection_type> success_sets {
+ SUCCESS_TUPLE(""),
+ SUCCESS_TUPLE("ASCII string1"),
+ SUCCESS_TUPLE("Täst just looks like German"),
+ SUCCESS_TUPLE("\u732b is chinese for cat"),
+ SUCCESS_TUPLE("\U0001F63A"),
+ SUCCESS_TUPLE("\U0001F63A is a smiling cat"),
+};
+
+// Error cases: malformed input for which conversion to every target type is expected to throw
+std::vector<std::basic_string<char8_t>> failure_strings_char8_t {
+ u8"\x80", // 0x80 is a UTF-8 continuation byte with no preceding lead byte
+ u8"\x81" // 0x81 is likewise a continuation byte without a lead byte
+};
+
+std::vector<std::basic_string<char16_t>> failure_strings_char16_t {
+ u"\xD801", // lone high surrogate: no low surrogate follows
+};
+
+std::vector<std::basic_string<char32_t>> failure_strings_char32_t {
+ U"\xD801", // surrogate code point, not a valid UTF-32 value
+ U"\x10000000", // beyond U+10FFFF, the highest Unicode code point
};
+// Output operators must be in the same namespace as the type itself (std) so that argument-dependent lookup finds them when a test message streams a UTF string
+namespace std { // NOTE(review): the C++ standard does not sanction adding such overloads to namespace std; tolerated here for test diagnostics only — confirm
+
+std::ostream& operator<<(std::ostream& os, std::basic_string<char8_t> const& s) // prints the UTF-8 code units as decimal numbers: "[ n1 n2 ...]"
+{
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint8_t>(c)); // cast so the code unit is formatted as a number
+ os << "]";
+
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s) // prints the UTF-16 code units as decimal numbers: "[ n1 n2 ...]"
+{
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint16_t>(c)); // cast so the code unit is formatted as a number
+ os << "]";
+
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, std::basic_string<char32_t> const& s) // prints the UTF-32 code units as decimal numbers: "[ n1 n2 ...]"
+{
+ os << "[";
+ for (auto& c: s)
+ os << " " << std::to_string(static_cast<uint32_t>(c)); // cast so the code unit is formatted as a number
+ os << "]";
+
+ return os;
+}
+
+}
+
template<size_t i = 0, size_t j = 0, typename... Ts>
void test_utf_to_utf(std::tuple<Ts...>& t)
{
@@ -21,7 +87,7 @@ void test_utf_to_utf(std::tuple<Ts...>& t)
// test
To result { unicode::utf_to_utf<typename From::value_type, typename To::value_type>(std::get<i>(t)) };
- BOOST_CHECK(std::get<j>(t) == result);
+ BOOST_CHECK_MESSAGE(std::get<j>(t) == result, "From " << typeid(From).name() << "(" << i << ", " << std::get<i>(t) << ") to " << typeid(To).name() << "(" << j << ", " << std::get<j>(t) << "), got " << result);
//std::cout << std::to_string(std::tuple_size<typename std::remove_reference<decltype(t)>::type>::value) << "," << std::to_string(i) << "," << std::to_string(j) << std::endl;
@@ -32,27 +98,62 @@ void test_utf_to_utf(std::tuple<Ts...>& t)
test_utf_to_utf<0, j + 1>(t);
}
-BOOST_AUTO_TEST_CASE(utf_to_utf)
+// BOOST_DATA_TEST_CASE is deliberately not used here: Boost.Test tries to assign
+// a separate variable to each tuple element, whereas the whole tuple is needed. See
+// https://lists.boost.org/boost-bugs/2016/05/45214.php
+
+BOOST_AUTO_TEST_CASE(utf_to_utf_success) // runs test_utf_to_utf on every entry of success_sets
{
- test_utf_to_utf(t);
+ for (auto& t: success_sets) // t holds one string in all three encodings
+ test_utf_to_utf(t);
}
-BOOST_AUTO_TEST_CASE(utf8_to_utf16)
+// Expects unicode::utf_to_utf<From, To>(s) to throw for each To taken from the string types of Collection (a std::tuple), starting at element `index`
+template<typename From, typename Collection, size_t index = 0>
+void test_utf_to_utf_failure(std::basic_string<From>& s)
{
- std::u8string u8{u8"ascii string1"};
-
- std::u16string u16{unicode::utf_to_utf<char8_t, char16_t>(u8)};
+ typedef typename std::tuple_element<index, Collection>::type::value_type To; // character type of the index-th string type in Collection
- BOOST_CHECK(u16 == u"ascii string1");
+ try {
+ unicode::utf_to_utf<From,To>(s);
+ BOOST_FAIL("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name());
+ } catch (...) {
+ // OK: an exception is the expected outcome. NOTE(review): catch (...) also intercepts the exception BOOST_FAIL throws to abort the test case, so after a logged failure the remaining types are still tested — confirm this is intended
+ };
+
+ // recurse at compile time over the remaining element types of Collection
+ if constexpr (index + 1 < std::tuple_size<Collection>::value)
+ test_utf_to_utf_failure<From, Collection, index + 1>(s);
}
-BOOST_AUTO_TEST_CASE(utf16_to_utf8)
+BOOST_AUTO_TEST_CASE(utf_to_utf_failure) // every malformed input must make the conversion throw for each target type
{
- std::u16string u16{u"ascii string1"};
+ for (auto& s: failure_strings_char8_t) // malformed UTF-8 inputs
+ test_utf_to_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s); // From is deduced from the string's character type
- std::u8string u8{unicode::utf_to_utf<char16_t, char8_t>(u16)};
+ for (auto& s: failure_strings_char16_t) // malformed UTF-16 inputs
+ test_utf_to_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s);
+
+ for (auto& s: failure_strings_char32_t) // malformed UTF-32 inputs
+ test_utf_to_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s);
+}
+
+BOOST_AUTO_TEST_CASE(is_valid_unicode) // checks unicode::is_valid_unicode on valid code points, out-of-range values and surrogates
+{
+ BOOST_CHECK(unicode::is_valid_unicode('\0'));
+ BOOST_CHECK(unicode::is_valid_unicode(U'a'));
+ BOOST_CHECK(unicode::is_valid_unicode(U'ä'));
+ BOOST_CHECK(unicode::is_valid_unicode(U'\u732b')); // cat chinese
+ BOOST_CHECK(unicode::is_valid_unicode(U'\U0001F63A')); // cat smiley
+ BOOST_CHECK(unicode::is_valid_unicode(0x0001F63A)); // cat smiley, as a numeric literal
- BOOST_CHECK(u8 == u8"ascii string1");
+ BOOST_CHECK(!unicode::is_valid_unicode(0x00110000)); // first value above U+10FFFF
+ BOOST_CHECK(!unicode::is_valid_unicode(0xFFFFFFFF)); // U"\UFFFFFFFF" is invalid C++
+ BOOST_CHECK(!unicode::is_valid_unicode(0x01234567));
+ BOOST_CHECK(!unicode::is_valid_unicode(0x12345678));
+ BOOST_CHECK(!unicode::is_valid_unicode(0xD800)); // start of the surrogate range
+ BOOST_CHECK(!unicode::is_valid_unicode(0xD987)); // inside the surrogate range
+ BOOST_CHECK(!unicode::is_valid_unicode(0xDFFF)); // end of the surrogate range
}
// TODO: