summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2021-01-30 19:20:15 +0100
committerRoland Reichwein <mail@reichwein.it>2021-01-30 19:20:15 +0100
commit2ef9f51df48b14556e236d14213233e1bd7f829a (patch)
treef5230e326501ebaec1baca9615dc69b7538f1911
parentee9dfd30e8c02e9a7457385786218d2b4b935720 (diff)
Added Support for Debian and Ubuntu, add is_valid_utf()
-rw-r--r--Makefile20
-rw-r--r--debian/control3
-rw-r--r--include/unicode.h11
-rw-r--r--src/test-unicode.cpp50
4 files changed, 80 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index 46626e8..b66c17e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,22 +1,38 @@
PROJECTNAME=unicode
VERSION=$(shell dpkg-parsechangelog --show-field Version)
-DISTROS=base #debian10
+DISTROS=base debian10 ubuntu2004 ubuntu2010
+ifeq ($(wildcard $(shell which clang++-11)),)
+ifeq ($(wildcard $(shell which clang++)),)
+$(error No clang++-11 nor clang++ available!)
+else
+CXX=clang++
+endif
+else
CXX=clang++-11
# GCC is buggy: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282
#CXX=g++-10
+endif
STANDARD=c++17
#STANDARD=c++20
+ifeq ($(CXXFLAGS),)
CXXFLAGS=-O0 -g -D_DEBUG
#CXXFLAGS=-O2 -DNDEBUG
+endif
CXXFLAGS+=-Wall -Iinclude -std=$(STANDARD)
ifeq ($(CXX),clang++-11)
COMPILER_SUITE=clang
+LIBS+=-fuse-ld=lld-11
+endif
+
+ifeq ($(CXX),clang++)
+COMPILER_SUITE=clang
+LIBS+=-fuse-ld=lld
endif
ifeq ($(COMPILER_SUITE),clang)
@@ -31,7 +47,6 @@ LDLIBS+=\
ifeq ($(COMPILER_SUITE),clang)
LIBS+= \
--fuse-ld=lld-11 \
-lc++ \
-lc++abi
#-lc++fs
@@ -68,6 +83,7 @@ dep: $(SRC:.cpp=.d)
clean:
-rm -f src/recode src/test-unicode
+ -rm -rf result
-find . -name '*.o' -o -name '*.d' -o -name '*.gcno' -o -name '*.gcda' | xargs rm -f
install:
diff --git a/debian/control b/debian/control
index 42b6c22..1572512 100644
--- a/debian/control
+++ b/debian/control
@@ -16,3 +16,6 @@ Description: Unicode conversion library
.
Features:
- Additional support for ISO-8859-1 encoding (Latin-1) as subset of Unicode
+ - Additional support for ISO-8859-15
+ - Tested on Debian 10, Ubuntu 20.04, Ubuntu 20.10
+ - C++17 and C++20 compatible
diff --git a/include/unicode.h b/include/unicode.h
index 9e0132b..f31cbac 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -339,5 +339,16 @@ namespace unicode {
return result;
}
+ template<typename T>
+ bool is_valid_utf(const std::basic_string<T>& s)
+ {
+ try {
+ std::for_each(utf_begin<T>(s), utf_end<T>(s), [](const T& c){});
+ } catch(...) {
+ return false;
+ }
+ return true;
+ }
+
} // namespace unicode
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 05370c7..3d67124 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -37,7 +37,7 @@ std::vector<types_collection_type> success_sets {
std::vector<std::basic_string<utf8_t>> failure_strings_char8_t {
u8"\x80", // utf-8 continuation byte
u8"\x81", // utf-8 continuation byte
- u8"\xc3ä", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
+ u8"\xc3\xc3\xa4", // initial byte of utf-8 "ä", followed by valid utf-8 "ä"
u8"\xF8\x80\x80\x80\x80", // overlong encoding
u8"\xF7\xBF\xBF\xBF", // valid encoding of invalid code point
};
@@ -56,6 +56,7 @@ std::vector<std::basic_string<char32_t>> failure_strings_char32_t {
// output operators must be in same namespace as the type itself
namespace std {
+#ifdef __cpp_char8_t
std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s)
{
os << "[";
@@ -65,6 +66,7 @@ std::ostream& operator<<(std::ostream& os, std::basic_string<utf8_t> const& s)
return os;
}
+#endif
std::ostream& operator<<(std::ostream& os, std::basic_string<char16_t> const& s)
{
@@ -118,6 +120,27 @@ BOOST_AUTO_TEST_CASE(utf_to_utf_success)
test_utf_to_utf(t);
}
+// Checks that is_valid_utf() accepts element i of tuple t, then recurses at
+// compile time over the remaining elements (i + 1, i + 2, ...).
+// Each tuple element is expected to be a string type exposing value_type.
+template<size_t i = 0, typename... Ts>
+void test_is_valid_utf(std::tuple<Ts...>& t)
+{
+ using tuple_type = std::tuple<Ts...>;
+ using T = std::tuple_element_t<i, tuple_type>;
+
+ // validate the string stored at the current position
+ auto& candidate = std::get<i>(t);
+ const bool result = unicode::is_valid_utf<typename T::value_type>(candidate);
+
+ BOOST_CHECK_MESSAGE(result == true, "is_valid_utf w/ " << typeid(T).name() << "(" << i << ", " << candidate << "), got " << result);
+
+ // continue with the next tuple element, if there is one
+ if constexpr (i + 1 < std::tuple_size_v<tuple_type>) {
+  test_is_valid_utf<i + 1>(t);
+ }
+}
+
+// Every string in every tuple of success_sets must be accepted by is_valid_utf().
+BOOST_AUTO_TEST_CASE(is_valid_utf_success)
+{
+ for (auto& success_set: success_sets) {
+  test_is_valid_utf(success_set);
+ }
+}
+
// iterate over std::tuple T types
template<typename From, typename Collection, size_t index = 0>
void test_utf_to_utf_failure(std::basic_string<From>& s)
@@ -126,7 +149,7 @@ void test_utf_to_utf_failure(std::basic_string<From>& s)
try {
unicode::utf_to_utf<From,To>(s);
- BOOST_FAIL("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name());
+ BOOST_ERROR("Expected exception at index: " << index << ", " << typeid(From).name() << " -> " << typeid(To).name());
} catch (...) {
// OK
};
@@ -148,6 +171,29 @@ BOOST_AUTO_TEST_CASE(utf_to_utf_failure)
test_utf_to_utf_failure<typename std::remove_reference<decltype(s)>::type::value_type, types_collection_type>(s);
}
+// Checks that is_valid_utf() rejects the string s.
+//
+// T is the code unit type of s. Collection and index are not needed by the
+// check itself: is_valid_utf<T>(s) has no target type, so the result is the
+// same for every element of Collection. They are kept as template parameters
+// (with the same signature as test_utf_to_utf_failure) so existing call sites
+// compile unchanged, and index still appears in the failure message.
+// The check runs exactly once per string, so one bad string produces one
+// failure report rather than one per Collection element.
+template<typename T, typename Collection, size_t index = 0>
+void test_is_valid_utf_failure(std::basic_string<T>& s)
+{
+ BOOST_CHECK_MESSAGE(unicode::is_valid_utf<T>(s) == false, "Expected bad UTF at index: " << index << ", " << typeid(T).name());
+}
+
+// Every string in the three failure lists must be rejected by is_valid_utf().
+BOOST_AUTO_TEST_CASE(is_valid_utf_failure)
+{
+ // Runs the rejection check on each string of one list; the code unit type
+ // is taken from the string type of the list element.
+ auto expect_all_invalid = [](auto& strings) {
+  for (auto& s: strings) {
+   using char_type = typename std::remove_reference_t<decltype(s)>::value_type;
+   test_is_valid_utf_failure<char_type, types_collection_type>(s);
+  }
+ };
+
+ expect_all_invalid(failure_strings_char8_t);
+ expect_all_invalid(failure_strings_char16_t);
+ expect_all_invalid(failure_strings_char32_t);
+}
+
BOOST_AUTO_TEST_CASE(is_valid_unicode)
{
BOOST_CHECK(unicode::is_valid_unicode('\0'));