summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Roland Reichwein <mail@reichwein.it> 2022-12-15 11:01:37 +0100
committer Roland Reichwein <mail@reichwein.it> 2022-12-15 11:01:37 +0100
commit 55d1d3612141ef1fe858b2bccb950da51cfe7a17 (patch)
tree 50cc3a90dd5d3eea8e4f1020da165e2bc31e0556
parent 53e50b9fd7f051d039e40f77fa00498dd9a8e2b0 (diff)
Bugfix Validation (HEAD, v1.10, master)
-rw-r--r-- Makefile 12
-rw-r--r-- debian/changelog 6
-rw-r--r-- include/unicode/utf.h 8
-rw-r--r-- src/test-unicode.cpp 9
4 files changed, 31 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index 75e9bc5..98364d1 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,7 @@ ifeq ($(DEBIANVERSION),10)
CXX=g++-8
else
+ifeq ($(wildcard $(shell which clang++-14)),)
ifeq ($(wildcard $(shell which clang++-13)),)
ifeq ($(wildcard $(shell which clang++-12)),)
ifeq ($(wildcard $(shell which clang++-11)),)
@@ -31,6 +32,9 @@ endif
else
CXX=clang++-13
endif
+else
+CXX=clang++-14
+endif
endif
@@ -41,6 +45,14 @@ ifeq ($(CXXFLAGS),)
CXXFLAGS=-O2 -DNDEBUG
endif
+ifeq ($(CXX),clang++-14)
+ifeq ($(ONDEBIAN),yes)
+COMPILER_SUITE=clang
+LIBS+=-fuse-ld=lld-14
+STANDARD=c++20
+endif
+endif
+
ifeq ($(CXX),clang++-13)
ifeq ($(ONDEBIAN),yes)
COMPILER_SUITE=clang
diff --git a/debian/changelog b/debian/changelog
index e886d47..114bd57 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+unicode (1.10) unstable; urgency=medium
+
+ * Validation bugfix, tests
+
+ -- Roland Reichwein <mail@reichwein.it> Thu, 15 Dec 2022 10:54:15 +0100
+
unicode (1.9) unstable; urgency=medium
* Optimizations for validation
diff --git a/include/unicode/utf.h b/include/unicode/utf.h
index 691d4ba..0738242 100644
--- a/include/unicode/utf.h
+++ b/include/unicode/utf.h
@@ -58,13 +58,13 @@ namespace unicode::detail {
while (i < size) {
if (is_utf8_sequence(s[i])) {
i++;
- } else if ((i < size - 1) && is_utf8_sequence(s[i], s[i + 1])) {
+ } else if ((i + 1 < size) && is_utf8_sequence(s[i], s[i + 1])) {
i += 2;
- } else if ((i < size - 2) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) {
+ } else if ((i + 2 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) {
if (((s[i] & 0xF) == 0xD) && ((s[i + 1] & 0x20) == 0x20))
return false; // Reserved for UTF-16 surrogates: 0xD800..0xDFFF
i += 3;
- } else if ((i < size - 3) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) {
+ } else if ((i + 3 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) {
if ((((s[i] & 7) << 2) | ((s[i + 1] >> 4) & 3)) >= 0x11)
return false; // Unicode too big above 0x10FFFF
i += 4;
@@ -101,7 +101,7 @@ namespace unicode::detail {
while (i < size) {
if (is_utf16_sequence(s[i])) {
i++;
- } else if ((i < size - 1) && is_utf16_sequence(s[i], s[i + 1])) {
+ } else if ((i + 1 < size) && is_utf16_sequence(s[i], s[i + 1])) {
i += 2;
} else
#if __cplusplus >= 202002L
diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp
index 948dbcc..0b5ced7 100644
--- a/src/test-unicode.cpp
+++ b/src/test-unicode.cpp
@@ -141,6 +141,15 @@ std::vector<std::basic_string<utf8_t>> failure_strings_char8_t {
(utf8_t*)"text1\xc3\xc3\xa4text3",
(utf8_t*)"text1\xc3text2\xc3\xa4",
+ (utf8_t*)"\xff",
+ (utf8_t*)"\xff\xff",
+ (utf8_t*)"\xff\xff\xff",
+ (utf8_t*)"\xff\xff\xff\xff",
+ (utf8_t*)"\xff\xff\xff\xff\xff",
+ (utf8_t*)"\xff\xff\xff\xff\xff\xff",
+ (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff",
+ (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff\xff",
+
(utf8_t*)"\xF8\x80\x80\x80\x80", // overlong encoding of valid code point
(utf8_t*)"text1\xF8\x80\x80\x80\x80text2",
(utf8_t*)"\xF8\x80\x80\x80\x80text2",