From 55d1d3612141ef1fe858b2bccb950da51cfe7a17 Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Thu, 15 Dec 2022 11:01:37 +0100 Subject: Bugfix Validation --- Makefile | 12 ++++++++++++ debian/changelog | 6 ++++++ include/unicode/utf.h | 8 ++++---- src/test-unicode.cpp | 9 +++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 75e9bc5..98364d1 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ ifeq ($(DEBIANVERSION),10) CXX=g++-8 else +ifeq ($(wildcard $(shell which clang++-14)),) ifeq ($(wildcard $(shell which clang++-13)),) ifeq ($(wildcard $(shell which clang++-12)),) ifeq ($(wildcard $(shell which clang++-11)),) @@ -31,6 +32,9 @@ endif else CXX=clang++-13 endif +else +CXX=clang++-14 +endif endif @@ -41,6 +45,14 @@ ifeq ($(CXXFLAGS),) CXXFLAGS=-O2 -DNDEBUG endif +ifeq ($(CXX),clang++-14) +ifeq ($(ONDEBIAN),yes) +COMPILER_SUITE=clang +LIBS+=-fuse-ld=lld-14 +STANDARD=c++20 +endif +endif + ifeq ($(CXX),clang++-13) ifeq ($(ONDEBIAN),yes) COMPILER_SUITE=clang diff --git a/debian/changelog b/debian/changelog index e886d47..114bd57 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +unicode (1.10) unstable; urgency=medium + + * Validation bugfix, tests + + -- Roland Reichwein Thu, 15 Dec 2022 10:54:15 +0100 + unicode (1.9) unstable; urgency=medium * Optimizations for validation diff --git a/include/unicode/utf.h b/include/unicode/utf.h index 691d4ba..0738242 100644 --- a/include/unicode/utf.h +++ b/include/unicode/utf.h @@ -58,13 +58,13 @@ namespace unicode::detail { while (i < size) { if (is_utf8_sequence(s[i])) { i++; - } else if ((i < size - 1) && is_utf8_sequence(s[i], s[i + 1])) { + } else if ((i + 1 < size) && is_utf8_sequence(s[i], s[i + 1])) { i += 2; - } else if ((i < size - 2) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) { + } else if ((i + 2 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2])) { if (((s[i] & 0xF) == 0xD) && ((s[i + 1] & 0x20) == 0x20)) return false; // Reserved for UTF-16 surrogates: 0xD800..0xDFFF i += 3; - } else if ((i < size - 3) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) { + } else if ((i + 3 < size) && is_utf8_sequence(s[i], s[i + 1], s[i + 2], s[i + 3])) { if ((((s[i] & 7) << 2) | ((s[i + 1] >> 4) & 3)) >= 0x11) return false; // Unicode too big above 0x10FFFF i += 4; @@ -101,7 +101,7 @@ namespace unicode::detail { while (i < size) { if (is_utf16_sequence(s[i])) { i++; - } else if ((i < size - 1) && is_utf16_sequence(s[i], s[i + 1])) { + } else if ((i + 1 < size) && is_utf16_sequence(s[i], s[i + 1])) { i += 2; } else #if __cplusplus >= 202002L diff --git a/src/test-unicode.cpp b/src/test-unicode.cpp index 948dbcc..0b5ced7 100644 --- a/src/test-unicode.cpp +++ b/src/test-unicode.cpp @@ -141,6 +141,15 @@ std::vector> failure_strings_char8_t { (utf8_t*)"text1\xc3\xc3\xa4text3", (utf8_t*)"text1\xc3text2\xc3\xa4", + (utf8_t*)"\xff", + (utf8_t*)"\xff\xff", + (utf8_t*)"\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xff\xff\xff\xff\xff\xff\xff\xff", + (utf8_t*)"\xF8\x80\x80\x80\x80", // overlong encoding of valid code point (utf8_t*)"text1\xF8\x80\x80\x80\x80text2", (utf8_t*)"\xF8\x80\x80\x80\x80text2", -- cgit v1.2.3