summaryrefslogtreecommitdiffhomepage
path: root/include/unicode/validation.h
diff options
context:
space:
mode:
authorRoland Reichwein <mail@reichwein.it>2022-01-03 16:08:38 +0100
committerRoland Reichwein <mail@reichwein.it>2022-01-03 16:08:38 +0100
commitd234c1ca09af512e9a13579a6fff8d5834d7b36c (patch)
treed36c82ff54ddf04b83e3d5154bc32278f16fcf09 /include/unicode/validation.h
parentec9c8e682d615cd2b51ea0fec05273ed4dcad50a (diff)
Separated out remaining functions from unicode.h, documentation
Diffstat (limited to 'include/unicode/validation.h')
-rw-r--r--include/unicode/validation.h78
1 files changed, 78 insertions, 0 deletions
diff --git a/include/unicode/validation.h b/include/unicode/validation.h
new file mode 100644
index 0000000..b5060c4
--- /dev/null
+++ b/include/unicode/validation.h
@@ -0,0 +1,78 @@
+//
+// Reichwein.IT Unicode Library
+//
+// Functions for validation of UTF (Unicode Transformation Format) encodings
+//
+
+#pragma once
+
+#include "unicode/endian.h"
+#include "unicode/iso.h"
+#include "unicode/optimization.h"
+#include "unicode/predicate.h"
+#include "unicode/types.h"
+#include "unicode/type_traits.h"
+#include "unicode/utf.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace unicode {
+
+ // First variant of is_valid_utf(): the encoding is named explicitly
+ //
+ // Encoding is an encoding type accepted by is_encoding_v, e.g.
+ //   unicode::UTF_8
+ //   unicode::UTF_16
+ //   unicode::UTF_32
+ //
+ // see also type_traits.h and utf.h
+ //
+ // s is a string of the encoding's own string type (Encoding::string_type).
+ // The result is whatever validate_utf() returns when instantiated for the
+ // encoding's value type (Encoding::value_type).
+ //
+ // NOTE(review): validate_utf() is not declared in this header; presumably it
+ // is provided by one of the headers included above - confirm.
+ template<typename Encoding, std::enable_if_t<is_encoding_v<Encoding>, bool> = true>
+ bool is_valid_utf(const typename Encoding::string_type& s)
+ {
+  using code_unit_type = typename Encoding::value_type;
+
+  return validate_utf<code_unit_type>(s);
+ }
+
+ // Second variant of is_valid_utf(): the encoding is selected via the
+ // character type T
+ //
+ // see also type_traits.h for is_char
+ //
+ // T does not appear in the function parameter, so it cannot be deduced and
+ // has to be given explicitly, e.g. is_valid_utf<char16_t>(s). Container is
+ // deduced from s.
+ //
+ // The function walks s from UTF_Trait::begin(s) to UTF_Trait::end(s) and
+ // discards the visited values.
+ //
+ // Returns: false if that walk throws std::invalid_argument, true if it
+ //          completes. Any other exception propagates to the caller.
+ //
+ // NOTE(review): the iterator types are instantiated with T only, not with
+ // Container - confirm that this is intended for containers other than the
+ // default std::basic_string<T>.
+ template<typename T,
+          typename Container=std::basic_string<T>,
+          std::enable_if_t<is_char_v<T>, bool> = true>
+ bool is_valid_utf(const Container& s)
+ {
+  using UTF_Trait = UTF<utf_iterator<T>, utf_back_insert_iterator<T>>;
+
+  try {
+   // Only the act of iterating matters here. The lambda parameter is left
+   // unnamed so that including this header does not raise
+   // unused-parameter warnings.
+   std::for_each(UTF_Trait::begin(s), UTF_Trait::end(s), [](const char32_t&){});
+  } catch (const std::invalid_argument&) {
+   return false;
+  }
+  return true;
+ }
+
+ // Third variant of is_valid_utf(): the encoding is selected via the
+ // container type
+ //
+ // see also type_traits.h for is_container
+ //
+ // The iterator types are instantiated with Container::value_type and
+ // Container. The function walks s from UTF_Trait::begin(s) to
+ // UTF_Trait::end(s) and discards the visited values.
+ //
+ // Returns: false if that walk throws std::invalid_argument, true if it
+ //          completes. Any other exception propagates to the caller.
+ template<typename Container, std::enable_if_t<is_container_v<Container>, bool> = true>
+ bool is_valid_utf(const Container& s)
+ {
+  using UTF_Trait = UTF<utf_iterator<typename Container::value_type, Container>,
+                        utf_back_insert_iterator<typename Container::value_type, Container>>;
+
+  try {
+   // Only the act of iterating matters here. The lambda parameter is left
+   // unnamed so that including this header does not raise
+   // unused-parameter warnings.
+   std::for_each(UTF_Trait::begin(s), UTF_Trait::end(s), [](const char32_t&){});
+  } catch (const std::invalid_argument&) {
+   return false;
+  }
+  return true;
+ }
+
+} // namespace unicode
+