From d234c1ca09af512e9a13579a6fff8d5834d7b36c Mon Sep 17 00:00:00 2001 From: Roland Reichwein Date: Mon, 3 Jan 2022 16:08:38 +0100 Subject: Separated out remaining functions from unicode.h, documentation --- include/unicode/conversion.h | 113 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 include/unicode/conversion.h (limited to 'include/unicode/conversion.h') diff --git a/include/unicode/conversion.h b/include/unicode/conversion.h new file mode 100644 index 0000000..dc57084 --- /dev/null +++ b/include/unicode/conversion.h @@ -0,0 +1,113 @@ +// +// Reichwein.IT Unicode Library +// +// Functions for conversion between UTF and ISO encodings +// + +#pragma once + +#include "unicode/endian.h" +#include "unicode/iso.h" +#include "unicode/optimization.h" +#include "unicode/predicate.h" +#include "unicode/types.h" +#include "unicode/type_traits.h" +#include "unicode/utf.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace unicode { + + // First variant of convert(): Specification of encodings explicitly + // + // e.g. + // unicode::UTF_8 + // unicode::UTF_16 + // unicode::UTF_32 + // unicode::ISO_8859_1 + // unicode::ISO_8859_15 + // + // see also utf.h and iso.h + // + // From and To are Encodings + // + // throws std::invalid_argument on conversion error + template && is_encoding_v, bool> = true> + typename To::string_type convert(const typename From::string_type& s) + { + // At compile time, decide which optimization to use, with fallback to + // iterating with std::copy() + + // if input type == output type, only validate and return input, if appropriate + if constexpr(sizeof(typename From::value_type) == sizeof(typename To::value_type) && + is_utf_encoding_v && is_utf_encoding_v) { + if (validate_utf(s)) { + return s; + } else { + throw std::invalid_argument("Invalid UTF input"); + } + } else if constexpr(accu_size == 8 && is_little_endian() && is_utf_8_v && + is_utf_encoding_v && is_utf_encoding_v) { // endian specific optimization + return convert_optimized_utf(s); + } else if constexpr(accu_size == 4 || accu_size == 8) { // accu size specific optimization with speedup for 7bit input + return convert_optimized(s); + } else { + typename To::string_type result; + std::copy(From::begin(s), From::end(s), To::back_inserter(result)); + return result; + } + } + + // Second variant of convert(): Specification of encodings via character type + // + // see also type_traits.h for is_char + // + // From and To are from: utf8_t (i.e. char or char8_t (C++20)), char16_t and char32_t, char, wchar_t, uint8_t, uint16_t, uint32_t + // + // throws std::invalid_argument on conversion error + template, + typename ToContainer=std::basic_string, + std::enable_if_t && is_char_v, bool> = true> + ToContainer convert(const FromContainer& s) + { + typedef UTF, utf_back_insert_iterator> UTF_Trait; + + ToContainer result; + + std::copy(UTF_Trait::begin(s), UTF_Trait::end(s), UTF_Trait::back_inserter(result)); + + return result; + } + + // Third variant of convert(): Specification of encodings via container type + // + // see also type_traits.h for is_container + // + // From and To are containers + // + // throws std::invalid_argument on conversion error + template && is_container_v, bool> = true + > + ToContainer convert(const FromContainer& s) + { + typedef UTF, utf_back_insert_iterator> UTF_Trait; + + ToContainer result; + + std::copy(UTF_Trait::begin(s), UTF_Trait::end(s), UTF_Trait::back_inserter(result)); + + return result; + } + +} // namespace unicode + -- cgit v1.2.3