summaryrefslogtreecommitdiffhomepage
path: root/include/unicode.h
blob: b90ed15a64d840767d548accee4494973c755987 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// libunicode
// Copyright (C) 2021 Roland Reichwein

#pragma once

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>

namespace {

 struct utf8_iterator
 {
  typedef char32_t value_type;
  typedef char32_t& reference;

  void get_value()
  {
   // TODO: set value to current data in *iterator ...
   value = 'X';
  }

  size_t get_number_of_utf8_bytes()
  {
   // TODO: how many bytes
   return 1;
  }

  // pre-increment
  utf8_iterator& operator++()
  {
   iterator += get_number_of_utf8_bytes();
   return *this;
  }

  bool operator!=(const utf8_iterator& other) const
  {
   return iterator != other.iterator;
  }

  reference operator*()
  {
   get_value();
   return value;
  }

  std::u8string::const_iterator iterator;

  std::u8string::const_iterator end_iterator;
  value_type value{};
 };

 // Output iterator that appends Unicode code points to a std::u16string as
 // UTF-16 code units.
 //
 // Assigning a char32_t through the iterator (*it = cp) appends one code unit
 // for code points up to U+FFFF and a high/low surrogate pair for code points
 // above that. Values that are not Unicode scalar values (U+D800..U+DFFF, or
 // anything above U+10FFFF) are appended as U+FFFD. Incrementing is a no-op:
 // the insert position is always the end of the string.
 struct utf16_back_insert_iterator
 {
  // Member types looked up by std::iterator_traits and the standard algorithms.
  typedef std::output_iterator_tag iterator_category;
  typedef void value_type;
  typedef std::ptrdiff_t difference_type;
  typedef void pointer;
  typedef utf16_back_insert_iterator& reference;

  // Binds the iterator to s; s must outlive the iterator and its copies.
  utf16_back_insert_iterator(std::u16string& s): s(s) {}

  // no-op
  utf16_back_insert_iterator& operator++()
  {
   return *this;
  }

  // no-op; the returned copy appends to the same string
  utf16_back_insert_iterator operator++(int)
  {
   return *this;
  }

  // support *x = value, together with operator=()
  reference operator*()
  {
   return *this;
  }

  // append utf-16 word sequence
  reference operator=(const char32_t& value)
  {
   const bool surrogate = value >= 0xD800 && value <= 0xDFFF;
   if (surrogate || value > 0x10FFFF)
   {
    // not encodable in UTF-16: substitute U+FFFD REPLACEMENT CHARACTER
    s.push_back(static_cast<char16_t>(0xFFFD));
   }
   else if (value < 0x10000)
   {
    // Basic Multilingual Plane: the code point is its own code unit
    s.push_back(static_cast<char16_t>(value));
   }
   else
   {
    // Supplementary plane: split the 20-bit offset from U+10000 into a
    // high surrogate (upper 10 bits) and a low surrogate (lower 10 bits).
    const char32_t offset = value - 0x10000;
    s.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));
    s.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF)));
   }
   return *this;
  }

  std::u16string& s;
 };

 // Convenience factory: builds a utf16_back_insert_iterator bound to s.
 // The returned iterator holds a reference to s, so s must outlive it.
 utf16_back_insert_iterator utf16_back_inserter(std::u16string& s)
 {
  utf16_back_insert_iterator inserter(s);
  return inserter;
 }

 // Builds a utf8_iterator positioned at the first byte of s. The iterator
 // also records the end of s; it refers into s, so s must outlive it.
 utf8_iterator utf8_begin(const std::u8string& s)
 {
  const auto first = s.cbegin();
  const auto last = s.cend();
  return utf8_iterator{first, last};
 }

 // Builds the past-the-end utf8_iterator for s: both its position and its
 // recorded end are s.cend(). It refers into s, so s must outlive it.
 utf8_iterator utf8_end(const std::u8string& s)
 {
  const auto last = s.cend();
  return utf8_iterator{last, last};
 }

} // namespace

namespace unicode {

// Returns the number of bytes in the UTF-8 byte sequence of the first code
// point found; returns 0 if none is found or the sequence is empty.
//size_t utf8_start()
//{
//}

// Converts the UTF-8 string s to a UTF-16 string.
//
// The code points obtained by iterating from utf8_begin(s) to utf8_end(s) are
// copied through utf16_back_inserter, which appends them to the result.
//
// Declared inline because this header defines the function: without it,
// including the header from more than one translation unit produces multiple
// definitions of the same symbol at link time.
// NOTE(review): the helpers called here are declared in this header's unnamed
// namespace, so every translation unit gets its own copies; consider moving
// them to a named detail namespace.
inline std::u16string utf8_to_utf16(const std::u8string& s)
{
 std::u16string result;

 std::copy(utf8_begin(s), utf8_end(s), utf16_back_inserter(result));

 return result;
}

//std::u8string utf16_to_utf8(const std::u16string& s)
//{
// std::u8string result;
//
// std::transform(utf16_begin(s), utf16_end(s), std::back_inserter(result));
//
// return result;
//}

} // namespace unicode