13 constexpr static auto is_start_of_one_byte_sequence(
c8 byte) ->
bool {
17 constexpr static auto is_start_of_two_byte_sequence(
c8 byte) ->
bool {
18 return byte >= 0xC2 &&
byte <= 0xDF;
21 constexpr static auto is_start_of_three_byte_sequence(
c8 byte) ->
bool {
22 return byte >= 0xE0 &&
byte <= 0xEF;
25 constexpr static auto is_start_of_four_byte_sequence(
c8 byte) ->
bool {
26 return byte >= 0xF0 &&
byte <= 0xF4;
29 constexpr static auto is_start_of_multi_byte_sequence(
c8 byte) ->
bool {
30 return is_start_of_two_byte_sequence(
byte) || is_start_of_three_byte_sequence(
byte) ||
31 is_start_of_four_byte_sequence(
byte);
34 constexpr static auto is_valid_first_byte(
c8 byte) ->
bool {
35 return is_start_of_one_byte_sequence(
byte) || is_start_of_multi_byte_sequence(
byte);
38 constexpr static auto is_valid_second_byte(
c8 first_byte,
c8 second_byte) ->
bool {
41 return second_byte >= 0xA0 && second_byte <= 0xBF;
43 return second_byte >= 0x80 && second_byte <= 0x9F;
45 return second_byte >= 0x90 && second_byte <= 0xBF;
47 return second_byte >= 0x80 && second_byte <= 0x8F;
49 return second_byte >= 0x80 && second_byte <= 0xBF;
53 constexpr static auto is_valid_third_byte([[maybe_unused]]
c8 first_byte,
c8 third_byte) ->
bool {
54 return third_byte >= 0x80 && third_byte <= 0xBF;
57 constexpr static auto is_valid_fourth_byte([[maybe_unused]]
c8 first_byte,
c8 fourth_byte) ->
bool {
58 return fourth_byte >= 0x80 && fourth_byte <= 0xBF;
61 constexpr static auto byte_sequence_length(
c8 first_byte) ->
u8 {
62 return is_start_of_one_byte_sequence(first_byte) ? 1
63 : is_start_of_two_byte_sequence(first_byte) ? 2
64 : is_start_of_three_byte_sequence(first_byte) ? 3
// Decoding fragment: the lead byte at *m_data determines how many bytes the sequence occupies.
74 auto length = byte_sequence_length(*m_data);
// Shifting 0b11111111 right by `length` yields a mask that clears the top `length` bits of the lead byte.
75 auto first_byte_mask = 0b11111111 >> length;
// The masked lead byte seeds the decoded value.
76 auto result =
static_cast<c32>(*m_data & first_byte_mask);
// ORs the low six bits of byte `i` into the value.
// NOTE(review): the loop header and any shift applied to `result` are not visible in this listing —
// confirm against the full file.
79 result |= m_data[i] & 0b00111111;
84 constexpr void advance_one() { m_data += byte_sequence_length(*m_data); }
// Closing part of a do/while loop: it repeats until `*m_data` is a valid first byte.
// NOTE(review): the loop body is not visible in this listing.
88 }
while (!is_valid_first_byte(*m_data));
91 constexpr auto data() const ->
c8 const* {
return m_data; }
93 constexpr explicit operator c8 const*()
const {
return data(); }
// The result is the comparison of the two operands' data() pointers.
97 return a.data() == b.data();
103 c8 const* m_data {
nullptr };
// Template header with a single unnamed type parameter defaulted to void.
114 template<
typename =
void>
// Loop while the index `i` is still inside `data`.
117 while (i <
data.size()) {
// Byte at the current index, treated as the lead byte of the next sequence.
118 auto first_byte =
data.data()[i];
// Branch taken when that byte cannot start a UTF-8 sequence.
119 if (!utf8::is_valid_first_byte(first_byte)) {
// Number of bytes the sequence should occupy, as implied by its lead byte.
122 auto length = utf8::byte_sequence_length(first_byte);
// Branch taken when the sequence would extend past the end of `data`.
123 if (i + length >
data.size()) {
// Trailing bytes are checked against the lead byte: fourth (i + 3), then third (i + 2), then second (i + 1).
// NOTE(review): the control flow that selects which of these checks run for a given length, and the
// statements executed inside each branch, are not visible in this listing.
128 if (!utf8::is_valid_fourth_byte(first_byte,
data.data()[i + 3])) {
133 if (!utf8::is_valid_third_byte(first_byte,
data.data()[i + 2])) {
138 if (!utf8::is_valid_second_byte(first_byte,
data.data()[i + 1])) {
// Tail of a signature: the last parameter is `offset`, and the function returns bool.
151 size_t offset) ->
bool {
// An offset at or past the end is accepted only when it equals data.size() exactly.
153 if (offset >=
data.size()) {
154 return offset ==
data.size();
// Otherwise the result is whether the byte at `offset` is a valid first byte of a sequence.
156 return utf8::is_valid_first_byte(
data[offset]);
// Encoding fragment: the code point is converted to u32 and `result` is resized to 1-4 entries
// depending on its magnitude.
// NOTE(review): the declaration of `result` and the return statement are not visible in this listing.
161 auto code_point_value =
static_cast<u32>(code_point);
// Values up to 0x7F are stored unchanged in a single entry.
162 if (code_point_value <= 0x7F) {
163 (void) result.resize(1);
164 result[0] = code_point_value;
165 }
// Values up to 0x7FF: two entries, 110xxxxx 10xxxxxx.
else if (code_point_value <= 0x7FF) {
166 (void) result.resize(2);
167 result[0] = 0b11000000 | (code_point_value >> 6);
168 result[1] = 0b10000000 | (code_point_value & 0x3F);
169 }
// Values up to 0xFFFF: three entries, 1110xxxx 10xxxxxx 10xxxxxx.
else if (code_point_value <= 0xFFFF) {
170 (void) result.resize(3);
171 result[0] = 0b11100000 | (code_point_value >> 12);
172 result[1] = 0b10000000 | ((code_point_value >> 6) & 0x3F);
173 result[2] = 0b10000000 | (code_point_value & 0x3F);
// Four-entry form, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
// NOTE(review): the branch header introducing this case is not visible in this listing; presumably
// it is the final else for values above 0xFFFF.
175 (void) result.resize(4);
176 result[0] = 0b11110000 | (code_point_value >> 18);
177 result[1] = 0b10000000 | ((code_point_value >> 12) & 0x3F);
178 result[2] = 0b10000000 | ((code_point_value >> 6) & 0x3F);
179 result[3] = 0b10000000 | (code_point_value & 0x3F);
Definition static_vector.h:17
Definition utf8_encoding.h:107
utf8::Utf8Iterator Iterator
Definition utf8_encoding.h:111
constexpr friend auto tag_invoke(types::Tag< encoding::convert_to_code_units >, Utf8Encoding const &, c32 code_point)
Definition utf8_encoding.h:159
constexpr friend auto tag_invoke(types::Tag< encoding::validate >, Utf8Encoding const &, Span< c8 const > data) -> bool
Definition utf8_encoding.h:115
constexpr friend auto tag_invoke(types::Tag< encoding::valid_byte_offset >, Utf8Encoding const &, Span< c8 const > data, size_t offset) -> bool
Definition utf8_encoding.h:150
c8 CodeUnit
Definition utf8_encoding.h:109
c32 CodePoint
Definition utf8_encoding.h:110
Definition utf8_encoding.h:68
constexpr void advance_one()
Definition utf8_encoding.h:84
constexpr void back_one()
Definition utf8_encoding.h:85
constexpr auto operator*() const -> c32
Definition utf8_encoding.h:73
constexpr friend auto operator==(Utf8Iterator const &a, Utf8Iterator const &b) -> bool
Definition utf8_encoding.h:96
constexpr auto data() const -> c8 const *
Definition utf8_encoding.h:91
constexpr friend auto operator<=>(Utf8Iterator const &a, Utf8Iterator const &b)
Definition utf8_encoding.h:99
constexpr Utf8Iterator(c8 const *data)
Definition utf8_encoding.h:71
Definition span_forward_declaration.h:10
Definition utf8_encoding.h:12
Definition constant_string_interface.h:31
constexpr auto data(concepts::detail::ConstantString auto const &string)
Definition string_data.h:6
constexpr auto range
Definition range.h:22
char8_t c8
Definition char.h:4
__UINT8_TYPE__ u8
Definition integers.h:9
char32_t c32
Definition char.h:6
__UINT32_TYPE__ u32
Definition integers.h:11
di::meta::Decay< decltype(T)> Tag
Definition tag_invoke.h:28