8#ifndef BOOST_NOWIDE_UTF_HPP_INCLUDED
9#define BOOST_NOWIDE_UTF_HPP_INCLUDED
46 if(0xD800 <= v && v <= 0xDFFF)
51#ifdef BOOST_NOWIDE_DOXYGEN
55 template<
typename CharType,
int size = sizeof(CharType)>
76 template<
typename Iterator>
120 template<
typename Iterator>
127 template<
typename Iterator>
133 template<
typename CharType,
int size = sizeof(CharType)>
136 template<
typename CharType>
143 unsigned char c = ci;
146 if(BOOST_UNLIKELY(c < 194))
152 if(BOOST_LIKELY(c <= 244))
164 }
else if(value <= 0x7FF)
167 }
else if(BOOST_LIKELY(value <= 0xFFFF))
178 unsigned char c = ci;
179 return (c & 0xC0) == 0x80;
187 template<
typename Iterator>
190 if(BOOST_UNLIKELY(p == e))
193 unsigned char lead = *p++;
198 if(BOOST_UNLIKELY(trail_size < 0))
206 code_point c = lead & ((1 << (6 - trail_size)) - 1);
213 if(BOOST_UNLIKELY(p == e))
218 c = (c << 6) | (tmp & 0x3F);
219 BOOST_NOWIDE_FALLTHROUGH;
221 if(BOOST_UNLIKELY(p == e))
226 c = (c << 6) | (tmp & 0x3F);
227 BOOST_NOWIDE_FALLTHROUGH;
229 if(BOOST_UNLIKELY(p == e))
234 c = (c << 6) | (tmp & 0x3F);
249 template<
typename Iterator>
252 unsigned char lead = *p++;
260 else if(BOOST_LIKELY(lead < 240))
265 code_point c = lead & ((1 << (6 - trail_size)) - 1);
269 case 3: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
270 case 2: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
271 case 1: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F);
277 template<
typename Iterator>
283 }
else if(value <= 0x7FF)
285 *out++ =
static_cast<char_type>((value >> 6) | 0xC0);
286 *out++ =
static_cast<char_type>((value & 0x3F) | 0x80);
287 }
else if(BOOST_LIKELY(value <= 0xFFFF))
289 *out++ =
static_cast<char_type>((value >> 12) | 0xE0);
290 *out++ =
static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
291 *out++ =
static_cast<char_type>((value & 0x3F) | 0x80);
294 *out++ =
static_cast<char_type>((value >> 18) | 0xF0);
295 *out++ =
static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
296 *out++ =
static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
297 *out++ =
static_cast<char_type>((value & 0x3F) | 0x80);
303 template<
typename CharType>
304 struct utf_traits<CharType, 2>
309 static bool is_single_codepoint(uint16_t x)
312 return x <= 0xD7FF || x >= 0xE000;
314 static bool is_first_surrogate(uint16_t x)
317 return 0xD800 <= x && x <= 0xDBFF;
319 static bool is_second_surrogate(uint16_t x)
322 return 0xDC00 <= x && x <= 0xDFFF;
324 static code_point combine_surrogate(uint16_t w1, uint16_t w2)
326 return ((
code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
330 if(is_first_surrogate(c))
332 if(is_second_surrogate(c))
339 return is_second_surrogate(c);
344 return !is_second_surrogate(c);
347 template<
typename It>
350 if(BOOST_UNLIKELY(current == last))
352 uint16_t w1 = *current++;
353 if(BOOST_LIKELY(is_single_codepoint(w1)))
362 uint16_t w2 = *current++;
363 if(!is_second_surrogate(w2))
365 return combine_surrogate(w1, w2);
367 template<
typename It>
370 uint16_t w1 = *current++;
371 if(BOOST_LIKELY(is_single_codepoint(w1)))
375 uint16_t w2 = *current++;
376 return combine_surrogate(w1, w2);
382 return u >= 0x10000 ? 2 : 1;
384 template<
typename It>
387 if(BOOST_LIKELY(u <= 0xFFFF))
393 *out++ =
static_cast<char_type>(0xD800 | (u >> 10));
394 *out++ =
static_cast<char_type>(0xDC00 | (u & 0x3FF));
400 template<
typename CharType>
401 struct utf_traits<CharType, 4>
419 template<
typename It>
425 template<
typename It>
428 if(BOOST_UNLIKELY(current == last))
440 template<
typename It>
Namespace that holds basic operations on UTF-encoded sequences.
Definition: convert.hpp:19
bool is_valid_codepoint(code_point v)
The function checks whether v is a valid code point.
Definition: utf.hpp:42
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:27
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:37
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:32
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57
static code_point decode(Iterator &p, Iterator e)
static bool is_trail(char_type c)
static code_point decode_valid(Iterator &p)
static int trail_length(char_type c)
static Iterator encode(code_point value, Iterator out)
static const int max_width
Definition: utf.hpp:86
CharType char_type
Definition: utf.hpp:61
static bool is_lead(char_type c)
static int width(code_point value)