7 #ifndef BOOST_LOCALE_UTF_HPP_INCLUDED 8 #define BOOST_LOCALE_UTF_HPP_INCLUDED 10 #include <boost/locale/config.hpp> 11 #include <boost/cstdint.hpp> 13 namespace boost {
namespace locale {
31 if(0xD800 <= v && v <= 0xDFFF)
36 #ifdef BOOST_LOCALE_DOXYGEN 39 template<
typename CharType,
int size = sizeof(CharType)>
56 template<
typename Iterator>
89 template<
typename Iterator>
95 template<
typename Iterator>
101 template<
typename CharType,
int size = sizeof(CharType)>
104 template<
typename CharType>
110 unsigned char c = ci;
113 if(BOOST_UNLIKELY(c < 194))
119 if(BOOST_LIKELY(c <= 244))
130 }
else if(value <= 0x7FF) {
132 }
else if(BOOST_LIKELY(value <= 0xFFFF)) {
141 unsigned char c = ci;
142 return (c & 0xC0) == 0x80;
147 template<
typename Iterator>
150 if(BOOST_UNLIKELY(p == e))
153 unsigned char lead = *p++;
158 if(BOOST_UNLIKELY(trail_size < 0))
166 code_point c = lead & ((1 << (6 - trail_size)) - 1);
172 if(BOOST_UNLIKELY(p == e))
177 c = (c << 6) | (tmp & 0x3F);
180 if(BOOST_UNLIKELY(p == e))
185 c = (c << 6) | (tmp & 0x3F);
188 if(BOOST_UNLIKELY(p == e))
193 c = (c << 6) | (tmp & 0x3F);
202 if(BOOST_UNLIKELY(
width(c) != trail_size + 1))
208 template<
typename Iterator>
211 unsigned char lead = *p++;
219 else if(BOOST_LIKELY(lead < 240))
224 code_point c = lead & ((1 << (6 - trail_size)) - 1);
227 case 3: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_FALLTHROUGH;
228 case 2: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F); BOOST_FALLTHROUGH;
229 case 1: c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F);
235 template<
typename Iterator>
239 *out++ = static_cast<char_type>(value);
240 }
else if(value <= 0x7FF) {
241 *out++ = static_cast<char_type>((value >> 6) | 0xC0);
242 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
243 }
else if(BOOST_LIKELY(value <= 0xFFFF)) {
244 *out++ = static_cast<char_type>((value >> 12) | 0xE0);
245 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
246 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
248 *out++ = static_cast<char_type>((value >> 18) | 0xF0);
249 *out++ = static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
250 *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
251 *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
257 template<
typename CharType>
258 struct utf_traits<CharType, 2> {
// Reports whether the 16-bit code unit x lies in the first-surrogate range 0xD800..0xDBFF (both bounds inclusive).
262 static bool is_first_surrogate(uint16_t x) {
return 0xD800 <= x && x <= 0xDBFF; }
// Reports whether the 16-bit code unit x lies in the second-surrogate range 0xDC00..0xDFFF (both bounds inclusive).
263 static bool is_second_surrogate(uint16_t x) {
return 0xDC00 <= x && x <= 0xDFFF; }
264 static code_point combine_surrogate(uint16_t w1, uint16_t w2)
266 return ((
code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
270 if(is_first_surrogate(c))
272 if(is_second_surrogate(c))
282 template<
typename It>
285 if(BOOST_UNLIKELY(current == last))
287 uint16_t w1 = *current++;
288 if(BOOST_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
295 uint16_t w2 = *current++;
296 if(w2 < 0xDC00 || 0xDFFF < w2)
298 return combine_surrogate(w1, w2);
300 template<
typename It>
303 uint16_t w1 = *current++;
304 if(BOOST_LIKELY(w1 < 0xD800 || 0xDFFF < w1)) {
307 uint16_t w2 = *current++;
308 return combine_surrogate(w1, w2);
313 template<
typename It>
316 if(BOOST_LIKELY(u <= 0xFFFF)) {
317 *out++ = static_cast<char_type>(u);
320 *out++ = static_cast<char_type>(0xD800 | (u >> 10));
321 *out++ = static_cast<char_type>(0xDC00 | (u & 0x3FF));
327 template<
typename CharType>
328 struct utf_traits<CharType, 4> {
339 template<
typename It>
345 template<
typename It>
348 if(BOOST_UNLIKELY(current == last))
357 template<
typename It>
360 *out++ = static_cast<char_type>(u);
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
The function checks whether v is a valid code point.
Definition: utf.hpp:27
static Iterator encode(code_point value, Iterator out)
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:19
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:40
static int width(code_point value)
CharType char_type
The type of the character.
Definition: utf.hpp:42
static int trail_length(char_type c)
static bool is_lead(char_type c)
Returns true if c is a lead code unit; always true for UTF-32.
static code_point decode_valid(Iterator &p)
static bool is_trail(char_type c)
Returns true if c is a trail code unit; always false for UTF-32.
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
static constexpr int max_width
Definition: utf.hpp:64
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24