8#ifndef BOOST_LOCALE_GENERIC_CODECVT_HPP
9#define BOOST_LOCALE_GENERIC_CODECVT_HPP
11#include <boost/locale/utf.hpp>
15namespace boost {
namespace locale {
// std::mbstate_t is used further down (read_state / write_state) to carry a
// 16-bit value, so it has to span at least two bytes.
static_assert(sizeof(std::mbstate_t) >= 2, "std::mbstate_t is too small to store a UTF-16 codepoint");
// Helper taking an untyped destination and source pointer. Judging by the name
// it copies one 16-bit value from `src` to `dst`; only the pointer setup is
// visible in this excerpt, the statements performing the copy are not shown.
20 inline void copy_uint16_t(
void* dst,
const void* src)
// View the destination as raw bytes.
22 unsigned char* cdst =
static_cast<unsigned char*
>(dst);
// View the source as read-only raw bytes.
23 const unsigned char* csrc =
static_cast<const unsigned char*
>(src);
// Extracts a uint16_t from a std::mbstate_t by handing the addresses of a local
// `dst` and of `src` to copy_uint16_t. The declaration of `dst` and the return
// statement are not visible in this excerpt.
27 inline uint16_t read_state(
const std::mbstate_t& src)
30 copy_uint16_t(&dst, &src);
// Stores the uint16_t `src` into the std::mbstate_t `dst` by handing both
// addresses to copy_uint16_t (the counterpart of read_state).
33 inline void write_state(std::mbstate_t& dst,
const uint16_t src)
35 copy_uint16_t(&dst, &src);
// Template header with three parameters; presumably this introduces the primary
// generic_codecvt declaration, which is not visible in this excerpt.
// CharSize defaults to sizeof(CharType); the specializations further down are
// written for CharSize values 2, 4 and 1.
150 template<
typename CharType,
typename CodecvtImpl,
int CharSize = sizeof(CharType)>
// Partial specialization of generic_codecvt for CharSize == 2. It derives from
// std::codecvt<CharType, char, std::mbstate_t>; the base list ends with a
// trailing comma, so any further base class is cut off in this excerpt.
159 template<
typename CharType,
typename CodecvtImpl>
160 class generic_codecvt<CharType, CodecvtImpl, 2> :
public std::codecvt<CharType, char, std::mbstate_t>,
// Alias for the wide character type used by the conversion routines.
163 typedef CharType uchar;
// Constructor: forwards `refs` (default 0) to the std::codecvt base constructor.
165 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
166 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
// do_unshift override: reports error when the first byte of the conversion
// state `s` is non-zero, otherwise reports ok. The third pointer parameter is
// unnamed. Any statement between the check and the final return (for example
// one assigning `next`) is not visible in this excerpt.
169 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override
// Only the first byte of the mbstate_t object is inspected.
171 if(*
reinterpret_cast<char*
>(&s) != 0)
172 return std::codecvt_base::error;
174 return std::codecvt_base::ok;
176 int do_encoding()
const noexcept override {
return 0; }
177 int do_max_length()
const noexcept override {
return implementation().max_encoding_length(); }
178 bool do_always_noconv()
const noexcept override {
return false; }
// do_length override (2-byte specialization): loops over [from, from_end) while
// `max` is above zero, decoding with implementation().to_unicode, and returns
// the distance between the starting position and the final value of `from`.
// The loop body after the decode call is not visible in this excerpt.
180 int do_length(std::mbstate_t& std_state,
const char* from,
const char* from_end,
size_t max)
const override
// Load a boolean flag from the first byte of the state object.
182 bool state = *
reinterpret_cast<char*
>(&std_state) != 0;
// Remember the start so the consumed length can be computed at the end.
183 const char* save_from = from;
// Converter state requested from the implementation for the to-Unicode direction.
185 auto cvt_state = implementation().initial_state(to_unicode_state);
186 while(max > 0 && from < from_end) {
// Position before decoding; presumably used to rewind on failure (not shown).
187 const char* prev_from = from;
188 const utf::code_point ch = implementation().to_unicode(cvt_state, from, from_end);
// Store the flag back into the first byte of the state object.
200 *
reinterpret_cast<char*
>(&std_state) = state;
201 return static_cast<int>(from - save_from);
// do_in override (2-byte specialization): decodes narrow input through
// implementation().to_unicode and writes `uchar` output. Several parameter
// lines and most of the loop body are missing from this excerpt; the comments
// below cover only what is visible.
204 std::codecvt_base::result do_in(std::mbstate_t& std_state,
206 const char* from_end,
207 const char*& from_next,
210 uchar*& to_next)
const override
// Result starts as ok and is downgraded below.
212 std::codecvt_base::result r = std::codecvt_base::ok;
// Boolean flag carried in the first byte of the state object.
219 bool state = *
reinterpret_cast<char*
>(&std_state) != 0;
220 auto cvt_state = implementation().initial_state(to_unicode_state);
// Continue while there is both output room and input left.
221 while(to < to_end && from < from_end) {
// Position before decoding; presumably used to rewind on failure (not shown).
222 const char* from_saved = from;
224 utf::code_point ch = implementation().to_unicode(cvt_state, from, from_end);
// The conditions guarding the next two assignments are not visible here.
228 r = std::codecvt_base::error;
233 r = std::codecvt_base::partial;
// Emit the code point as a single output unit.
238 *to++ =
static_cast<uchar
>(ch);
// Build the two UTF-16 surrogate units from `ch` (high: 0xD800 | top bits,
// low: 0xDC00 | low 10 bits). Presumably `ch` has been reduced by 0x10000 on a
// line not shown here — TODO confirm.
250 std::uint16_t w1 =
static_cast<std::uint16_t
>(0xD800 | (ch >> 10));
251 std::uint16_t w2 =
static_cast<std::uint16_t
>(0xDC00 | (ch & 0x3FF));
// Still ok but input remains or the flag is set: report partial.
262 if(r == std::codecvt_base::ok && (from != from_end || state))
263 r = std::codecvt_base::partial;
// Persist the flag in the first byte of the state object.
264 *
reinterpret_cast<char*
>(&std_state) = state;
// do_out override (2-byte specialization): reads UTF-16 units, combines
// surrogate pairs into code points and encodes them with
// implementation().from_unicode. Several parameter lines and parts of the loop
// body are missing from this excerpt; the comments cover only what is visible.
268 std::codecvt_base::result do_out(std::mbstate_t& std_state,
270 const uchar* from_end,
271 const uchar*& from_next,
274 char*& to_next)
const override
276 std::codecvt_base::result r = std::codecvt_base::ok;
// 16-bit value carried in std_state between calls (written back at the end).
283 std::uint16_t state = detail::read_state(std_state);
284 auto cvt_state = implementation().initial_state(from_unicode_state);
285 while(to < to_end && from < from_end) {
// w1 is the saved state value, w2 the current input unit.
291 std::uint16_t w1 = state;
292 std::uint16_t w2 = *from;
// w2 lies in the low-surrogate range: combine both halves into one code point.
295 if(0xDC00 <= w2 && w2 <= 0xDFFF) {
296 std::uint16_t vh = w1 - 0xD800;
297 std::uint16_t vl = w2 - 0xDC00;
298 ch = ((uint32_t(vh) << 10) | vl) + 0x10000;
// The branch leading to this error is not visible here.
301 r = std::codecvt_base::error;
// ch lies in the high-surrogate range: keep it in `state`.
306 if(0xD800 <= ch && ch <= 0xDBFF) {
311 state =
static_cast<uint16_t
>(ch);
314 }
// A value in the low-surrogate range reaching this branch is reported as an error.
else if(0xDC00 <= ch && ch <= 0xDFFF) {
318 r = std::codecvt_base::error;
// The condition guarding this error is not visible here.
323 r = std::codecvt_base::error;
// Encode the code point; how `len` selects partial vs. error is not shown.
326 const utf::code_point len = implementation().from_unicode(cvt_state, ch, to, to_end);
328 r = std::codecvt_base::partial;
331 r = std::codecvt_base::error;
// Still ok but input remains or a value is pending in `state`: report partial.
340 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
341 r = std::codecvt_base::partial;
// Persist the 16-bit state value for the next call.
342 detail::write_state(std_state, state);
// Partial specialization of generic_codecvt for CharSize == 4. It derives from
// std::codecvt<CharType, char, std::mbstate_t>; the base list ends with a
// trailing comma, so any further base class is cut off in this excerpt.
351 template<
typename CharType,
typename CodecvtImpl>
352 class generic_codecvt<CharType, CodecvtImpl, 4> :
public std::codecvt<CharType, char, std::mbstate_t>,
// Alias for the wide character type used by the conversion routines.
355 typedef CharType uchar;
// Constructor: forwards `refs` (default 0) to the std::codecvt base constructor.
357 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
359 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
// do_unshift override (4-byte specialization): the state parameter and the
// third pointer parameter are unnamed, and the visible code returns ok.
// Any statement before the return (for example one assigning `next`) is not
// visible in this excerpt.
362 std::codecvt_base::result
363 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override
366 return std::codecvt_base::ok;
368 int do_encoding()
const noexcept override {
return 0; }
369 int do_max_length()
const noexcept override {
return implementation().max_encoding_length(); }
370 bool do_always_noconv()
const noexcept override {
return false; }
// do_length override (4-byte specialization): the state parameter is unnamed.
// Loops over [from, from_end) while `max` is above zero, decoding with
// implementation().to_unicode, and returns the distance between the starting
// position and the final value of `from`. The loop body after the decode call
// is not visible in this excerpt.
372 int do_length(std::mbstate_t& ,
const char* from,
const char* from_end,
size_t max)
const override
// Remember the start so the consumed length can be computed at the end.
374 const char* start_from = from;
375 auto cvt_state = implementation().initial_state(to_unicode_state);
376 while(max > 0 && from < from_end) {
// Position before decoding; presumably used to rewind on failure (not shown).
377 const char* save_from = from;
378 const utf::code_point ch = implementation().to_unicode(cvt_state, from, from_end);
386 return static_cast<int>(from - start_from);
// do_in override (4-byte specialization): the state parameter is unnamed.
// Decodes narrow input through implementation().to_unicode. Several parameter
// lines and most of the loop body are missing from this excerpt.
389 std::codecvt_base::result do_in(std::mbstate_t& ,
391 const char* from_end,
392 const char*& from_next,
395 uchar*& to_next)
const override
// Result starts as ok and is downgraded below.
397 std::codecvt_base::result r = std::codecvt_base::ok;
399 auto cvt_state = implementation().initial_state(to_unicode_state);
// Continue while there is both output room and input left.
400 while(to < to_end && from < from_end) {
// Position before decoding; presumably used to rewind on failure (not shown).
401 const char* from_saved = from;
403 const utf::code_point ch = implementation().to_unicode(cvt_state, from, from_end);
// The conditions guarding the next two assignments are not visible here.
406 r = std::codecvt_base::error;
411 r = std::codecvt_base::partial;
// Still ok but input remains: report partial.
419 if(r == std::codecvt_base::ok && from != from_end)
420 r = std::codecvt_base::partial;
// do_out override (4-byte specialization): the state parameter is unnamed.
// Reads each input unit as a 32-bit value and encodes it with
// implementation().from_unicode. Several parameter lines and parts of the loop
// body are missing from this excerpt.
424 std::codecvt_base::result do_out(std::mbstate_t& ,
426 const uchar* from_end,
427 const uchar*& from_next,
430 char*& to_next)
const override
432 std::codecvt_base::result r = std::codecvt_base::ok;
433 auto cvt_state = implementation().initial_state(from_unicode_state);
// Continue while there is both output room and input left.
434 while(to < to_end && from < from_end) {
435 const std::uint32_t ch = *from;
// The condition guarding this error is not visible here.
437 r = std::codecvt_base::error;
// Encode the code point; how `len` selects partial vs. error is not shown.
440 const utf::code_point len = implementation().from_unicode(cvt_state, ch, to, to_end);
442 r = std::codecvt_base::partial;
445 r = std::codecvt_base::error;
// Still ok but input remains: report partial.
453 if(r == std::codecvt_base::ok && from != from_end)
454 r = std::codecvt_base::partial;
// Partial specialization of generic_codecvt for CharType == char and
// CharSize == 1. It derives from std::codecvt<char, char, std::mbstate_t>; the
// base list ends with a trailing comma, so any further base class is cut off
// in this excerpt.
459 template<
typename CodecvtImpl>
460 class generic_codecvt<char, CodecvtImpl, 1> :
public std::codecvt<char, char, std::mbstate_t>,
465 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
467 generic_codecvt(
size_t refs = 0) : std::codecvt<char, char, std::mbstate_t>(refs) {}
A base class that is used to define constants for generic_codecvt.
Definition: generic_codecvt.hpp:40
initial_convertion_state
Initial state for converting to or from Unicode code points, used by initial_state in derived classes...
Definition: generic_codecvt.hpp:43
@ to_unicode_state
The state will be used by to_unicode functions.
Definition: generic_codecvt.hpp:44
@ from_unicode_state
The state will be used by from_unicode functions.
Definition: generic_codecvt.hpp:45
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:151
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:19
bool is_valid_codepoint(code_point v)
The function checks whether v is a valid code point.
Definition: utf.hpp:30
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24