7#ifndef BOOST_LOCALE_GENERIC_CODECVT_HPP
8#define BOOST_LOCALE_GENERIC_CODECVT_HPP
10#include <boost/locale/utf.hpp>
11#include <boost/cstdint.hpp>
14namespace boost {
namespace locale {
16#ifndef BOOST_LOCALE_DOXYGEN
// The UTF-16 conversion routines in this header view a std::mbstate_t object
// through a 16-bit integer reference, so it must offer at least two bytes.
static_assert(sizeof(std::mbstate_t) >= 2, "std::mbstate_t is too small");
23#if defined(_MSC_VER) && _MSC_VER < 1700
26# define BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
142 template<
typename CharType,
typename CodecvtImpl,
int CharSize = sizeof(CharType)>
151 template<
typename CharType,
typename CodecvtImpl>
152 class generic_codecvt<CharType, CodecvtImpl, 2> :
public std::codecvt<CharType, char, std::mbstate_t>,
155 typedef CharType uchar;
157 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
158 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
// do_unshift override of the 2-byte specialization.
// NOTE(review): this listing omits the braces and the statements between the
// numbered lines, so only what is visible is described here.
161 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override
// The caller's mbstate_t is viewed as a 16-bit state word.
163 boost::uint16_t& state = *
reinterpret_cast<boost::uint16_t*
>(&s);
// Two exits are visible: one reporting error, one reporting ok.
// Presumably error is chosen while the state word is non-zero -- confirm against the full source.
165 return std::codecvt_base::error;
167 return std::codecvt_base::ok;
169 int do_encoding()
const noexcept override {
return 0; }
170 int do_max_length()
const noexcept override {
return implementation().max_encoding_length(); }
171 bool do_always_noconv()
const noexcept override {
return false; }
// Fragment of a length-counting override in the 2-byte specialization.
// NOTE(review): the line carrying the function name is not present in this listing;
// judging by the macro name it is presumably do_length -- confirm against the full source.
// Several interior lines (including #else/#endif and the loop body) are missing as well.
174#ifdef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
177 std::mbstate_t& std_state,
179 const char* from_end,
180 size_t max)
const override
182#ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
// Remember where the input started; used for the byte-count return below.
183 const char* save_from = from;
// View the mbstate_t as a 16-bit state word (by reference here).
184 boost::uint16_t& state = *
reinterpret_cast<boost::uint16_t*
>(&std_state);
// Remember the initial budget; used for the other return below.
186 const size_t start_max = max;
// Here the 16-bit state word is read into a local copy instead of a reference.
187 boost::uint16_t state = *
reinterpret_cast<const boost::uint16_t*
>(&std_state);
190 typename CodecvtImpl::state_type cvt_state =
// Loop while budget remains and input is left.
192 while(max > 0 && from < from_end) {
193 const char* prev_from = from;
// Decode the next code point from the input through the implementation.
194 boost::uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
209#ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
// Result expressed as the distance the input pointer advanced.
210 return static_cast<int>(from - save_from);
// Result expressed as the amount of budget used (presumably the #else branch).
212 return static_cast<int>(start_max - max);
// do_in override of the 2-byte specialization: decodes input through
// implementation().to_unicode() and writes uchar units to the output.
// NOTE(review): parameters, braces and several statements are missing from this
// listing; the comments below describe only the visible lines.
216 std::codecvt_base::result do_in(std::mbstate_t& std_state,
218 const char* from_end,
219 const char*& from_next,
222 uchar*& to_next)
const override
224 std::codecvt_base::result r = std::codecvt_base::ok;
// View the caller's mbstate_t as a 16-bit state word.
231 boost::uint16_t& state = *
reinterpret_cast<boost::uint16_t*
>(&std_state);
232 typename CodecvtImpl::state_type cvt_state =
// Loop while there is both output room and input left.
234 while(to < to_end && from < from_end) {
235 const char* from_saved = from;
237 uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
241 r = std::codecvt_base::error;
246 r = std::codecvt_base::partial;
// Store the code point as a single output unit.
251 *to++ =
static_cast<uchar
>(ch);
// Form two 16-bit units: 0xD800 combined with the bits above the low ten,
// and 0xDC00 combined with the low ten bits.
263 boost::uint16_t w1 =
static_cast<boost::uint16_t
>(0xD800 | (ch >> 10));
264 boost::uint16_t w2 =
static_cast<boost::uint16_t
>(0xDC00 | (ch & 0x3FF));
// An otherwise successful run that left input unread or a non-zero state word
// is reported as partial.
277 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
278 r = std::codecvt_base::partial;
// do_out override of the 2-byte specialization: reads uchar units and encodes
// code points through implementation().from_unicode().
// NOTE(review): parameters, braces and several statements are missing from this
// listing; the comments below describe only the visible lines.
282 std::codecvt_base::result do_out(std::mbstate_t& std_state,
284 const uchar* from_end,
285 const uchar*& from_next,
288 char*& to_next)
const override
290 std::codecvt_base::result r = std::codecvt_base::ok;
// View the caller's mbstate_t as a 16-bit state word.
297 boost::uint16_t& state = *
reinterpret_cast<boost::uint16_t*
>(&std_state);
298 typename CodecvtImpl::state_type cvt_state =
// Loop while there is both output room and input left.
300 while(to < to_end && from < from_end) {
301 boost::uint32_t ch = 0;
// w1 is the unit kept in the state word, w2 is the current input unit.
306 boost::uint16_t w1 = state;
307 boost::uint16_t w2 = *from;
// If w2 lies in 0xDC00..0xDFFF, merge both units into one code point
// offset by 0x10000.
310 if(0xDC00 <= w2 && w2 <= 0xDFFF) {
311 boost::uint16_t vh = w1 - 0xD800;
312 boost::uint16_t vl = w2 - 0xDC00;
313 ch = ((uint32_t(vh) << 10) | vl) + 0x10000;
316 r = std::codecvt_base::error;
// A value in 0xD800..0xDBFF is stored into the state word.
321 if(0xD800 <= ch && ch <= 0xDBFF) {
326 state =
static_cast<uint16_t
>(ch);
329 }
// A value in 0xDC00..0xDFFF at this point appears to be treated as an error.
else if(0xDC00 <= ch && ch <= 0xDFFF) {
333 r = std::codecvt_base::error;
338 r = std::codecvt_base::error;
// Encode the code point into the output range through the implementation.
341 boost::uint32_t len = implementation().from_unicode(cvt_state, ch, to, to_end);
343 r = std::codecvt_base::partial;
346 r = std::codecvt_base::error;
// An otherwise successful run that left input unread or a non-zero state word
// is reported as partial.
355 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
356 r = std::codecvt_base::partial;
365 template<
typename CharType,
typename CodecvtImpl>
366 class generic_codecvt<CharType, CodecvtImpl, 4> :
public std::codecvt<CharType, char, std::mbstate_t>,
369 typedef CharType uchar;
371 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
373 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
// do_unshift override of the 4-byte specialization.
// NOTE(review): braces and any statement before the return are missing from this
// listing. The state parameter is unnamed and the only visible exit returns ok.
376 std::codecvt_base::result
377 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override
380 return std::codecvt_base::ok;
382 int do_encoding()
const noexcept override {
return 0; }
383 int do_max_length()
const noexcept override {
return implementation().max_encoding_length(); }
384 bool do_always_noconv()
const noexcept override {
return false; }
// Fragment of a length-counting override in the 4-byte specialization.
// NOTE(review): the line carrying the function name is not present in this listing;
// judging by the macro name it is presumably do_length -- confirm against the full source.
// #else/#endif lines and the loop body are missing as well.
387#ifdef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
392 const char* from_end,
393 size_t max)
const override
395#ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
// Starting input position; used for the byte-count return below.
396 const char* start_from = from;
// Starting budget; used for the other return below.
398 const size_t start_max = max;
400 typename CodecvtImpl::state_type cvt_state =
// Loop while budget remains and input is left.
402 while(max > 0 && from < from_end) {
403 const char* save_from = from;
// Decode the next code point from the input through the implementation.
404 boost::uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
412#ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
// Result expressed as the distance the input pointer advanced.
413 return static_cast<int>(from - start_from);
// Result expressed as the amount of budget used (presumably the #else branch).
415 return static_cast<int>(start_max - max);
// do_in override of the 4-byte specialization: decodes input through
// implementation().to_unicode(). The mbstate_t parameter is unnamed.
// NOTE(review): parameters, braces and several statements are missing from this
// listing; the comments below describe only the visible lines.
419 std::codecvt_base::result do_in(std::mbstate_t& ,
421 const char* from_end,
422 const char*& from_next,
425 uchar*& to_next)
const override
427 std::codecvt_base::result r = std::codecvt_base::ok;
// Loop while there is both output room and input left.
435 while(to < to_end && from < from_end) {
436 const char* from_saved = from;
438 uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
441 r = std::codecvt_base::error;
446 r = std::codecvt_base::partial;
// An otherwise successful run that left input unread is reported as partial.
454 if(r == std::codecvt_base::ok && from != from_end)
455 r = std::codecvt_base::partial;
// do_out override of the 4-byte specialization: encodes code points through
// implementation().from_unicode(). The mbstate_t parameter is unnamed.
// NOTE(review): parameters, braces and several statements are missing from this
// listing; the comments below describe only the visible lines.
459 std::codecvt_base::result do_out(std::mbstate_t& ,
461 const uchar* from_end,
462 const uchar*& from_next,
465 char*& to_next)
const override
467 std::codecvt_base::result r = std::codecvt_base::ok;
// Loop while there is both output room and input left.
469 while(to < to_end && from < from_end) {
470 boost::uint32_t ch = 0;
473 r = std::codecvt_base::error;
// Encode the code point into the output range through the implementation.
476 boost::uint32_t len = implementation().from_unicode(cvt_state, ch, to, to_end);
478 r = std::codecvt_base::partial;
481 r = std::codecvt_base::error;
// An otherwise successful run that left input unread is reported as partial.
489 if(r == std::codecvt_base::ok && from != from_end)
490 r = std::codecvt_base::partial;
495 template<
typename CharType,
typename CodecvtImpl>
496 class generic_codecvt<CharType, CodecvtImpl, 1> :
public std::codecvt<CharType, char, std::mbstate_t>,
499 typedef CharType uchar;
501 const CodecvtImpl& implementation()
const {
return *
static_cast<const CodecvtImpl*
>(
this); }
503 generic_codecvt(
size_t refs = 0) : std::codecvt<char, char, std::mbstate_t>(refs) {}
A base class that is used to define constants for generic_codecvt.
Definition: generic_codecvt.hpp:30
initial_convertion_state
Initial state for converting to or from unicode code points, used by initial_state in derived classes...
Definition: generic_codecvt.hpp:33
@ to_unicode_state
The state would be used by to_unicode functions.
Definition: generic_codecvt.hpp:34
@ from_unicode_state
The state would be used by from_unicode functions.
Definition: generic_codecvt.hpp:35
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:143
bool is_valid_codepoint(code_point v)
the function checks if v is a valid code point
Definition: utf.hpp:27
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24