7 #ifndef BOOST_LOCALE_GENERIC_CODECVT_HPP 8 #define BOOST_LOCALE_GENERIC_CODECVT_HPP 10 #include <boost/locale/utf.hpp> 11 #include <boost/cstdint.hpp> 14 namespace boost {
namespace locale {
// The UTF-16 conversion code in this header accesses std::mbstate_t through a
// boost::uint16_t reference, so the state object must be at least two bytes.
16 #ifndef BOOST_LOCALE_DOXYGEN 20 static_assert(
sizeof(std::mbstate_t) >= 2,
"std::mbstate_t is too small");
23 #if defined(_MSC_VER) && _MSC_VER < 1700 26 # define BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST 142 template<
typename CharType,
typename CodecvtImpl,
int CharSize = sizeof(CharType)>
151 template<
typename CharType,
typename CodecvtImpl>
152 class generic_codecvt<CharType, CodecvtImpl, 2> :
public std::codecvt<CharType, char, std::mbstate_t>,
155 typedef CharType uchar;
157 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
158 const CodecvtImpl& implementation()
const {
return *static_cast<const CodecvtImpl*>(
this); }
// do_unshift override of the 2-byte CharType specialization.
// Reinterprets the std::mbstate_t as a 16-bit `state` value; both an `error`
// and an `ok` return are visible below.
// NOTE(review): the embedded listing numbers jump (163, 165, 168, 170), so the
// condition choosing between the two returns is not shown here -- presumably a
// non-zero state is rejected; confirm against the full header.
161 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override 163 boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&s);
// Debug trace of the state value in hex.
// NOTE(review): presumably compiled only under a debug macro that is not
// visible in this listing -- confirm.
165 std::cout <<
"Entering unshift " << std::hex << state << std::dec << std::endl;
168 return std::codecvt_base::error;
170 return std::codecvt_base::ok;
// do_encoding, do_max_length, do_always_noconv and do_length overrides of the
// 2-byte CharType specialization.
// NOTE(review): these methods are fused onto shared lines in this listing and
// the embedded numbers jump, so the bodies of do_encoding and do_always_noconv
// are not visible here.
172 int do_encoding()
const noexcept
// do_max_length: returns implementation().max_encoding_length().
override 176 int do_max_length()
const noexcept
override 178 return implementation().max_encoding_length();
180 bool do_always_noconv()
const noexcept
// do_length: BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST selects between variants of
// the signature and body. One visible variant binds `state` as a reference
// into std_state, the other takes a copy through a const pointer.
override 186 #ifdef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
189 std::mbstate_t& std_state,
191 const char* from_end,
192 size_t max)
const override 194 #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST 195 const char* save_from = from;
196 boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&std_state);
198 size_t save_max = max;
199 boost::uint16_t state = *reinterpret_cast<const boost::uint16_t*>(&std_state);
202 typename CodecvtImpl::state_type cvt_state =
// Loop while the `max` budget is non-zero and input remains; each iteration
// calls implementation().to_unicode, which is handed `from` and `from_end`.
204 while(max > 0 && from < from_end) {
205 const char* prev_from = from;
206 boost::uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
// Result: when the macro is not defined, the distance from the saved start
// pointer to `from`; the other visible return yields save_max - max.
// NOTE(review): the #else/#endif lines are not visible here -- confirm pairing.
221 #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST 222 return static_cast<int>(from - save_from);
224 return static_cast<int>(save_max - max);
// do_in override of the 2-byte CharType specialization: reads narrow input up
// to from_end via implementation().to_unicode and writes uchar units to `to`.
// The std::mbstate_t is accessed as a 16-bit `state` reference.
// NOTE(review): the embedded listing numbers jump throughout, so several
// parameters, conditions and closing braces are not visible in this view.
228 std::codecvt_base::result do_in(std::mbstate_t& std_state,
230 const char* from_end,
231 const char*& from_next,
234 uchar*& to_next)
const override 236 std::codecvt_base::result r = std::codecvt_base::ok;
243 boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&std_state);
244 typename CodecvtImpl::state_type cvt_state =
// Continue while both output space and input remain.
246 while(to < to_end && from < from_end) {
// Debug trace of state and remaining input/output sizes.
// NOTE(review): presumably compiled only under a debug macro not visible here -- confirm.
248 std::cout <<
"Entering IN--------------\n";
249 std::cout <<
"State " << std::hex << state << std::endl;
250 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
// Remember the input position before the decode call advances `from`.
252 const char* from_saved = from;
254 uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
// NOTE(review): the conditions guarding the error/partial assignments are on
// lines not shown; presumably they test ch against the illegal/incomplete
// code point constants -- confirm.
258 r = std::codecvt_base::error;
263 r = std::codecvt_base::partial;
// The code point is written as a single output unit.
268 *to++ = static_cast<uchar>(ch);
// Form two 16-bit units from ch: w1 carries the bits above the low 10 in the
// 0xD800 range, w2 carries the low 10 bits in the 0xDC00 range.
// NOTE(review): presumably ch has had 0x10000 subtracted on a line not shown -- confirm.
280 boost::uint16_t w1 = static_cast<boost::uint16_t>(0xD800 | (ch >> 10));
281 boost::uint16_t w2 = static_cast<boost::uint16_t>(0xDC00 | (ch & 0x3FF));
// An otherwise-ok result becomes partial when input is left over or the
// 16-bit state is non-zero.
294 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
295 r = std::codecvt_base::partial;
// Debug trace of the result code, state and remaining sizes.
297 std::cout <<
"Returning ";
299 case std::codecvt_base::ok: std::cout <<
"ok\n";
break;
300 case std::codecvt_base::partial: std::cout <<
"partial\n";
break;
301 case std::codecvt_base::error: std::cout <<
"error\n";
break;
302 default: std::cout <<
"other\n";
break;
304 std::cout <<
"State " << std::hex << state << std::endl;
305 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
// do_out override of the 2-byte CharType specialization: reads uchar units up
// to from_end, assembles a code point `ch`, and passes it to
// implementation().from_unicode to write narrow output at `to`.
// The std::mbstate_t is accessed as a 16-bit `state` reference.
// NOTE(review): the embedded listing numbers jump throughout, so several
// parameters, conditions and closing braces are not visible in this view.
310 std::codecvt_base::result do_out(std::mbstate_t& std_state,
312 const uchar* from_end,
313 const uchar*& from_next,
316 char*& to_next)
const override 318 std::codecvt_base::result r = std::codecvt_base::ok;
325 boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&std_state);
326 typename CodecvtImpl::state_type cvt_state =
// Continue while both output space and input remain.
328 while(to < to_end && from < from_end) {
// Debug trace of state and remaining input/output sizes.
// NOTE(review): presumably compiled only under a debug macro not visible here -- confirm.
330 std::cout <<
"Entering OUT --------------\n";
331 std::cout <<
"State " << std::hex << state << std::endl;
332 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
334 boost::uint32_t ch = 0;
// Pair the unit held in `state` (w1) with the current input unit (w2).
// NOTE(review): presumably this path is taken only when state is non-zero;
// the guarding condition is not visible -- confirm.
339 boost::uint16_t w1 = state;
340 boost::uint16_t w2 = *from;
// When w2 lies in 0xDC00..0xDFFF, combine the two halves (10 bits each) and
// add the 0x10000 offset to obtain the code point.
343 if(0xDC00 <= w2 && w2 <= 0xDFFF) {
344 boost::uint16_t vh = w1 - 0xD800;
345 boost::uint16_t vl = w2 - 0xDC00;
346 ch = ((uint32_t(vh) << 10) | vl) + 0x10000;
349 r = std::codecvt_base::error;
// A unit in 0xD800..0xDBFF is stored in `state`.
354 if(0xD800 <= ch && ch <= 0xDBFF) {
359 state = static_cast<uint16_t>(ch);
362 }
// A unit in 0xDC00..0xDFFF reaching this branch is followed by an error
// assignment on the next visible line.
else if(0xDC00 <= ch && ch <= 0xDFFF) {
366 r = std::codecvt_base::error;
371 r = std::codecvt_base::error;
// Encode the code point into the output buffer.
// NOTE(review): the tests on `len` that select partial/error are not visible here.
374 boost::uint32_t len = implementation().from_unicode(cvt_state, ch, to, to_end);
376 r = std::codecvt_base::partial;
379 r = std::codecvt_base::error;
// An otherwise-ok result becomes partial when input is left over.
388 if(r == std::codecvt_base::ok && from != from_end)
389 r = std::codecvt_base::partial;
// Debug trace of the result code, state and remaining sizes.
391 std::cout <<
"Returning ";
393 case std::codecvt_base::ok: std::cout <<
"ok\n";
break;
394 case std::codecvt_base::partial: std::cout <<
"partial\n";
break;
395 case std::codecvt_base::error: std::cout <<
"error\n";
break;
396 default: std::cout <<
"other\n";
break;
398 std::cout <<
"State " << std::hex << state << std::endl;
399 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
409 template<
typename CharType,
typename CodecvtImpl>
410 class generic_codecvt<CharType, CodecvtImpl, 4> :
public std::codecvt<CharType, char, std::mbstate_t>,
413 typedef CharType uchar;
415 generic_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs) {}
417 const CodecvtImpl& implementation()
const {
return *static_cast<const CodecvtImpl*>(
this); }
// do_unshift override of the 4-byte CharType specialization.
// The std::mbstate_t parameter is unnamed (unused) and the visible return is
// always `ok`.
// NOTE(review): the embedded listing numbers jump from 421 to 424, so any
// statement between the signature and the return is not visible here.
420 std::codecvt_base::result
421 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override 424 return std::codecvt_base::ok;
426 int do_encoding()
const noexcept
override {
return 0; }
427 int do_max_length()
const noexcept
override {
return implementation().max_encoding_length(); }
428 bool do_always_noconv()
const noexcept
override {
return false; }
// do_length override of the 4-byte CharType specialization.
// BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST selects between variants of the
// signature and of the returned value.
// NOTE(review): the embedded listing numbers jump, so the function name line,
// the `from` parameter, the loop body and the #else/#endif lines are not
// visible in this view.
431 #ifdef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
436 const char* from_end,
437 size_t max)
const override 439 #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST 440 const char* start_from = from;
442 size_t save_max = max;
444 typename CodecvtImpl::state_type cvt_state =
// Loop while the `max` budget is non-zero and input remains; each iteration
// calls implementation().to_unicode, which is handed `from` and `from_end`.
446 while(max > 0 && from < from_end) {
447 const char* save_from = from;
448 boost::uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
// Result: when the macro is not defined, the distance from the saved start
// pointer to `from`; the other visible return yields save_max - max.
455 #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST 456 return from - start_from;
458 return save_max - max;
// do_in override of the 4-byte CharType specialization: reads narrow input up
// to from_end via implementation().to_unicode. The std::mbstate_t parameter
// is unnamed (unused).
// NOTE(review): the embedded listing numbers jump throughout, so several
// parameters, the declaration of cvt_state, the output store and the closing
// braces are not visible in this view.
462 std::codecvt_base::result do_in(std::mbstate_t& ,
464 const char* from_end,
465 const char*& from_next,
468 uchar*& to_next)
const override 470 std::codecvt_base::result r = std::codecvt_base::ok;
// Continue while both output space and input remain.
478 while(to < to_end && from < from_end) {
// Debug trace.
// NOTE(review): `state` is streamed below, but no declaration of `state` is
// visible in this function (the mbstate_t parameter is unnamed). Presumably
// this trace is compiled only under a debug macro -- confirm it still builds.
480 std::cout <<
"Entering IN--------------\n";
481 std::cout <<
"State " << std::hex << state << std::endl;
482 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
// Remember the input position before the decode call advances `from`.
484 const char* from_saved = from;
486 uint32_t ch = implementation().to_unicode(cvt_state, from, from_end);
// NOTE(review): the conditions guarding the error/partial assignments are on
// lines not shown -- confirm against the full header.
489 r = std::codecvt_base::error;
494 r = std::codecvt_base::partial;
// An otherwise-ok result becomes partial when input is left over.
502 if(r == std::codecvt_base::ok && from != from_end)
503 r = std::codecvt_base::partial;
// Debug trace of the result code, state and remaining sizes.
505 std::cout <<
"Returning ";
507 case std::codecvt_base::ok: std::cout <<
"ok\n";
break;
508 case std::codecvt_base::partial: std::cout <<
"partial\n";
break;
509 case std::codecvt_base::error: std::cout <<
"error\n";
break;
510 default: std::cout <<
"other\n";
break;
512 std::cout <<
"State " << std::hex << state << std::endl;
513 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
// do_out override of the 4-byte CharType specialization: passes a code point
// `ch` to implementation().from_unicode to write narrow output at `to`.
// The std::mbstate_t parameter is unnamed (unused).
// NOTE(review): the embedded listing numbers jump throughout, so several
// parameters, the declaration of cvt_state, the assignment of `ch` from the
// input and the closing braces are not visible in this view.
518 std::codecvt_base::result do_out(std::mbstate_t& ,
520 const uchar* from_end,
521 const uchar*& from_next,
524 char*& to_next)
const override 526 std::codecvt_base::result r = std::codecvt_base::ok;
// Continue while both output space and input remain.
528 while(to < to_end && from < from_end) {
// Debug trace.
// NOTE(review): `state` is streamed below, but no declaration of `state` is
// visible in this function (the mbstate_t parameter is unnamed). Presumably
// this trace is compiled only under a debug macro -- confirm it still builds.
530 std::cout <<
"Entering OUT --------------\n";
531 std::cout <<
"State " << std::hex << state << std::endl;
532 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
534 boost::uint32_t ch = 0;
// NOTE(review): the condition guarding this error assignment is not visible;
// presumably it rejects an invalid input code point -- confirm.
537 r = std::codecvt_base::error;
// Encode the code point into the output buffer.
// NOTE(review): the tests on `len` that select partial/error are not visible here.
540 boost::uint32_t len = implementation().from_unicode(cvt_state, ch, to, to_end);
542 r = std::codecvt_base::partial;
545 r = std::codecvt_base::error;
// An otherwise-ok result becomes partial when input is left over.
553 if(r == std::codecvt_base::ok && from != from_end)
554 r = std::codecvt_base::partial;
// Debug trace of the result code, state and remaining sizes.
556 std::cout <<
"Returning ";
558 case std::codecvt_base::ok: std::cout <<
"ok\n";
break;
559 case std::codecvt_base::partial: std::cout <<
"partial\n";
break;
560 case std::codecvt_base::error: std::cout <<
"error\n";
break;
561 default: std::cout <<
"other\n";
break;
563 std::cout <<
"State " << std::hex << state << std::endl;
564 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end - to << std::endl;
570 template<
typename CharType,
typename CodecvtImpl>
571 class generic_codecvt<CharType, CodecvtImpl, 1> :
public std::codecvt<CharType, char, std::mbstate_t>,
574 typedef CharType uchar;
576 const CodecvtImpl& implementation()
const {
return *static_cast<const CodecvtImpl*>(
this); }
578 generic_codecvt(
size_t refs = 0) : std::codecvt<char, char, std::mbstate_t>(refs) {}
bool is_valid_codepoint(code_point v)
The function checks whether v is a valid code point.
Definition: utf.hpp:27
The state would be used by to_unicode functions.
Definition: generic_codecvt.hpp:34
initial_convertion_state
Initial state for converting to or from unicode code points, used by initial_state in derived classes...
Definition: generic_codecvt.hpp:33
A base class that is used to define constants for generic_codecvt.
Definition: generic_codecvt.hpp:30
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:143
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
The state would be used by from_unicode functions.
Definition: generic_codecvt.hpp:35
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24