Boost.Locale
utf8_codecvt.hpp
1 //
2 // Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
9 #define BOOST_LOCALE_UTF8_CODECVT_HPP
10 
11 #include <boost/locale/utf.hpp>
12 #include <boost/locale/generic_codecvt.hpp>
13 #include <boost/cstdint.hpp>
14 #include <locale>
15 
16 namespace boost {
17 namespace locale {
18 
22 template<typename CharType>
23 class utf8_codecvt : public generic_codecvt<CharType,utf8_codecvt<CharType> >
24 {
25 public:
26 
27  struct state_type {};
28 
29  utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType,utf8_codecvt<CharType> >(refs)
30  {
31  }
32 
33  static int max_encoding_length()
34  {
35  return 4;
36  }
37 
38  static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
39  {
40  return state_type();
41  }
42  static utf::code_point to_unicode(state_type &,char const *&begin,char const *end)
43  {
44  char const *p=begin;
45 
47  if(c!=utf::illegal && c!=utf::incomplete)
48  begin = p;
49  return c;
50  }
51 
52  static utf::code_point from_unicode(state_type &,utf::code_point u,char *begin,char const *end)
53  {
55  return utf::illegal;
56  int width;
57  if((width=utf::utf_traits<char>::width(u)) > end - begin)
58  return utf::incomplete;
60  return width;
61  }
62 };
63 
64 } // locale
65 } // namespace boost
66 
67 #endif
68 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
the function checks if v is a valid code point
Definition: utf.hpp:49
static Iterator encode(code_point value, Iterator out)
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:44
Generic UTF-8 codecvt facet; it allows converting UTF-8 strings to UTF-16 and UTF-32 using wchar_t...
Definition: utf8_codecvt.hpp:23
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:34
initial_convertion_state
Definition: generic_codecvt.hpp:39
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:39
Definition: utf8_codecvt.hpp:27
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t...
Definition: generic_codecvt.hpp:140