Boost.Locale
utf8_codecvt.hpp
1//
2// Copyright (c) 2015 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0.
5// https://www.boost.org/LICENSE_1_0.txt
6
7#ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
8#define BOOST_LOCALE_UTF8_CODECVT_HPP
9
10#include <boost/locale/generic_codecvt.hpp>
11#include <boost/locale/utf.hpp>
12#include <boost/assert.hpp>
13#include <cstdint>
14#include <locale>
15
16namespace boost { namespace locale {
17
20 template<typename CharType>
21 class utf8_codecvt : public generic_codecvt<CharType, utf8_codecvt<CharType>> {
22 public:
23 struct state_type {};
24
25 utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType, utf8_codecvt<CharType>>(refs) {}
26
27 static int max_encoding_length() { return 4; }
28
29 static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
30 {
31 return state_type();
32 }
33 static utf::code_point to_unicode(state_type&, const char*& begin, const char* end)
34 {
35 const char* p = begin;
36
38 if(c != utf::illegal && c != utf::incomplete)
39 begin = p;
40 return c;
41 }
42
43 static utf::len_or_error from_unicode(state_type&, utf::code_point u, char* begin, const char* end)
44 {
45 BOOST_ASSERT(utf::is_valid_codepoint(u));
46 const auto width = utf::utf_traits<char>::width(u);
47 if(width > end - begin)
48 return utf::incomplete;
50 return width;
51 }
52 };
53
54}} // namespace boost::locale
55
56#endif
initial_convertion_state
Initial state for converting to or from Unicode code points, used by initial_state in derived classes...
Definition: generic_codecvt.hpp:43
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:151
Generic UTF-8 codecvt facet; it converts UTF-8 strings to UTF-16 and UTF-32 using wchar_t,...
Definition: utf8_codecvt.hpp:21
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:19
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:30
code_point len_or_error
Either a length/size or an error (illegal/incomplete)
Definition: utf.hpp:27
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24
Definition: utf8_codecvt.hpp:23
static int width(code_point value)
static code_point decode(Iterator &p, Iterator e)
static Iterator encode(code_point value, Iterator out)