7 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 8 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 10 #include <boost/locale/boundary/boundary_point.hpp> 11 #include <boost/locale/boundary/facets.hpp> 12 #include <boost/locale/boundary/segment.hpp> 13 #include <boost/locale/boundary/types.hpp> 14 #include <boost/cstdint.hpp> 15 #include <boost/iterator/iterator_facade.hpp> 22 #include <type_traits> 26 # pragma warning(push) 27 # pragma warning(disable : 4275 4251 4231 4660) 30 namespace boost {
namespace locale {
namespace boundary {
// Primary mapping_traits template: parameterised on the text iterator type and on its
// iterator category, which defaults to std::iterator_traits<IteratorType>::iterator_category.
44 template<
typename IteratorType,
45 typename CategoryType =
typename std::iterator_traits<IteratorType>::iterator_category>
46 struct mapping_traits {
// Character type of the text, taken from the iterator's value_type.
47 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
// NOTE(review): the signature of the enclosing function is not visible in this listing;
// b, e, t and l are presumably its parameters (range begin/end, boundary type, locale) - confirm.
// Copy [b, e) into a std::basic_string so the facet can be given a pair of raw pointers.
50 std::basic_string<char_type> str(b, e);
// Run the boundary_indexing<char_type> facet obtained from l over the copied buffer
// and return whatever its map() produces.
51 return std::use_facet<boundary_indexing<char_type>>(l).map(t, str.c_str(), str.c_str() + str.size());
// Compile-time trait with a single boolean, is_linear, computed purely from type identity.
55 template<
typename CharType,
typename SomeIteratorType>
56 struct linear_iterator_traits {
// is_linear is true when SomeIteratorType is exactly one of:
//   - CharType* or const CharType*
//   - std::basic_string<CharType>::iterator or ::const_iterator
//   - std::vector<CharType>::iterator or ::const_iterator
// Any other iterator type yields false.
57 static constexpr
bool is_linear =
58 std::is_same<SomeIteratorType, CharType*>::value || std::is_same<SomeIteratorType, const CharType*>::value
59 || std::is_same<SomeIteratorType, typename std::basic_string<CharType>::iterator>::value
60 || std::is_same<SomeIteratorType, typename std::basic_string<CharType>::const_iterator>::value
61 || std::is_same<SomeIteratorType, typename std::vector<CharType>::iterator>::value
62 || std::is_same<SomeIteratorType, typename std::vector<CharType>::const_iterator>::value;
// Partial specialisation of mapping_traits selected when the iterator category is
// std::random_access_iterator_tag.
65 template<
typename IteratorType>
66 struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
// Character type of the text, taken from the iterator's value_type.
67 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
// NOTE(review): the enclosing function signature and the else/brace lines are not visible
// in this listing; b, e, t and l are presumably its parameters - confirm.
// In-place path: taken only when linear_iterator_traits marks the iterator type as linear
// and the range is non-empty, so &*b is never evaluated for b == e.
78 if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
// Derive a raw pointer range from the first element and the distance e - b.
79 const char_type* begin = &*b;
80 const char_type* end = begin + (e - b);
// Hand the pointers straight to the facet; no intermediate string is built on this path.
81 index_type tmp = std::use_facet<boundary_indexing<char_type>>(l).map(t, begin, end);
// Copying path: build a std::basic_string from [b, e) and map over that buffer instead.
84 std::basic_string<char_type> str(b, e);
86 std::use_facet<boundary_indexing<char_type>>(l).map(t, str.c_str(), str.c_str() + str.size());
// mapping: bundles a boundary index (index_type) with the [begin, end) text range it was built from.
// NOTE(review): the class head itself is not visible in this listing.
93 template<
typename BaseIterator>
// base_iterator is the iterator over the original text; char_type is its value_type.
96 typedef BaseIterator base_iterator;
97 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
// Constructor: starts with a newly allocated, empty index_type and remembers the range.
99 mapping(
boundary_type type, base_iterator begin, base_iterator end,
const std::locale& loc) :
100 index_(new
index_type()), begin_(begin), end_(end)
// Compute the index for the range through detail::mapping_traits into a local.
// NOTE(review): the statement that moves idx into *index_ is not visible here.
102 index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
// Read-only access to the index held through index_.
108 const index_type& index()
const {
return *index_; }
// Start of the text range this mapping was built from.
110 base_iterator begin()
const {
return begin_; }
// End of the text range this mapping was built from.
112 base_iterator end()
const {
return end_; }
// The index is held through a std::shared_ptr; the range iterators are stored by value.
115 std::shared_ptr<index_type> index_;
116 base_iterator begin_, end_;
// segment_index_iterator: iterator over text segments, built on boost::iterator_facade with
// bidirectional traversal. Its value type is segment<BaseIterator> and it dereferences to a
// const reference to a segment stored inside the iterator (value_).
// NOTE(review): this listing omits several function heads and loop bodies; comments below
// describe only the statements that are visible.
119 template<
typename BaseIterator>
120 class segment_index_iterator :
public boost::iterator_facade<segment_index_iterator<BaseIterator>,
121 segment<BaseIterator>,
122 boost::bidirectional_traversal_tag,
123 const segment<BaseIterator>&> {
125 typedef BaseIterator base_iterator;
126 typedef mapping<base_iterator> mapping_type;
127 typedef segment<base_iterator> segment_type;
// Default constructor: position (0, 0), null mapping pointer, empty mask, full_select off.
129 segment_index_iterator() : current_(0, 0), map_(0), mask_(0), full_select_(false) {}
// Constructor taking a text position p; stores the mapping pointer, rule mask and
// full_select flag. The body is not visible in this listing.
131 segment_index_iterator(base_iterator p,
const mapping_type* map,
rule_type mask,
bool full_select) :
132 map_(map), mask_(mask), full_select_(full_select)
// Constructor taking an is_begin flag instead of a position; same stored state.
// The body is not visible in this listing.
136 segment_index_iterator(
bool is_begin,
const mapping_type* map,
rule_type mask,
bool full_select) :
137 map_(map), mask_(mask), full_select_(full_select)
// iterator_facade hook: expose the cached segment.
145 const segment_type& dereference()
const {
return value_; }
// iterator_facade hook: two iterators are equal when they point at the same mapping and
// have the same current_.second; current_.first is not part of the comparison.
147 bool equal(
const segment_index_iterator& other)
const 149 return map_ == other.map_ && current_.second == other.current_.second;
// Forward step (presumably increment(); head not visible). Works on a copy of current_ and
// commits it through update_current(). Two scan variants are visible; the condition that
// selects between them is not shown in this listing.
154 std::pair<size_t, size_t> next = current_;
// Variant 1: the new segment starts where the old one ended, then next.second is scanned
// forward while it is below size(), testing valid_offset().
156 next.first = next.second;
157 while(next.second < size()) {
159 if(valid_offset(next.second))
// When the scan ran to size(), first is placed one slot before second.
162 if(next.second == size())
163 next.first = next.second - 1;
// Variant 2: first follows second on every iteration of the scan.
165 while(next.second < size()) {
166 next.first = next.second;
168 if(valid_offset(next.second))
172 update_current(next);
// Backward step (presumably decrement(); head not visible). Again works on a copy of
// current_ with two scan variants whose selecting condition is not shown.
177 std::pair<size_t, size_t> next = current_;
// Variant 1: scan second while it is above 1, then scan first from second down towards 0,
// both testing valid_offset().
179 while(next.second > 1) {
181 if(valid_offset(next.second))
184 next.first = next.second;
185 while(next.first > 0) {
187 if(valid_offset(next.first))
// Variant 2: scan second while it is above 1 and place first one slot before it.
191 while(next.second > 1) {
193 if(valid_offset(next.second))
196 next.first = next.second - 1;
198 update_current(next);
// End position (function head not visible): indices (size() - 1, size()) and an empty
// segment anchored at map_->end() with rule 0.
204 current_.first = size() - 1;
205 current_.second = size();
206 value_ = segment_type(map_->end(), map_->end(), 0);
// Begin position (function head not visible): indices (0, 0) and an empty segment anchored
// at map_->begin() with rule 0.
210 current_.first = current_.second = 0;
211 value_ = segment_type(map_->begin(), map_->begin(), 0);
// Position the iterator relative to the text position p.
215 void set(base_iterator p)
// Distance of p from the start of the mapped text.
217 size_t dist = std::distance(map_->begin(), p);
218 index_type::const_iterator b = map_->index().begin(), e = map_->index().end();
// First index entry that compares greater than break_info(dist).
219 index_type::const_iterator boundary_point = std::upper_bound(b, e, break_info(dist));
// Loop while the entry's rule has no bit in common with mask_ (loop body not visible).
220 while(boundary_point != e && (boundary_point->rule & mask_) == 0)
// Both indices start at the located entry.
223 current_.first = current_.second = boundary_point - b;
// Scan first while it is above 0, testing valid_offset() (loop body only partly visible).
226 while(current_.first > 0) {
228 if(valid_offset(current_.first))
232 if(current_.first > 0)
// Rebuild the cached segment: first is reached from map_->begin() by its offset, and
// second is reached from first by the difference of the two offsets.
235 value_.first = map_->begin();
236 std::advance(value_.first, get_offset(current_.first));
237 value_.second = value_.first;
238 std::advance(value_.second, get_offset(current_.second) - get_offset(current_.first));
// Move the cached segment from current_ to pos by advancing its two iterators by the
// signed difference between the new and old offsets.
243 void update_current(std::pair<size_t, size_t> pos)
245 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
246 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
247 std::advance(value_.first, first_diff);
248 std::advance(value_.second, second_diff);
// The segment's rule is refreshed from the index only when second is not at size().
255 if(current_.second != size()) {
256 value_.rule(index()[current_.second].rule);
// Translate an index slot into a text offset. One branch returns the offset of the last
// index entry; NOTE(review): the condition guarding that branch is not visible here.
259 size_t get_offset(
size_t ind)
const 262 return index().back().offset;
263 return index()[ind].offset;
// A slot is accepted when it is 0, when it equals size(), or when its rule shares at least
// one bit with mask_.
266 bool valid_offset(
size_t offset)
const 268 return offset == 0 || offset == size()
269 || (index()[offset].rule & mask_) != 0;
// Number of entries in the underlying index.
272 size_t size()
const {
return index().size(); }
// Shortcut to the index owned by the mapping.
274 const index_type& index()
const {
return map_->index(); }
// current_ holds the (first, second) index slots of the current segment; map_ is a
// non-owning pointer to the mapping being iterated.
277 std::pair<size_t, size_t> current_;
278 const mapping_type* map_;
// boundary_point_index_iterator: iterator over boundary points, built on
// boost::iterator_facade with bidirectional traversal. Its value type is
// boundary_point<BaseIterator> and it dereferences to a const reference to a boundary point
// stored inside the iterator (value_).
// NOTE(review): this listing omits several function heads and loop bodies; comments below
// describe only the statements that are visible.
283 template<
typename BaseIterator>
284 class boundary_point_index_iterator :
public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
285 boundary_point<BaseIterator>,
286 boost::bidirectional_traversal_tag,
287 const boundary_point<BaseIterator>&> {
289 typedef BaseIterator base_iterator;
290 typedef mapping<base_iterator> mapping_type;
291 typedef boundary_point<base_iterator> boundary_point_type;
// Default constructor: slot 0, null mapping pointer, empty mask.
293 boundary_point_index_iterator() : current_(0), map_(0), mask_(0) {}
// Constructor taking an is_begin flag; stores the mapping pointer and rule mask.
// The body is not visible in this listing.
295 boundary_point_index_iterator(
bool is_begin,
const mapping_type* map,
rule_type mask) :
296 map_(map), mask_(mask)
// Constructor taking a text position p; stores the mapping pointer and rule mask.
// The body is not visible in this listing.
303 boundary_point_index_iterator(base_iterator p,
const mapping_type* map,
rule_type mask) :
304 map_(map), mask_(mask)
// iterator_facade hook: expose the cached boundary point.
309 const boundary_point_type& dereference()
const {
return value_; }
// iterator_facade hook: equal when both iterators share the mapping and the index slot.
311 bool equal(
const boundary_point_index_iterator& other)
const 313 return map_ == other.map_ && current_ == other.current_;
// Forward step (presumably increment(); head not visible): scan a copy of current_ while it
// is below size(), testing valid_offset(), then commit through update_current().
318 size_t next = current_;
319 while(next < size()) {
321 if(valid_offset(next))
324 update_current(next);
// Backward step (presumably decrement(); head and loop not visible): same shape, committed
// through update_current().
329 size_t next = current_;
332 if(valid_offset(next))
335 update_current(next);
// End position (function head not visible): boundary point at map_->end() with rule 0.
342 value_ = boundary_point_type(map_->end(), 0);
// Begin position (function head not visible): boundary point at map_->begin() with rule 0.
347 value_ = boundary_point_type(map_->begin(), 0);
// Position the iterator relative to the text position p.
350 void set(base_iterator p)
// Distance of p from the start of the mapped text.
352 size_t dist = std::distance(map_->begin(), p);
354 index_type::const_iterator b = index().begin();
355 index_type::const_iterator e = index().end();
// First index entry that does not compare less than break_info(dist).
356 index_type::const_iterator ptr = std::lower_bound(b, e, break_info(dist));
// No such entry: fall back to the last slot; otherwise use the located slot.
358 if(ptr == index().end())
359 current_ = size() - 1;
361 current_ = ptr - index().begin();
// Loop until the slot passes valid_offset() (loop body not visible).
363 while(!valid_offset(current_))
// Move p by the signed difference between the chosen slot's offset and dist.
366 std::ptrdiff_t diff = get_offset(current_) - dist;
367 std::advance(p, diff);
// Move the cached boundary point from current_ to pos by advancing a copy of its iterator
// by the signed difference between the two offsets.
372 void update_current(
size_t pos)
374 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
375 base_iterator i = value_.iterator();
376 std::advance(i, diff);
// The rule is refreshed from the index only when current_ is not at size().
384 if(current_ != size()) {
385 value_.rule(index()[current_].rule);
// Translate an index slot into a text offset. One branch returns the offset of the last
// index entry; NOTE(review): the condition guarding that branch is not visible here.
388 size_t get_offset(
size_t ind)
const 391 return index().back().offset;
392 return index()[ind].offset;
// A slot is accepted when it is 0, when offset + 1 >= size() (the last slot or beyond), or
// when its rule shares at least one bit with mask_.
395 bool valid_offset(
size_t offset)
const 397 return offset == 0 || offset + 1 >= size()
398 || (index()[offset].rule & mask_) != 0;
// Number of entries in the underlying index.
401 size_t size()
const {
return index().size(); }
// Shortcut to the index owned by the mapping.
403 const index_type& index()
const {
return map_->index(); }
// value_ is the cached boundary point returned by dereference(); map_ is a non-owning
// pointer to the mapping being iterated.
405 boundary_point_type value_;
407 const mapping_type* map_;
415 template<
typename BaseIterator>
418 template<
typename BaseIterator>
// Fragments of the segment_index class template; the class head and most member
// signatures are not visible in this listing.
471 template<
typename BaseIterator>
// Under BOOST_LOCALE_DOXYGEN the iterator type is presented as unspecified.
477 #ifdef BOOST_LOCALE_DOXYGEN 478 typedef unspecified_iterator_type
iterator;
// In the real build, iterator and const_iterator are the same type:
// detail::segment_index_iterator<base_iterator>.
494 typedef detail::segment_index_iterator<base_iterator>
iterator;
495 typedef detail::segment_index_iterator<base_iterator>
const_iterator;
// Constructor tail that takes an explicit mask: stores it and starts with full_select off.
// loc defaults to a default-constructed std::locale.
515 const std::locale& loc = std::locale()) :
517 mask_(mask), full_select_(false)
// Constructor tail without a mask: all 32 mask bits are set and full_select starts off.
524 const std::locale& loc = std::locale()) :
526 mask_(0xFFFFFFFFu), full_select_(false)
// Rebuild the stored mapping for a new range by assigning a freshly constructed mapping_type.
555 map_ = mapping_type(type,
begin,
end, loc);
// Iterator built with is_begin = true; NOTE(review): enclosing function head not visible,
// presumably begin().
567 return iterator(
true, &map_, mask_, full_select_);
// Iterator built with is_begin = false; presumably end().
577 return iterator(
false, &map_, mask_, full_select_);
// Iterator positioned from the text position p; presumably find().
597 return iterator(p, &map_, mask_, full_select_);
// The mapping type stored in map_ is detail::mapping over the same base iterator.
643 typedef detail::mapping<base_iterator> mapping_type;
// Fragments of the boundary_point_index class template; most member signatures are not
// visible in this listing.
693 template<
typename BaseIterator>
694 class boundary_point_index {
// Under BOOST_LOCALE_DOXYGEN the iterator type is presented as unspecified.
699 #ifdef BOOST_LOCALE_DOXYGEN 700 typedef unspecified_iterator_type
iterator;
// In the real build, iterator and const_iterator are the same type:
// detail::boundary_point_index_iterator<base_iterator>.
717 typedef detail::boundary_point_index_iterator<base_iterator>
iterator;
718 typedef detail::boundary_point_index_iterator<base_iterator>
const_iterator;
// Tails of two constructor parameter lists; in both, loc defaults to a default-constructed
// std::locale. The initialiser lists are not visible here.
739 const std::locale& loc = std::locale()) :
748 const std::locale& loc = std::locale()) :
// Rebuild the stored mapping for a new range by assigning a freshly constructed mapping_type.
778 map_ = mapping_type(type,
begin,
end, loc);
// Iterator built with is_begin = true; NOTE(review): enclosing function head not visible,
// presumably begin().
790 return iterator(
true, &map_, mask_);
// Iterator built with is_begin = false; presumably end().
802 return iterator(
false, &map_, mask_);
// The mapping type stored in map_ is detail::mapping over the same base iterator.
834 typedef detail::mapping<base_iterator> mapping_type;
// Out-of-class member definitions; only the template heads and initialiser lists are
// visible in this listing.
840 template<
typename BaseIterator>
// Copies map_ from other, sets every mask bit and turns full_select off.
// NOTE(review): the full_select_ member suggests this is a segment_index constructor - confirm.
842 map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false)
845 template<
typename BaseIterator>
// Copies map_ from other and sets every mask bit.
// NOTE(review): presumably the matching boundary_point_index constructor - confirm.
847 map_(other.map_), mask_(0xFFFFFFFFu)
// Template head of a further definition whose declarator and body are not visible.
850 template<
typename BaseIterator>
857 template<
typename BaseIterator>
// Return type of a member that yields boundary_point_index<BaseIterator>&; the rest of the
// definition is not visible.
858 boundary_point_index<BaseIterator>&
868 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 871 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 877 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 880 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 886 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 889 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 895 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 898 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 912 # pragma warning(pop) void full_select(bool v)
Definition: index.hpp:636
iterator find(base_iterator p) const
Definition: index.hpp:816
boundary_point_index< const char32_t * > u32cboundary_point_index
convenience typedef
Definition: index.hpp:899
A segment object that represents a pair of two iterators that define the range where this segment exists...
Definition: segment.hpp:91
boundary_type
This type describes the possible boundary analysis alternatives.
Definition: types.hpp:30
bool full_select() const
Definition: index.hpp:621
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:601
This class holds an index of boundary points and allows iterating over them.
Definition: index.hpp:419
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:475
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:735
segment_index< std::u16string::const_iterator > u16ssegment_index
convenience typedef
Definition: index.hpp:869
iterator begin() const
Definition: index.hpp:565
segment_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:521
boundary_point_index< std::wstring::const_iterator > wsboundary_point_index
convenience typedef
Definition: index.hpp:885
iterator end() const
Definition: index.hpp:800
segment_index & operator=(const boundary_point_index< base_iterator > &)
segment_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:511
boundary_point< base_iterator > value_type
Definition: index.hpp:722
segment< base_iterator > value_type
Definition: index.hpp:499
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:827
boundary_point_index< const wchar_t * > wcboundary_point_index
convenience typedef
Definition: index.hpp:894
boundary_point_index< const char16_t * > u16cboundary_point_index
convenience typedef
Definition: index.hpp:896
boundary_point_index & operator=(const segment_index< base_iterator > &other)
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found.
Definition: types.hpp:40
segment_index< const wchar_t * > wcsegment_index
convenience typedef
Definition: index.hpp:876
unspecified_iterator_type iterator
Definition: index.hpp:490
segment_index()
Definition: index.hpp:508
iterator end() const
Definition: index.hpp:575
iterator begin() const
Definition: index.hpp:788
boundary_point_index< std::string::const_iterator > sboundary_point_index
convenience typedef
Definition: index.hpp:884
segment_index< std::string::const_iterator > ssegment_index
convenience typedef
Definition: index.hpp:866
segment_index< std::wstring::const_iterator > wssegment_index
convenience typedef
Definition: index.hpp:867
unspecified_iterator_type const_iterator
Definition: index.hpp:492
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:776
unspecified_iterator_type const_iterator
Definition: index.hpp:715
This class represents a boundary point in the text.
Definition: boundary_point.hpp:44
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:822
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:606
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:745
boundary_point_index< std::u32string::const_iterator > u32sboundary_point_index
convenience typedef
Definition: index.hpp:890
iterator find(base_iterator p) const
Definition: index.hpp:595
unspecified_iterator_type iterator
Definition: index.hpp:713
boundary_point_index< std::u16string::const_iterator > u16sboundary_point_index
convenience typedef
Definition: index.hpp:887
Generate boundary analysis facet.
segment_index< const char16_t * > u16csegment_index
convenience typedef
Definition: index.hpp:878
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:697
segment_index< const char32_t * > u32csegment_index
convenience typedef
Definition: index.hpp:881
segment_index< const char * > csegment_index
convenience typedef
Definition: index.hpp:875
segment_index< std::u32string::const_iterator > u32ssegment_index
convenience typedef
Definition: index.hpp:872
std::vector< break_info > index_type
Definition: facets.hpp:50
boundary_point_index< const char * > cboundary_point_index
convenience typedef
Definition: index.hpp:893
boundary_point_index()
Definition: index.hpp:731
This class holds an index of segments in the text range and allows iterating over them.
Definition: index.hpp:416
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:553