Boost.Locale
index.hpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // https://www.boost.org/LICENSE_1_0.txt
6 
7 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
8 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
9 
10 #include <boost/locale/boundary/boundary_point.hpp>
11 #include <boost/locale/boundary/facets.hpp>
12 #include <boost/locale/boundary/segment.hpp>
13 #include <boost/locale/boundary/types.hpp>
14 #include <boost/cstdint.hpp>
15 #include <boost/iterator/iterator_facade.hpp>
16 #include <algorithm>
17 #include <iterator>
18 #include <locale>
19 #include <memory>
20 #include <stdexcept>
21 #include <string>
22 #include <type_traits>
23 #include <vector>
24 
25 #ifdef BOOST_MSVC
26 # pragma warning(push)
27 # pragma warning(disable : 4275 4251 4231 4660)
28 #endif
29 
30 namespace boost { namespace locale { namespace boundary {
39 
41 
42  namespace detail {
43 
44  template<typename IteratorType,
45  typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
46  struct mapping_traits {
47  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
48  static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
49  {
50  std::basic_string<char_type> str(b, e);
51  return std::use_facet<boundary_indexing<char_type>>(l).map(t, str.c_str(), str.c_str() + str.size());
52  }
53  };
54 
// Compile-time predicate: is_linear is true exactly when SomeIteratorType is a (const)
// CharType pointer or the (const_)iterator of std::basic_string<CharType> or
// std::vector<CharType>. For every other type it is false.
template<typename CharType, typename SomeIteratorType>
struct linear_iterator_traits {
private:
    typedef std::basic_string<CharType> string_type;
    typedef std::vector<CharType> vector_type;

    static constexpr bool is_raw_pointer =
      std::is_same<SomeIteratorType, CharType*>::value || std::is_same<SomeIteratorType, const CharType*>::value;

    static constexpr bool is_string_iterator =
      std::is_same<SomeIteratorType, typename string_type::iterator>::value
      || std::is_same<SomeIteratorType, typename string_type::const_iterator>::value;

    static constexpr bool is_vector_iterator =
      std::is_same<SomeIteratorType, typename vector_type::iterator>::value
      || std::is_same<SomeIteratorType, typename vector_type::const_iterator>::value;

public:
    static constexpr bool is_linear = is_raw_pointer || is_string_iterator || is_vector_iterator;
};
64 
65  template<typename IteratorType>
66  struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
67  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
68 
69  static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
70  {
71  index_type result;
72 
73  // Optimize for most common cases
74  //
75  // C++11 requires that string is continuous in memory and all known
76  // string implementations do this because of c_str() support.
77 
78  if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
79  const char_type* begin = &*b;
80  const char_type* end = begin + (e - b);
81  index_type tmp = std::use_facet<boundary_indexing<char_type>>(l).map(t, begin, end);
82  result.swap(tmp);
83  } else {
84  std::basic_string<char_type> str(b, e);
85  index_type tmp =
86  std::use_facet<boundary_indexing<char_type>>(l).map(t, str.c_str(), str.c_str() + str.size());
87  result.swap(tmp);
88  }
89  return result;
90  }
91  };
92 
93  template<typename BaseIterator>
94  class mapping {
95  public:
96  typedef BaseIterator base_iterator;
97  typedef typename std::iterator_traits<base_iterator>::value_type char_type;
98 
99  mapping(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc) :
100  index_(new index_type()), begin_(begin), end_(end)
101  {
102  index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
103  index_->swap(idx);
104  }
105 
106  mapping() {}
107 
108  const index_type& index() const { return *index_; }
109 
110  base_iterator begin() const { return begin_; }
111 
112  base_iterator end() const { return end_; }
113 
114  private:
115  std::shared_ptr<index_type> index_;
116  base_iterator begin_, end_;
117  };
118 
// Bidirectional iterator over the segments described by a mapping's boundary index.
// Dereferencing yields a const reference to a cached segment (value_).
// current_ stores two positions inside the boundary index: .first is the entry at which
// the current segment starts and .second the entry at which it ends; value_ holds the
// corresponding text iterators. An index entry is selected when its rule has a bit in
// common with mask_ (see valid_offset()).
119  template<typename BaseIterator>
120  class segment_index_iterator : public boost::iterator_facade<segment_index_iterator<BaseIterator>,
121  segment<BaseIterator>,
122  boost::bidirectional_traversal_tag,
123  const segment<BaseIterator>&> {
124  public:
125  typedef BaseIterator base_iterator;
126  typedef mapping<base_iterator> mapping_type;
127  typedef segment<base_iterator> segment_type;
128 
     // Creates an iterator that is not attached to any mapping (map_ is null).
129  segment_index_iterator() : current_(0, 0), map_(0), mask_(0), full_select_(false) {}
130 
     // Creates an iterator positioned relative to text position p (see set()).
131  segment_index_iterator(base_iterator p, const mapping_type* map, rule_type mask, bool full_select) :
132  map_(map), mask_(mask), full_select_(full_select)
133  {
134  set(p);
135  }
     // Creates the begin iterator (is_begin == true) or the end iterator of the mapping.
136  segment_index_iterator(bool is_begin, const mapping_type* map, rule_type mask, bool full_select) :
137  map_(map), mask_(mask), full_select_(full_select)
138  {
139  if(is_begin)
140  set_begin();
141  else
142  set_end();
143  }
144 
     // iterator_facade hook: returns the cached segment.
145  const segment_type& dereference() const { return value_; }
146 
     // iterator_facade hook: two iterators are equal when they refer to the same mapping
     // object and their segments end at the same index entry; current_.first is not compared.
147  bool equal(const segment_index_iterator& other) const
148  {
149  return map_ == other.map_ && current_.second == other.current_.second;
150  }
151 
     // iterator_facade hook: moves to the next segment.
152  void increment()
153  {
154  std::pair<size_t, size_t> next = current_;
155  if(full_select_) {
     // The new segment starts where the old one ended and extends up to the next
     // entry accepted by valid_offset().
156  next.first = next.second;
157  while(next.second < size()) {
158  next.second++;
159  if(valid_offset(next.second))
160  break;
161  }
     // On reaching the end, use the same (size() - 1, size()) pair that set_end() stores.
162  if(next.second == size())
163  next.first = next.second - 1;
164  } else {
     // The segment start always trails the end by exactly one index entry.
165  while(next.second < size()) {
166  next.first = next.second;
167  next.second++;
168  if(valid_offset(next.second))
169  break;
170  }
171  }
172  update_current(next);
173  }
174 
     // iterator_facade hook: moves to the previous segment.
175  void decrement()
176  {
177  std::pair<size_t, size_t> next = current_;
178  if(full_select_) {
     // Step the end back to the previous accepted entry (never below 1) ...
179  while(next.second > 1) {
180  next.second--;
181  if(valid_offset(next.second))
182  break;
183  }
     // ... then search backwards from there for the accepted entry that starts the segment.
184  next.first = next.second;
185  while(next.first > 0) {
186  next.first--;
187  if(valid_offset(next.first))
188  break;
189  }
190  } else {
191  while(next.second > 1) {
192  next.second--;
193  if(valid_offset(next.second))
194  break;
195  }
     // Without full selection the segment starts at the entry just before its end.
196  next.first = next.second - 1;
197  }
198  update_current(next);
199  }
200 
201  private:
     // Positions the iterator past the last segment: an empty segment at the end of the text.
202  void set_end()
203  {
204  current_.first = size() - 1;
205  current_.second = size();
206  value_ = segment_type(map_->end(), map_->end(), 0);
207  }
     // Positions the iterator on the first segment: start from an empty segment at the
     // beginning of the text and advance once.
208  void set_begin()
209  {
210  current_.first = current_.second = 0;
211  value_ = segment_type(map_->begin(), map_->begin(), 0);
212  increment();
213  }
214 
     // Positions the iterator relative to text position p. The segment end is the first
     // index entry that std::upper_bound places after break_info(dist) and whose rule
     // matches mask_, or size() when there is none.
     // NOTE(review): the ordering used by upper_bound comes from break_info's comparison,
     // which is declared in another header - presumably it orders by offset; confirm.
215  void set(base_iterator p)
216  {
217  size_t dist = std::distance(map_->begin(), p);
218  index_type::const_iterator b = map_->index().begin(), e = map_->index().end();
219  index_type::const_iterator boundary_point = std::upper_bound(b, e, break_info(dist));
220  while(boundary_point != e && (boundary_point->rule & mask_) == 0)
221  boundary_point++;
222 
223  current_.first = current_.second = boundary_point - b;
224 
225  if(full_select_) {
     // Walk back to the closest accepted entry before the segment end.
226  while(current_.first > 0) {
227  current_.first--;
228  if(valid_offset(current_.first))
229  break;
230  }
231  } else {
232  if(current_.first > 0)
233  current_.first--;
234  }
     // Rebuild the text iterators of the cached segment from the index offsets.
235  value_.first = map_->begin();
236  std::advance(value_.first, get_offset(current_.first));
237  value_.second = value_.first;
238  std::advance(value_.second, get_offset(current_.second) - get_offset(current_.first));
239 
240  update_rule();
241  }
242 
     // Moves the cached segment to index positions pos by advancing each text iterator by
     // the (signed) difference between its new and old offset, then refreshes the rule.
243  void update_current(std::pair<size_t, size_t> pos)
244  {
245  std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
246  std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
247  std::advance(value_.first, first_diff);
248  std::advance(value_.second, second_diff);
249  current_ = pos;
250  update_rule();
251  }
252 
     // Copies the rule of the entry at which the segment ends into the cached segment.
     // The rule is left untouched when the segment end is at size().
253  void update_rule()
254  {
255  if(current_.second != size()) {
256  value_.rule(index()[current_.second].rule);
257  }
258  }
     // Text offset of index entry ind; ind == size() yields the offset of the last entry.
259  size_t get_offset(size_t ind) const
260  {
261  if(ind == size())
262  return index().back().offset;
263  return index()[ind].offset;
264  }
265 
     // True when the index position (named offset here) may delimit a segment: position 0
     // and position size() always qualify, any other one only if its rule matches mask_.
266  bool valid_offset(size_t offset) const
267  {
268  return offset == 0 || offset == size() // make sure we do not access index[size]
269  || (index()[offset].rule & mask_) != 0;
270  }
271 
     // Number of entries in the boundary index.
272  size_t size() const { return index().size(); }
273 
     // The boundary index of the attached mapping; requires map_ to be non-null.
274  const index_type& index() const { return map_->index(); }
275 
276  segment_type value_;                 // cached segment returned by dereference()
277  std::pair<size_t, size_t> current_;  // index positions of the segment start / end
278  const mapping_type* map_;            // mapping being iterated; not owned
279  rule_type mask_;                     // rule bits that select an index entry
280  bool full_select_;                   // chooses how the segment start is found (see increment())
281  };
282 
// Bidirectional iterator over the boundary points described by a mapping's boundary index.
// Dereferencing yields a const reference to a cached boundary_point (value_).
// current_ is the position of the current entry inside the boundary index, or size()
// for the end iterator. An entry is visited when valid_offset() accepts it.
283  template<typename BaseIterator>
284  class boundary_point_index_iterator : public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
285  boundary_point<BaseIterator>,
286  boost::bidirectional_traversal_tag,
287  const boundary_point<BaseIterator>&> {
288  public:
289  typedef BaseIterator base_iterator;
290  typedef mapping<base_iterator> mapping_type;
291  typedef boundary_point<base_iterator> boundary_point_type;
292 
     // Creates an iterator that is not attached to any mapping (map_ is null).
293  boundary_point_index_iterator() : current_(0), map_(0), mask_(0) {}
294 
     // Creates the begin iterator (is_begin == true) or the end iterator of the mapping.
295  boundary_point_index_iterator(bool is_begin, const mapping_type* map, rule_type mask) :
296  map_(map), mask_(mask)
297  {
298  if(is_begin)
299  set_begin();
300  else
301  set_end();
302  }
     // Creates an iterator positioned relative to text position p (see set()).
303  boundary_point_index_iterator(base_iterator p, const mapping_type* map, rule_type mask) :
304  map_(map), mask_(mask)
305  {
306  set(p);
307  }
308 
     // iterator_facade hook: returns the cached boundary point.
309  const boundary_point_type& dereference() const { return value_; }
310 
     // iterator_facade hook: equal when both refer to the same mapping object and index position.
311  bool equal(const boundary_point_index_iterator& other) const
312  {
313  return map_ == other.map_ && current_ == other.current_;
314  }
315 
     // iterator_facade hook: advances to the next accepted index position, or to size().
316  void increment()
317  {
318  size_t next = current_;
319  while(next < size()) {
320  next++;
321  if(valid_offset(next))
322  break;
323  }
324  update_current(next);
325  }
326 
     // iterator_facade hook: steps back to the previous accepted index position (at least 0).
327  void decrement()
328  {
329  size_t next = current_;
330  while(next > 0) {
331  next--;
332  if(valid_offset(next))
333  break;
334  }
335  update_current(next);
336  }
337 
338  private:
     // Positions the iterator past the last boundary point: text end, rule 0.
339  void set_end()
340  {
341  current_ = size();
342  value_ = boundary_point_type(map_->end(), 0);
343  }
     // Positions the iterator on index entry 0: text begin, rule 0.
344  void set_begin()
345  {
346  current_ = 0;
347  value_ = boundary_point_type(map_->begin(), 0);
348  }
349 
     // Positions the iterator on the first accepted entry that std::lower_bound does not
     // place before break_info(dist), where dist is the distance of p from the text start.
     // NOTE(review): the ordering used by lower_bound comes from break_info's comparison,
     // which is declared in another header - presumably it orders by offset; confirm.
350  void set(base_iterator p)
351  {
352  size_t dist = std::distance(map_->begin(), p);
353 
354  index_type::const_iterator b = index().begin();
355  index_type::const_iterator e = index().end();
356  index_type::const_iterator ptr = std::lower_bound(b, e, break_info(dist));
357 
     // No entry found: fall back to the last index entry.
358  if(ptr == index().end())
359  current_ = size() - 1;
360  else
361  current_ = ptr - index().begin();
362 
     // Terminates because valid_offset() accepts the last entry unconditionally.
363  while(!valid_offset(current_))
364  current_++;
365 
     // Move p by the difference between the entry's offset and dist, then cache it.
366  std::ptrdiff_t diff = get_offset(current_) - dist;
367  std::advance(p, diff);
368  value_.iterator(p);
369  update_rule();
370  }
371 
     // Moves the cached boundary point to index position pos by advancing its text iterator
     // by the (signed) difference between the new and the old offset, then refreshes the rule.
372  void update_current(size_t pos)
373  {
374  std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
375  base_iterator i = value_.iterator();
376  std::advance(i, diff);
377  current_ = pos;
378  value_.iterator(i);
379  update_rule();
380  }
381 
     // Copies the rule of the current index entry into the cached boundary point.
     // The rule is left untouched when the iterator is at size().
382  void update_rule()
383  {
384  if(current_ != size()) {
385  value_.rule(index()[current_].rule);
386  }
387  }
     // Text offset of index entry ind; ind == size() yields the offset of the last entry.
388  size_t get_offset(size_t ind) const
389  {
390  if(ind == size())
391  return index().back().offset;
392  return index()[ind].offset;
393  }
394 
     // True when the index position (named offset here) is visited by this iterator:
     // position 0 and every position from size() - 1 onwards always qualify, any other
     // one only if its rule matches mask_.
395  bool valid_offset(size_t offset) const
396  {
397  return offset == 0 || offset + 1 >= size() // last and first are always valid regardless of mark
398  || (index()[offset].rule & mask_) != 0;
399  }
400 
     // Number of entries in the boundary index.
401  size_t size() const { return index().size(); }
402 
     // The boundary index of the attached mapping; requires map_ to be non-null.
403  const index_type& index() const { return map_->index(); }
404 
405  boundary_point_type value_;  // cached boundary point returned by dereference()
406  size_t current_;             // position inside the boundary index, size() for end
407  const mapping_type* map_;    // mapping being iterated; not owned
408  rule_type mask_;             // rule bits that select an index entry
409  };
410 
411  } // namespace detail
412 
414 
// Forward declarations: each of the two index classes names the other one
// (friend declaration, converting constructor and assignment) before it is defined.
template<typename BaseIterator>
class segment_index;

template<typename BaseIterator>
class boundary_point_index;

470 
471  template<typename BaseIterator>
472  class segment_index {
473  public:
475  typedef BaseIterator base_iterator;
476 
477 #ifdef BOOST_LOCALE_DOXYGEN
478  typedef unspecified_iterator_type iterator;
492  typedef unspecified_iterator_type const_iterator;
493 #else
494  typedef detail::segment_index_iterator<base_iterator> iterator;
495  typedef detail::segment_index_iterator<base_iterator> const_iterator;
496 #endif
500 
508  segment_index() : mask_(0xFFFFFFFFu), full_select_(false) {}
514  rule_type mask,
515  const std::locale& loc = std::locale()) :
516  map_(type, begin, end, loc),
517  mask_(mask), full_select_(false)
518  {}
524  const std::locale& loc = std::locale()) :
525  map_(type, begin, end, loc),
526  mask_(0xFFFFFFFFu), full_select_(false)
527  {}
528 
538 
548 
553  void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
554  {
555  map_ = mapping_type(type, begin, end, loc);
556  }
557 
565  iterator begin() const
566  {
567  return iterator(true, &map_, mask_, full_select_);
568  }
569 
575  iterator end() const
576  {
577  return iterator(false, &map_, mask_, full_select_);
578  }
579 
596  {
597  return iterator(p, &map_, mask_, full_select_);
598  }
599 
601  rule_type rule() const
602  {
603  return mask_;
604  }
606  void rule(rule_type v)
607  {
608  mask_ = v;
609  }
610 
621  bool full_select() const
622  {
623  return full_select_;
624  }
625 
636  void full_select(bool v)
637  {
638  full_select_ = v;
639  }
640 
641  private:
642  friend class boundary_point_index<base_iterator>;
643  typedef detail::mapping<base_iterator> mapping_type;
644  mapping_type map_;
645  rule_type mask_;
646  bool full_select_;
647  };
648 
693  template<typename BaseIterator>
694  class boundary_point_index {
695  public:
697  typedef BaseIterator base_iterator;
698 
699 #ifdef BOOST_LOCALE_DOXYGEN
700  typedef unspecified_iterator_type iterator;
715  typedef unspecified_iterator_type const_iterator;
716 #else
717  typedef detail::boundary_point_index_iterator<base_iterator> iterator;
718  typedef detail::boundary_point_index_iterator<base_iterator> const_iterator;
719 #endif
723 
731  boundary_point_index() : mask_(0xFFFFFFFFu) {}
732 
738  rule_type mask,
739  const std::locale& loc = std::locale()) :
740  map_(type, begin, end, loc),
741  mask_(mask)
742  {}
748  const std::locale& loc = std::locale()) :
749  map_(type, begin, end, loc),
750  mask_(0xFFFFFFFFu)
751  {}
752 
771 
776  void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
777  {
778  map_ = mapping_type(type, begin, end, loc);
779  }
780 
788  iterator begin() const
789  {
790  return iterator(true, &map_, mask_);
791  }
792 
800  iterator end() const
801  {
802  return iterator(false, &map_, mask_);
803  }
804 
817  {
818  return iterator(p, &map_, mask_);
819  }
820 
822  rule_type rule() const
823  {
824  return mask_;
825  }
827  void rule(rule_type v)
828  {
829  mask_ = v;
830  }
831 
832  private:
833  friend class segment_index<base_iterator>;
834  typedef detail::mapping<base_iterator> mapping_type;
835  mapping_type map_;
836  rule_type mask_;
837  };
838 
840  template<typename BaseIterator>
841  segment_index<BaseIterator>::segment_index(const boundary_point_index<BaseIterator>& other) :
842  map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false)
843  {}
844 
845  template<typename BaseIterator>
846  boundary_point_index<BaseIterator>::boundary_point_index(const segment_index<BaseIterator>& other) :
847  map_(other.map_), mask_(0xFFFFFFFFu)
848  {}
849 
850  template<typename BaseIterator>
851  segment_index<BaseIterator>& segment_index<BaseIterator>::operator=(const boundary_point_index<BaseIterator>& other)
852  {
853  map_ = other.map_;
854  return *this;
855  }
856 
857  template<typename BaseIterator>
858  boundary_point_index<BaseIterator>&
859  boundary_point_index<BaseIterator>::operator=(const segment_index<BaseIterator>& other)
860  {
861  map_ = other.map_;
862  return *this;
863  }
865 
868 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
870 #endif
871 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
873 #endif
874 
877 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
879 #endif
880 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
882 #endif
883 
886 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
888 #endif
889 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
891 #endif
892 
895 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
897 #endif
898 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
900 #endif
901 
902 }}} // namespace boost::locale::boundary
903 
910 
911 #ifdef BOOST_MSVC
912 # pragma warning(pop)
913 #endif
914 
915 #endif
void full_select(bool v)
Definition: index.hpp:636
iterator find(base_iterator p) const
Definition: index.hpp:816
boundary_point_index< const char32_t * > u32cboundary_point_index
convenience typedef
Definition: index.hpp:899
a segment object that represents a pair of two iterators that define the range where this segment exi...
Definition: segment.hpp:91
boundary_type
This type describes the possible boundary analysis alternatives.
Definition: types.hpp:30
bool full_select() const
Definition: index.hpp:621
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:601
This class holds an index of boundary points and allows iterating over them.
Definition: index.hpp:419
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:475
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:735
segment_index< std::u16string::const_iterator > u16ssegment_index
convenience typedef
Definition: index.hpp:869
iterator begin() const
Definition: index.hpp:565
segment_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:521
boundary_point_index< std::wstring::const_iterator > wsboundary_point_index
convenience typedef
Definition: index.hpp:885
iterator end() const
Definition: index.hpp:800
segment_index & operator=(const boundary_point_index< base_iterator > &)
segment_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:511
boundary_point< base_iterator > value_type
Definition: index.hpp:722
segment< base_iterator > value_type
Definition: index.hpp:499
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:827
boundary_point_index< const wchar_t * > wcboundary_point_index
convenience typedef
Definition: index.hpp:894
boundary_point_index< const char16_t * > u16cboundary_point_index
convenience typedef
Definition: index.hpp:896
boundary_point_index & operator=(const segment_index< base_iterator > &other)
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found.
Definition: types.hpp:40
segment_index< const wchar_t * > wcsegment_index
convenience typedef
Definition: index.hpp:876
unspecified_iterator_type iterator
Definition: index.hpp:490
segment_index()
Definition: index.hpp:508
iterator end() const
Definition: index.hpp:575
iterator begin() const
Definition: index.hpp:788
boundary_point_index< std::string::const_iterator > sboundary_point_index
convenience typedef
Definition: index.hpp:884
segment_index< std::string::const_iterator > ssegment_index
convenience typedef
Definition: index.hpp:866
segment_index< std::wstring::const_iterator > wssegment_index
convenience typedef
Definition: index.hpp:867
unspecified_iterator_type const_iterator
Definition: index.hpp:492
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:776
unspecified_iterator_type const_iterator
Definition: index.hpp:715
This class represents a boundary point in the text.
Definition: boundary_point.hpp:44
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:822
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:606
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:745
boundary_point_index< std::u32string::const_iterator > u32sboundary_point_index
convenience typedef
Definition: index.hpp:890
iterator find(base_iterator p) const
Definition: index.hpp:595
unspecified_iterator_type iterator
Definition: index.hpp:713
boundary_point_index< std::u16string::const_iterator > u16sboundary_point_index
convenience typedef
Definition: index.hpp:887
Generate boundary analysis facet.
segment_index< const char16_t * > u16csegment_index
convenience typedef
Definition: index.hpp:878
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:697
segment_index< const char32_t * > u32csegment_index
convenience typedef
Definition: index.hpp:881
segment_index< const char * > csegment_index
convenience typedef
Definition: index.hpp:875
segment_index< std::u32string::const_iterator > u32ssegment_index
convenience typedef
Definition: index.hpp:872
std::vector< break_info > index_type
Definition: facets.hpp:50
boundary_point_index< const char * > cboundary_point_index
convenience typedef
Definition: index.hpp:893
boundary_point_index()
Definition: index.hpp:731
This class holds an index of segments in the text range and allows iterating over them.
Definition: index.hpp:416
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:553