Boost.Locale
index.hpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
9 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
10 
11 #include <boost/locale/config.hpp>
12 #include <boost/locale/boundary/types.hpp>
13 #include <boost/locale/boundary/facets.hpp>
14 #include <boost/locale/boundary/segment.hpp>
15 #include <boost/locale/boundary/boundary_point.hpp>
16 #include <boost/iterator/iterator_facade.hpp>
17 #include <boost/type_traits/is_same.hpp>
18 #include <boost/shared_ptr.hpp>
19 #include <boost/cstdint.hpp>
20 #include <boost/assert.hpp>
21 #ifdef BOOST_MSVC
22 # pragma warning(push)
23 # pragma warning(disable : 4275 4251 4231 4660)
24 #endif
25 #include <string>
26 #include <locale>
27 #include <vector>
28 #include <iterator>
29 #include <algorithm>
30 #include <stdexcept>
31 
32 #include <iostream>
33 
34 namespace boost {
35 
36  namespace locale {
37 
38  namespace boundary {
46 
48 
49  namespace details {
50 
51  template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
52  struct mapping_traits {
53  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
54  static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
55  {
56  std::basic_string<char_type> str(b,e);
57  return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
58  }
59  };
60 
61  template<typename CharType,typename SomeIteratorType>
62  struct linear_iterator_traits {
63  static const bool is_linear =
64  is_same<SomeIteratorType,CharType*>::value
65  || is_same<SomeIteratorType,CharType const*>::value
66  || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
67  || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value
68  || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
69  || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value
70  ;
71  };
72 
73 
74 
75  template<typename IteratorType>
76  struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
77 
78  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
79 
80 
81 
82  static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
83  {
84  index_type result;
85 
86  //
87  // Optimize for most common cases
88  //
89  // C++0x requires that string is continious in memory and all known
90  // string implementations
91  // do this because of c_str() support.
92  //
93 
94  if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
95  {
96  char_type const *begin = &*b;
97  char_type const *end = begin + (e-b);
98  index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
99  result.swap(tmp);
100  }
101  else {
102  std::basic_string<char_type> str(b,e);
103  index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
104  result.swap(tmp);
105  }
106  return result;
107  }
108  };
109 
110  template<typename BaseIterator>
111  class mapping {
112  public:
113  typedef BaseIterator base_iterator;
114  typedef typename std::iterator_traits<base_iterator>::value_type char_type;
115 
116 
117  mapping(boundary_type type,
118  base_iterator begin,
119  base_iterator end,
120  std::locale const &loc)
121  :
122  index_(new index_type()),
123  begin_(begin),
124  end_(end)
125  {
126  index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
127  index_->swap(idx);
128  }
129 
130  mapping()
131  {
132  }
133 
134  index_type const &index() const
135  {
136  return *index_;
137  }
138 
139  base_iterator begin() const
140  {
141  return begin_;
142  }
143 
144  base_iterator end() const
145  {
146  return end_;
147  }
148 
149  private:
151  base_iterator begin_,end_;
152  };
153 
154  template<typename BaseIterator>
155  class segment_index_iterator :
156  public boost::iterator_facade<
157  segment_index_iterator<BaseIterator>,
158  segment<BaseIterator>,
159  boost::bidirectional_traversal_tag,
160  segment<BaseIterator> const &
161  >
162  {
163  public:
164  typedef BaseIterator base_iterator;
165  typedef mapping<base_iterator> mapping_type;
166  typedef segment<base_iterator> segment_type;
167 
168  segment_index_iterator() : current_(0,0),map_(0)
169  {
170  }
171 
172  segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
173  map_(map),
174  mask_(mask),
175  full_select_(full_select)
176  {
177  set(p);
178  }
179  segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
180  map_(map),
181  mask_(mask),
182  full_select_(full_select)
183  {
184  if(is_begin)
185  set_begin();
186  else
187  set_end();
188  }
189 
190  segment_type const &dereference() const
191  {
192  return value_;
193  }
194 
195  bool equal(segment_index_iterator const &other) const
196  {
197  return map_ == other.map_ && current_.second == other.current_.second;
198  }
199 
200  void increment()
201  {
202  std::pair<size_t,size_t> next = current_;
203  if(full_select_) {
204  next.first = next.second;
205  while(next.second < size()) {
206  next.second++;
207  if(valid_offset(next.second))
208  break;
209  }
210  if(next.second == size())
211  next.first = next.second - 1;
212  }
213  else {
214  while(next.second < size()) {
215  next.first = next.second;
216  next.second++;
217  if(valid_offset(next.second))
218  break;
219  }
220  }
221  update_current(next);
222  }
223 
224  void decrement()
225  {
226  std::pair<size_t,size_t> next = current_;
227  if(full_select_) {
228  while(next.second >1) {
229  next.second--;
230  if(valid_offset(next.second))
231  break;
232  }
233  next.first = next.second;
234  while(next.first >0) {
235  next.first--;
236  if(valid_offset(next.first))
237  break;
238  }
239  }
240  else {
241  while(next.second >1) {
242  next.second--;
243  if(valid_offset(next.second))
244  break;
245  }
246  next.first = next.second - 1;
247  }
248  update_current(next);
249  }
250 
251  private:
252 
253  void set_end()
254  {
255  current_.first = size() - 1;
256  current_.second = size();
257  value_ = segment_type(map_->end(),map_->end(),0);
258  }
259  void set_begin()
260  {
261  current_.first = current_.second = 0;
262  value_ = segment_type(map_->begin(),map_->begin(),0);
263  increment();
264  }
265 
266  void set(base_iterator p)
267  {
268  size_t dist=std::distance(map_->begin(),p);
269  index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
270  index_type::const_iterator
271  boundary_point=std::upper_bound(b,e,break_info(dist));
272  while(boundary_point != e && (boundary_point->rule & mask_)==0)
273  boundary_point++;
274 
275  current_.first = current_.second = boundary_point - b;
276 
277  if(full_select_) {
278  while(current_.first > 0) {
279  current_.first --;
280  if(valid_offset(current_.first))
281  break;
282  }
283  }
284  else {
285  if(current_.first > 0)
286  current_.first --;
287  }
288  value_.first = map_->begin();
289  std::advance(value_.first,get_offset(current_.first));
290  value_.second = value_.first;
291  std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
292 
293  update_rule();
294  }
295 
296  void update_current(std::pair<size_t,size_t> pos)
297  {
298  std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
299  std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
300  std::advance(value_.first,first_diff);
301  std::advance(value_.second,second_diff);
302  current_ = pos;
303  update_rule();
304  }
305 
306  void update_rule()
307  {
308  if(current_.second != size()) {
309  value_.rule(index()[current_.second].rule);
310  }
311  }
312  size_t get_offset(size_t ind) const
313  {
314  if(ind == size())
315  return index().back().offset;
316  return index()[ind].offset;
317  }
318 
319  bool valid_offset(size_t offset) const
320  {
321  return offset == 0
322  || offset == size() // make sure we not acess index[size]
323  || (index()[offset].rule & mask_)!=0;
324  }
325 
326  size_t size() const
327  {
328  return index().size();
329  }
330 
331  index_type const &index() const
332  {
333  return map_->index();
334  }
335 
336 
337  segment_type value_;
338  std::pair<size_t,size_t> current_;
339  mapping_type const *map_;
340  rule_type mask_;
341  bool full_select_;
342  };
343 
344  template<typename BaseIterator>
345  class boundary_point_index_iterator :
346  public boost::iterator_facade<
347  boundary_point_index_iterator<BaseIterator>,
348  boundary_point<BaseIterator>,
349  boost::bidirectional_traversal_tag,
350  boundary_point<BaseIterator> const &
351  >
352  {
353  public:
354  typedef BaseIterator base_iterator;
355  typedef mapping<base_iterator> mapping_type;
356  typedef boundary_point<base_iterator> boundary_point_type;
357 
358  boundary_point_index_iterator() : current_(0),map_(0)
359  {
360  }
361 
362  boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
363  map_(map),
364  mask_(mask)
365  {
366  if(is_begin)
367  set_begin();
368  else
369  set_end();
370  }
371  boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
372  map_(map),
373  mask_(mask)
374  {
375  set(p);
376  }
377 
378  boundary_point_type const &dereference() const
379  {
380  return value_;
381  }
382 
383  bool equal(boundary_point_index_iterator const &other) const
384  {
385  return map_ == other.map_ && current_ == other.current_;
386  }
387 
388  void increment()
389  {
390  size_t next = current_;
391  while(next < size()) {
392  next++;
393  if(valid_offset(next))
394  break;
395  }
396  update_current(next);
397  }
398 
399  void decrement()
400  {
401  size_t next = current_;
402  while(next>0) {
403  next--;
404  if(valid_offset(next))
405  break;
406  }
407  update_current(next);
408  }
409 
410  private:
411  void set_end()
412  {
413  current_ = size();
414  value_ = boundary_point_type(map_->end(),0);
415  }
416  void set_begin()
417  {
418  current_ = 0;
419  value_ = boundary_point_type(map_->begin(),0);
420  }
421 
422  void set(base_iterator p)
423  {
424  size_t dist = std::distance(map_->begin(),p);
425 
426  index_type::const_iterator b=index().begin();
427  index_type::const_iterator e=index().end();
428  index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
429 
430  if(ptr==index().end())
431  current_=size()-1;
432  else
433  current_=ptr - index().begin();
434 
435  while(!valid_offset(current_))
436  current_ ++;
437 
438  std::ptrdiff_t diff = get_offset(current_) - dist;
439  std::advance(p,diff);
440  value_.iterator(p);
441  update_rule();
442  }
443 
444  void update_current(size_t pos)
445  {
446  std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
447  base_iterator i=value_.iterator();
448  std::advance(i,diff);
449  current_ = pos;
450  value_.iterator(i);
451  update_rule();
452  }
453 
454  void update_rule()
455  {
456  if(current_ != size()) {
457  value_.rule(index()[current_].rule);
458  }
459  }
460  size_t get_offset(size_t ind) const
461  {
462  if(ind == size())
463  return index().back().offset;
464  return index()[ind].offset;
465  }
466 
467  bool valid_offset(size_t offset) const
468  {
469  return offset == 0
470  || offset + 1 >= size() // last and first are always valid regardless of mark
471  || (index()[offset].rule & mask_)!=0;
472  }
473 
474  size_t size() const
475  {
476  return index().size();
477  }
478 
479  index_type const &index() const
480  {
481  return map_->index();
482  }
483 
484 
485  boundary_point_type value_;
486  size_t current_;
487  mapping_type const *map_;
488  rule_type mask_;
489  };
490 
491 
492  } // details
493 
495 
496  template<typename BaseIterator>
498 
499  template<typename BaseIterator>
501 
502 
554 
555  template<typename BaseIterator>
556  class segment_index {
557  public:
558 
562  typedef BaseIterator base_iterator;
563  #ifdef BOOST_LOCALE_DOXYGEN
564  typedef unspecified_iterator_type iterator;
582  typedef unspecified_iterator_type const_iterator;
583  #else
584  typedef details::segment_index_iterator<base_iterator> iterator;
585  typedef details::segment_index_iterator<base_iterator> const_iterator;
586  #endif
592 
602  segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
603  {
604  }
610  base_iterator begin,
611  base_iterator end,
612  rule_type mask,
613  std::locale const &loc=std::locale())
614  :
615  map_(type,begin,end,loc),
616  mask_(mask),
617  full_select_(false)
618  {
619  }
625  base_iterator begin,
626  base_iterator end,
627  std::locale const &loc=std::locale())
628  :
629  map_(type,begin,end,loc),
630  mask_(0xFFFFFFFFu),
631  full_select_(false)
632  {
633  }
634 
657 
658 
665  void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
666  {
667  map_ = mapping_type(type,begin,end,loc);
668  }
669 
679  iterator begin() const
680  {
681  return iterator(true,&map_,mask_,full_select_);
682  }
683 
691  iterator end() const
692  {
693  return iterator(false,&map_,mask_,full_select_);
694  }
695 
714  {
715  return iterator(p,&map_,mask_,full_select_);
716  }
717 
721  rule_type rule() const
722  {
723  return mask_;
724  }
728  void rule(rule_type v)
729  {
730  mask_ = v;
731  }
732 
745 
746  bool full_select() const
747  {
748  return full_select_;
749  }
750 
763 
764  void full_select(bool v)
765  {
766  full_select_ = v;
767  }
768 
769  private:
770  friend class boundary_point_index<base_iterator>;
771  typedef details::mapping<base_iterator> mapping_type;
772  mapping_type map_;
773  rule_type mask_;
774  bool full_select_;
775  };
776 
823 
824 
825  template<typename BaseIterator>
826  class boundary_point_index {
827  public:
831  typedef BaseIterator base_iterator;
832  #ifdef BOOST_LOCALE_DOXYGEN
833  typedef unspecified_iterator_type iterator;
851  typedef unspecified_iterator_type const_iterator;
852  #else
853  typedef details::boundary_point_index_iterator<base_iterator> iterator;
854  typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
855  #endif
861 
871  boundary_point_index() : mask_(0xFFFFFFFFu)
872  {
873  }
874 
880  base_iterator begin,
881  base_iterator end,
882  rule_type mask,
883  std::locale const &loc=std::locale())
884  :
885  map_(type,begin,end,loc),
886  mask_(mask)
887  {
888  }
894  base_iterator begin,
895  base_iterator end,
896  std::locale const &loc=std::locale())
897  :
898  map_(type,begin,end,loc),
899  mask_(0xFFFFFFFFu)
900  {
901  }
902 
925 
932  void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
933  {
934  map_ = mapping_type(type,begin,end,loc);
935  }
936 
946  iterator begin() const
947  {
948  return iterator(true,&map_,mask_);
949  }
950 
960  iterator end() const
961  {
962  return iterator(false,&map_,mask_);
963  }
964 
979  {
980  return iterator(p,&map_,mask_);
981  }
982 
986  rule_type rule() const
987  {
988  return mask_;
989  }
993  void rule(rule_type v)
994  {
995  mask_ = v;
996  }
997 
998  private:
999 
1000  friend class segment_index<base_iterator>;
1001  typedef details::mapping<base_iterator> mapping_type;
1002  mapping_type map_;
1003  rule_type mask_;
1004  };
1005 
1007  template<typename BaseIterator>
1008  segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
1009  map_(other.map_),
1010  mask_(0xFFFFFFFFu),
1011  full_select_(false)
1012  {
1013  }
1014 
1015  template<typename BaseIterator>
1016  boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
1017  map_(other.map_),
1018  mask_(0xFFFFFFFFu)
1019  {
1020  }
1021 
1022  template<typename BaseIterator>
1023  segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
1024  {
1025  map_ = other.map_;
1026  return *this;
1027  }
1028 
1029  template<typename BaseIterator>
1030  boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
1031  {
1032  map_ = other.map_;
1033  return *this;
1034  }
1036 
1039  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1041  #endif
1042  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1044  #endif
1045 
1048  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1050  #endif
1051  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1053  #endif
1054 
1057  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1059  #endif
1060  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1062  #endif
1063 
1066  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1068  #endif
1069  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1071  #endif
1072 
1073 
1074 
1075  } // boundary
1076 
1077  } // locale
1078 } // boost
1079 
1086 
1087 #ifdef BOOST_MSVC
1088 #pragma warning(pop)
1089 #endif
1090 
1091 #endif
1092 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
void full_select(bool v)
Definition: index.hpp:764
a segment object that represents a pair of two iterators that define the range where this segment exi...
Definition: segment.hpp:102
boundary_type
Definition: types.hpp:39
boundary_point_index const & operator=(segment_index< base_iterator > const &other)
This class holds an index of boundary points and allows iterating over them.
Definition: index.hpp:500
BaseIterator base_iterator
Definition: index.hpp:562
segment_index< std::u16string::const_iterator > u16ssegment_index
convenience typedef
Definition: index.hpp:1040
segment_index< char const * > csegment_index
convenience typedef
Definition: index.hpp:1046
void map(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:665
boundary_point_index< std::wstring::const_iterator > wsboundary_point_index
convenience typedef
Definition: index.hpp:1056
boundary_point_index< wchar_t const * > wcboundary_point_index
convenience typedef
Definition: index.hpp:1065
boundary_point< base_iterator > value_type
Definition: index.hpp:860
segment< base_iterator > value_type
Definition: index.hpp:591
void rule(rule_type v)
Definition: index.hpp:993
segment_index const & operator=(boundary_point_index< base_iterator > const &)
rule_type rule() const
Definition: index.hpp:986
iterator end() const
Definition: index.hpp:960
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found...
Definition: types.hpp:51
Definition: generator.hpp:23
boundary_point_index< char16_t const * > u16cboundary_point_index
convenience typedef
Definition: index.hpp:1067
void map(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:932
unspecified_iterator_type iterator
Definition: index.hpp:578
segment_index()
Definition: index.hpp:602
segment_index< char16_t const * > u16csegment_index
convenience typedef
Definition: index.hpp:1049
boundary_point_index< std::string::const_iterator > sboundary_point_index
convenience typedef
Definition: index.hpp:1055
segment_index< std::string::const_iterator > ssegment_index
convenience typedef
Definition: index.hpp:1037
segment_index< char32_t const * > u32csegment_index
convenience typedef
Definition: index.hpp:1052
segment_index< std::wstring::const_iterator > wssegment_index
convenience typedef
Definition: index.hpp:1038
unspecified_iterator_type const_iterator
Definition: index.hpp:582
unspecified_iterator_type const_iterator
Definition: index.hpp:851
This class represents a boundary point in the text.
Definition: boundary_point.hpp:48
iterator find(base_iterator p) const
Definition: index.hpp:713
iterator begin() const
Definition: index.hpp:679
void rule(rule_type v)
Definition: index.hpp:728
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:893
boundary_point_index< char32_t const * > u32cboundary_point_index
convenience typedef
Definition: index.hpp:1070
segment_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, std::locale const &loc=std::locale())
Definition: index.hpp:609
iterator find(base_iterator p) const
Definition: index.hpp:978
boundary_point_index< char const * > cboundary_point_index
convenience typedef
Definition: index.hpp:1064
boundary_point_index< std::u32string::const_iterator > u32sboundary_point_index
convenience typedef
Definition: index.hpp:1061
unspecified_iterator_type iterator
Definition: index.hpp:847
segment_index(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:624
bool full_select() const
Definition: index.hpp:746
boundary_point_index< std::u16string::const_iterator > u16sboundary_point_index
convenience typedef
Definition: index.hpp:1058
BaseIterator base_iterator
Definition: index.hpp:831
iterator end() const
Definition: index.hpp:691
segment_index< std::u32string::const_iterator > u32ssegment_index
convenience typedef
Definition: index.hpp:1043
std::vector< break_info > index_type
Definition: facets.hpp:86
rule_type rule() const
Definition: index.hpp:721
iterator begin() const
Definition: index.hpp:946
boundary_point_index()
Definition: index.hpp:871
This class holds an index of segments in the text range and allows iterating over them...
Definition: index.hpp:497
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, std::locale const &loc=std::locale())
Definition: index.hpp:879
segment_index< wchar_t const * > wcsegment_index
convenience typedef
Definition: index.hpp:1047