...one of the most highly
regarded and expertly designed C++ library projects in the
world.
— Herb Sutter and Andrei
Alexandrescu, C++
Coding Standards
00001 // 00002 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 00003 // 00004 // Distributed under the Boost Software License, Version 1.0. (See 00005 // accompanying file LICENSE_1_0.txt or copy at 00006 // http://www.boost.org/LICENSE_1_0.txt) 00007 // 00008 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 00009 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 00010 00011 #include <boost/locale/config.hpp> 00012 #include <boost/locale/boundary/types.hpp> 00013 #include <boost/locale/boundary/facets.hpp> 00014 #include <boost/locale/boundary/segment.hpp> 00015 #include <boost/locale/boundary/boundary_point.hpp> 00016 #include <boost/iterator/iterator_facade.hpp> 00017 #include <boost/shared_ptr.hpp> 00018 #include <boost/cstdint.hpp> 00019 #include <boost/assert.hpp> 00020 #ifdef BOOST_MSVC 00021 # pragma warning(push) 00022 # pragma warning(disable : 4275 4251 4231 4660) 00023 #endif 00024 #include <string> 00025 #include <locale> 00026 #include <vector> 00027 #include <iterator> 00028 #include <algorithm> 00029 #include <stdexcept> 00030 00031 #include <iostream> 00032 00033 namespace boost { 00034 00035 namespace locale { 00036 00037 namespace boundary { 00045 00047 00048 namespace details { 00049 00050 template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category> 00051 struct mapping_traits { 00052 typedef typename std::iterator_traits<IteratorType>::value_type char_type; 00053 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) 00054 { 00055 std::basic_string<char_type> str(b,e); 00056 return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); 00057 } 00058 }; 00059 00060 template<typename CharType,typename SomeIteratorType> 00061 struct linear_iterator_traits { 00062 static const bool is_linear = false; 00063 }; 00064 00065 template<typename CharType> 00066 struct linear_iterator_traits<CharType,typename std::basic_string<CharType>::iterator> { 00067 static const bool is_linear = true; 00068 }; 00069 00070 template<typename CharType> 00071 struct linear_iterator_traits<CharType,typename std::basic_string<CharType>::const_iterator> { 00072 static const bool is_linear = true; 00073 }; 00074 00075 template<typename CharType> 00076 struct linear_iterator_traits<CharType,typename std::vector<CharType>::iterator> { 00077 static const bool is_linear = true; 00078 }; 00079 00080 template<typename CharType> 00081 struct linear_iterator_traits<CharType,typename std::vector<CharType>::const_iterator> { 00082 static const bool is_linear = true; 00083 }; 00084 00085 template<typename CharType> 00086 struct linear_iterator_traits<CharType,CharType *> { 00087 static const bool is_linear = true; 00088 }; 00089 00090 template<typename CharType> 00091 struct linear_iterator_traits<CharType,CharType const *> { 00092 static const bool is_linear = true; 00093 }; 00094 00095 00096 template<typename IteratorType> 00097 struct mapping_traits<IteratorType,std::random_access_iterator_tag> { 00098 00099 typedef typename std::iterator_traits<IteratorType>::value_type char_type; 00100 00101 00102 00103 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) 00104 { 00105 index_type result; 00106 00107 // 00108 // Optimize for most common cases 00109 // 00110 // C++0x requires that string is continious in memory and all known 00111 // string implementations 00112 // do this because of c_str() support. 00113 // 00114 00115 if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e) 00116 { 00117 char_type const *begin = &*b; 00118 char_type const *end = begin + (e-b); 00119 index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end); 00120 result.swap(tmp); 00121 } 00122 else { 00123 std::basic_string<char_type> str(b,e); 00124 index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); 00125 result.swap(tmp); 00126 } 00127 return result; 00128 } 00129 }; 00130 00131 template<typename BaseIterator> 00132 class mapping { 00133 public: 00134 typedef BaseIterator base_iterator; 00135 typedef typename std::iterator_traits<base_iterator>::value_type char_type; 00136 00137 00138 mapping(boundary_type type, 00139 base_iterator begin, 00140 base_iterator end, 00141 std::locale const &loc) 00142 : 00143 index_(new index_type()), 00144 begin_(begin), 00145 end_(end) 00146 { 00147 index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc); 00148 index_->swap(idx); 00149 } 00150 00151 mapping() 00152 { 00153 } 00154 00155 index_type const &index() const 00156 { 00157 return *index_; 00158 } 00159 00160 base_iterator begin() const 00161 { 00162 return begin_; 00163 } 00164 00165 base_iterator end() const 00166 { 00167 return end_; 00168 } 00169 00170 private: 00171 boost::shared_ptr<index_type> index_; 00172 base_iterator begin_,end_; 00173 }; 00174 00175 template<typename BaseIterator> 00176 class segment_index_iterator : 00177 public boost::iterator_facade< 00178 segment_index_iterator<BaseIterator>, 00179 segment<BaseIterator>, 00180 boost::bidirectional_traversal_tag, 00181 segment<BaseIterator> const & 00182 > 00183 { 00184 public: 00185 typedef BaseIterator base_iterator; 00186 typedef mapping<base_iterator> mapping_type; 00187 typedef segment<base_iterator> segment_type; 00188 00189 segment_index_iterator() : current_(0,0),map_(0) 00190 { 00191 } 00192 00193 segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) : 00194 map_(map), 00195 mask_(mask), 00196 full_select_(full_select) 00197 { 00198 set(p); 00199 } 00200 segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) : 00201 map_(map), 00202 mask_(mask), 00203 full_select_(full_select) 00204 { 00205 if(is_begin) 00206 set_begin(); 00207 else 00208 set_end(); 00209 } 00210 00211 segment_type const &dereference() const 00212 { 00213 return value_; 00214 } 00215 00216 bool equal(segment_index_iterator const &other) const 00217 { 00218 return map_ == other.map_ && current_.second == other.current_.second; 00219 } 00220 00221 void increment() 00222 { 00223 std::pair<size_t,size_t> next = current_; 00224 if(full_select_) { 00225 next.first = next.second; 00226 while(next.second < size()) { 00227 next.second++; 00228 if(valid_offset(next.second)) 00229 break; 00230 } 00231 if(next.second == size()) 00232 next.first = next.second - 1; 00233 } 00234 else { 00235 while(next.second < size()) { 00236 next.first = next.second; 00237 next.second++; 00238 if(valid_offset(next.second)) 00239 break; 00240 } 00241 } 00242 update_current(next); 00243 } 00244 00245 void decrement() 00246 { 00247 std::pair<size_t,size_t> next = current_; 00248 if(full_select_) { 00249 while(next.second >1) { 00250 next.second--; 00251 if(valid_offset(next.second)) 00252 break; 00253 } 00254 next.first = next.second; 00255 while(next.first >0) { 00256 next.first--; 00257 if(valid_offset(next.first)) 00258 break; 00259 } 00260 } 00261 else { 00262 while(next.second >1) { 00263 next.second--; 00264 if(valid_offset(next.second)) 00265 break; 00266 } 00267 next.first = next.second - 1; 00268 } 00269 update_current(next); 00270 } 00271 00272 private: 00273 00274 void set_end() 00275 { 00276 current_.first = size() - 1; 00277 current_.second = size(); 00278 value_ = segment_type(map_->end(),map_->end(),0); 00279 } 00280 void set_begin() 00281 { 00282 current_.first = current_.second = 0; 00283 value_ = segment_type(map_->begin(),map_->begin(),0); 00284 increment(); 00285 } 00286 00287 void set(base_iterator p) 00288 { 00289 size_t dist=std::distance(map_->begin(),p); 00290 index_type::const_iterator b=map_->index().begin(),e=map_->index().end(); 00291 index_type::const_iterator 00292 boundary_point=std::upper_bound(b,e,break_info(dist)); 00293 while(boundary_point != e && (boundary_point->rule & mask_)==0) 00294 boundary_point++; 00295 00296 current_.first = current_.second = boundary_point - b; 00297 00298 if(full_select_) { 00299 while(current_.first > 0) { 00300 current_.first --; 00301 if(valid_offset(current_.first)) 00302 break; 00303 } 00304 } 00305 else { 00306 if(current_.first > 0) 00307 current_.first --; 00308 } 00309 value_.first = map_->begin(); 00310 std::advance(value_.first,get_offset(current_.first)); 00311 value_.second = value_.first; 00312 std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first)); 00313 00314 update_rule(); 00315 } 00316 00317 void update_current(std::pair<size_t,size_t> pos) 00318 { 00319 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first); 00320 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second); 00321 std::advance(value_.first,first_diff); 00322 std::advance(value_.second,second_diff); 00323 current_ = pos; 00324 update_rule(); 00325 } 00326 00327 void update_rule() 00328 { 00329 if(current_.second != size()) { 00330 value_.rule(index()[current_.second].rule); 00331 } 00332 } 00333 size_t get_offset(size_t ind) const 00334 { 00335 if(ind == size()) 00336 return index().back().offset; 00337 return index()[ind].offset; 00338 } 00339 00340 bool valid_offset(size_t offset) const 00341 { 00342 return offset == 0 00343 || offset == size() // make sure we not acess index[size] 00344 || (index()[offset].rule & mask_)!=0; 00345 } 00346 00347 size_t size() const 00348 { 00349 return index().size(); 00350 } 00351 00352 index_type const &index() const 00353 { 00354 return map_->index(); 00355 } 00356 00357 00358 segment_type value_; 00359 std::pair<size_t,size_t> current_; 00360 mapping_type const *map_; 00361 rule_type mask_; 00362 bool full_select_; 00363 }; 00364 00365 template<typename BaseIterator> 00366 class boundary_point_index_iterator : 00367 public boost::iterator_facade< 00368 boundary_point_index_iterator<BaseIterator>, 00369 boundary_point<BaseIterator>, 00370 boost::bidirectional_traversal_tag, 00371 boundary_point<BaseIterator> const & 00372 > 00373 { 00374 public: 00375 typedef BaseIterator base_iterator; 00376 typedef mapping<base_iterator> mapping_type; 00377 typedef boundary_point<base_iterator> boundary_point_type; 00378 00379 boundary_point_index_iterator() : current_(0),map_(0) 00380 { 00381 } 00382 00383 boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) : 00384 map_(map), 00385 mask_(mask) 00386 { 00387 if(is_begin) 00388 set_begin(); 00389 else 00390 set_end(); 00391 } 00392 boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) : 00393 map_(map), 00394 mask_(mask) 00395 { 00396 set(p); 00397 } 00398 00399 boundary_point_type const &dereference() const 00400 { 00401 return value_; 00402 } 00403 00404 bool equal(boundary_point_index_iterator const &other) const 00405 { 00406 return map_ == other.map_ && current_ == other.current_; 00407 } 00408 00409 void increment() 00410 { 00411 size_t next = current_; 00412 while(next < size()) { 00413 next++; 00414 if(valid_offset(next)) 00415 break; 00416 } 00417 update_current(next); 00418 } 00419 00420 void decrement() 00421 { 00422 size_t next = current_; 00423 while(next>0) { 00424 next--; 00425 if(valid_offset(next)) 00426 break; 00427 } 00428 update_current(next); 00429 } 00430 00431 private: 00432 void set_end() 00433 { 00434 current_ = size(); 00435 value_ = boundary_point_type(map_->end(),0); 00436 } 00437 void set_begin() 00438 { 00439 current_ = 0; 00440 value_ = boundary_point_type(map_->begin(),0); 00441 } 00442 00443 void set(base_iterator p) 00444 { 00445 size_t dist = std::distance(map_->begin(),p); 00446 00447 index_type::const_iterator b=index().begin(); 00448 index_type::const_iterator e=index().end(); 00449 index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist)); 00450 00451 if(ptr==index().end()) 00452 current_=size()-1; 00453 else 00454 current_=ptr - index().begin(); 00455 00456 while(!valid_offset(current_)) 00457 current_ ++; 00458 00459 std::ptrdiff_t diff = get_offset(current_) - dist; 00460 std::advance(p,diff); 00461 value_.iterator(p); 00462 update_rule(); 00463 } 00464 00465 void update_current(size_t pos) 00466 { 00467 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_); 00468 base_iterator i=value_.iterator(); 00469 std::advance(i,diff); 00470 current_ = pos; 00471 value_.iterator(i); 00472 update_rule(); 00473 } 00474 00475 void update_rule() 00476 { 00477 if(current_ != size()) { 00478 value_.rule(index()[current_].rule); 00479 } 00480 } 00481 size_t get_offset(size_t ind) const 00482 { 00483 if(ind == size()) 00484 return index().back().offset; 00485 return index()[ind].offset; 00486 } 00487 00488 bool valid_offset(size_t offset) const 00489 { 00490 return offset == 0 00491 || offset + 1 >= size() // last and first are always valid regardless of mark 00492 || (index()[offset].rule & mask_)!=0; 00493 } 00494 00495 size_t size() const 00496 { 00497 return index().size(); 00498 } 00499 00500 index_type const &index() const 00501 { 00502 return map_->index(); 00503 } 00504 00505 00506 boundary_point_type value_; 00507 size_t current_; 00508 mapping_type const *map_; 00509 rule_type mask_; 00510 }; 00511 00512 00513 } // details 00514 00516 00517 template<typename BaseIterator> 00518 class segment_index; 00519 00520 template<typename BaseIterator> 00521 class boundary_point_index; 00522 00523 00575 00576 template<typename BaseIterator> 00577 class segment_index { 00578 public: 00579 00583 typedef BaseIterator base_iterator; 00584 #ifdef BOOST_LOCALE_DOXYGEN 00585 00586 00587 00588 00589 00590 00591 00592 00593 00594 00595 00596 00597 00598 00599 typedef unspecified_iterator_type iterator; 00603 typedef unspecified_iterator_type const_iterator; 00604 #else 00605 typedef details::segment_index_iterator<base_iterator> iterator; 00606 typedef details::segment_index_iterator<base_iterator> const_iterator; 00607 #endif 00608 00609 00610 00611 00612 typedef segment<base_iterator> value_type; 00613 00623 segment_index() : mask_(0xFFFFFFFFu),full_select_(false) 00624 { 00625 } 00630 segment_index(boundary_type type, 00631 base_iterator begin, 00632 base_iterator end, 00633 rule_type mask, 00634 std::locale const &loc=std::locale()) 00635 : 00636 map_(type,begin,end,loc), 00637 mask_(mask), 00638 full_select_(false) 00639 { 00640 } 00645 segment_index(boundary_type type, 00646 base_iterator begin, 00647 base_iterator end, 00648 std::locale const &loc=std::locale()) 00649 : 00650 map_(type,begin,end,loc), 00651 mask_(0xFFFFFFFFu), 00652 full_select_(false) 00653 { 00654 } 00655 00666 segment_index(boundary_point_index<base_iterator> const &); 00677 segment_index const &operator = (boundary_point_index<base_iterator> const &); 00678 00679 00686 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) 00687 { 00688 map_ = mapping_type(type,begin,end,loc); 00689 } 00690 00700 iterator begin() const 00701 { 00702 return iterator(true,&map_,mask_,full_select_); 00703 } 00704 00712 iterator end() const 00713 { 00714 return iterator(false,&map_,mask_,full_select_); 00715 } 00716 00734 iterator find(base_iterator p) const 00735 { 00736 return iterator(p,&map_,mask_,full_select_); 00737 } 00738 00742 rule_type rule() const 00743 { 00744 return mask_; 00745 } 00749 void rule(rule_type v) 00750 { 00751 mask_ = v; 00752 } 00753 00766 00767 bool full_select() const 00768 { 00769 return full_select_; 00770 } 00771 00784 00785 void full_select(bool v) 00786 { 00787 full_select_ = v; 00788 } 00789 00790 private: 00791 friend class boundary_point_index<base_iterator>; 00792 typedef details::mapping<base_iterator> mapping_type; 00793 mapping_type map_; 00794 rule_type mask_; 00795 bool full_select_; 00796 }; 00797 00844 00845 00846 template<typename BaseIterator> 00847 class boundary_point_index { 00848 public: 00852 typedef BaseIterator base_iterator; 00853 #ifdef BOOST_LOCALE_DOXYGEN 00854 00855 00856 00857 00858 00859 00860 00861 00862 00863 00864 00865 00866 00867 00868 typedef unspecified_iterator_type iterator; 00872 typedef unspecified_iterator_type const_iterator; 00873 #else 00874 typedef details::boundary_point_index_iterator<base_iterator> iterator; 00875 typedef details::boundary_point_index_iterator<base_iterator> const_iterator; 00876 #endif 00877 00878 00879 00880 00881 typedef boundary_point<base_iterator> value_type; 00882 00892 boundary_point_index() : mask_(0xFFFFFFFFu) 00893 { 00894 } 00895 00900 boundary_point_index(boundary_type type, 00901 base_iterator begin, 00902 base_iterator end, 00903 rule_type mask, 00904 std::locale const &loc=std::locale()) 00905 : 00906 map_(type,begin,end,loc), 00907 mask_(mask) 00908 { 00909 } 00914 boundary_point_index(boundary_type type, 00915 base_iterator begin, 00916 base_iterator end, 00917 std::locale const &loc=std::locale()) 00918 : 00919 map_(type,begin,end,loc), 00920 mask_(0xFFFFFFFFu) 00921 { 00922 } 00923 00934 boundary_point_index(segment_index<base_iterator> const &other); 00945 boundary_point_index const &operator=(segment_index<base_iterator> const &other); 00946 00953 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) 00954 { 00955 map_ = mapping_type(type,begin,end,loc); 00956 } 00957 00967 iterator begin() const 00968 { 00969 return iterator(true,&map_,mask_); 00970 } 00971 00981 iterator end() const 00982 { 00983 return iterator(false,&map_,mask_); 00984 } 00985 00999 iterator find(base_iterator p) const 01000 { 01001 return iterator(p,&map_,mask_); 01002 } 01003 01007 rule_type rule() const 01008 { 01009 return mask_; 01010 } 01014 void rule(rule_type v) 01015 { 01016 mask_ = v; 01017 } 01018 01019 private: 01020 01021 friend class segment_index<base_iterator>; 01022 typedef details::mapping<base_iterator> mapping_type; 01023 mapping_type map_; 01024 rule_type mask_; 01025 }; 01026 01028 template<typename BaseIterator> 01029 segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) : 01030 map_(other.map_), 01031 mask_(0xFFFFFFFFu), 01032 full_select_(false) 01033 { 01034 } 01035 01036 template<typename BaseIterator> 01037 boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) : 01038 map_(other.map_), 01039 mask_(0xFFFFFFFFu) 01040 { 01041 } 01042 01043 template<typename BaseIterator> 01044 segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other) 01045 { 01046 map_ = other.map_; 01047 return *this; 01048 } 01049 01050 template<typename BaseIterator> 01051 boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other) 01052 { 01053 map_ = other.map_; 01054 return *this; 01055 } 01057 01058 typedef segment_index<std::string::const_iterator> ssegment_index; 01059 typedef segment_index<std::wstring::const_iterator> wssegment_index; 01060 #ifdef BOOST_HAS_CHAR16_T 01061 typedef segment_index<std::u16string::const_iterator> u16ssegment_index; 01062 #endif 01063 #ifdef BOOST_HAS_CHAR32_T 01064 typedef segment_index<std::u32string::const_iterator> u32ssegment_index; 01065 #endif 01066 01067 typedef segment_index<char const *> csegment_index; 01068 typedef segment_index<wchar_t const *> wcsegment_index; 01069 #ifdef BOOST_HAS_CHAR16_T 01070 typedef segment_index<char16_t const *> u16csegment_index; 01071 #endif 01072 #ifdef BOOST_HAS_CHAR32_T 01073 typedef segment_index<char32_t const *> u32csegment_index; 01074 #endif 01075 01076 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index; 01077 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index; 01078 #ifdef BOOST_HAS_CHAR16_T 01079 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index; 01080 #endif 01081 #ifdef BOOST_HAS_CHAR32_T 01082 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index; 01083 #endif 01084 01085 typedef boundary_point_index<char const *> cboundary_point_index; 01086 typedef boundary_point_index<wchar_t const *> wcboundary_point_index; 01087 #ifdef BOOST_HAS_CHAR16_T 01088 typedef boundary_point_index<char16_t const *> u16cboundary_point_index; 01089 #endif 01090 #ifdef BOOST_HAS_CHAR32_T 01091 typedef boundary_point_index<char32_t const *> u32cboundary_point_index; 01092 #endif 01093 01094 01095 01096 } // boundary 01097 01098 } // locale 01099 } // boost 01100 01107 01108 #ifdef BOOST_MSVC 01109 #pragma warning(pop) 01110 #endif 01111 01112 #endif 01113 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4