Boost.Locale
boost/locale/boundary/index.hpp
00001 //
00002 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
00003 //
00004 //  Distributed under the Boost Software License, Version 1.0. (See
00005 //  accompanying file LICENSE_1_0.txt or copy at
00006 //  http://www.boost.org/LICENSE_1_0.txt)
00007 //
00008 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
00009 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
00010 
00011 #include <boost/locale/config.hpp>
00012 #include <boost/locale/boundary/types.hpp>
00013 #include <boost/locale/boundary/facets.hpp>
00014 #include <boost/locale/boundary/segment.hpp>
00015 #include <boost/locale/boundary/boundary_point.hpp>
00016 #include <boost/iterator/iterator_facade.hpp>
00017 #include <boost/type_traits/is_same.hpp>
00018 #include <boost/shared_ptr.hpp>
00019 #include <boost/cstdint.hpp>
00020 #include <boost/assert.hpp>
00021 #ifdef BOOST_MSVC
00022 #  pragma warning(push)
00023 #  pragma warning(disable : 4275 4251 4231 4660)
00024 #endif
00025 #include <string>
00026 #include <locale>
00027 #include <vector>
00028 #include <iterator>
00029 #include <algorithm>
00030 #include <stdexcept>
00031 
00032 #include <iostream>
00033 
00034 namespace boost {
00035 
00036     namespace locale {
00037         
00038         namespace boundary {
00046 
00048 
00049             namespace details {
00050 
00051                 template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
00052                 struct mapping_traits {
00053                     typedef typename std::iterator_traits<IteratorType>::value_type char_type;
00054                     static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
00055                     {
00056                         std::basic_string<char_type> str(b,e);
00057                         return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
00058                     }
00059                 };
00060 
00061                 template<typename CharType,typename SomeIteratorType>
00062                 struct linear_iterator_traits {
00063                     static const bool is_linear =
00064                         is_same<SomeIteratorType,CharType*>::value
00065                         || is_same<SomeIteratorType,CharType const*>::value
00066                         || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
00067                         || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value
00068                         || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
00069                         || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value
00070                         ;
00071                 };
00072 
00073 
00074 
00075                 template<typename IteratorType>
00076                 struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
00077 
00078                     typedef typename std::iterator_traits<IteratorType>::value_type char_type;
00079 
00080 
00081 
00082                     static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
00083                     {
00084                         index_type result;
00085 
00086                         //
00087                         // Optimize for most common cases
00088                         //
00089                         // C++0x requires that string is continious in memory and all known
00090                         // string implementations
00091                         // do this because of c_str() support. 
00092                         //
00093 
00094                         if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
00095                         {
00096                             char_type const *begin = &*b;
00097                             char_type const *end = begin + (e-b);
00098                             index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
00099                             result.swap(tmp);
00100                         }
00101                         else {
00102                             std::basic_string<char_type> str(b,e);
00103                             index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
00104                             result.swap(tmp);
00105                         }
00106                         return result;
00107                     }
00108                 };
00109 
00110                 template<typename BaseIterator>
00111                 class mapping {
00112                 public:
00113                     typedef BaseIterator base_iterator;
00114                     typedef typename std::iterator_traits<base_iterator>::value_type char_type;
00115 
00116 
00117                     mapping(boundary_type type,
00118                             base_iterator begin,
00119                             base_iterator end,
00120                             std::locale const &loc) 
00121                         :   
00122                             index_(new index_type()),
00123                             begin_(begin),
00124                             end_(end)
00125                     {
00126                         index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
00127                         index_->swap(idx);
00128                     }
00129 
00130                     mapping()
00131                     {
00132                     }
00133 
00134                     index_type const &index() const
00135                     {
00136                         return *index_;
00137                     }
00138 
00139                     base_iterator begin() const
00140                     {
00141                         return begin_;
00142                     }
00143 
00144                     base_iterator end() const
00145                     {
00146                         return end_;
00147                     }
00148 
00149                 private:
00150                     boost::shared_ptr<index_type> index_;
00151                     base_iterator begin_,end_;
00152                 };
00153 
00154                 template<typename BaseIterator>
00155                 class segment_index_iterator : 
00156                     public boost::iterator_facade<
00157                         segment_index_iterator<BaseIterator>,
00158                         segment<BaseIterator>,
00159                         boost::bidirectional_traversal_tag,
00160                         segment<BaseIterator> const &
00161                     >
00162                 {
00163                 public:
00164                     typedef BaseIterator base_iterator;
00165                     typedef mapping<base_iterator> mapping_type;
00166                     typedef segment<base_iterator> segment_type;
00167                     
00168                     segment_index_iterator() : current_(0,0),map_(0)
00169                     {
00170                     }
00171 
00172                     segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
00173                         map_(map),
00174                         mask_(mask),
00175                         full_select_(full_select)
00176                     {
00177                         set(p);
00178                     }
00179                     segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
00180                         map_(map),
00181                         mask_(mask),
00182                         full_select_(full_select)
00183                     {
00184                         if(is_begin)
00185                             set_begin();
00186                         else
00187                             set_end();
00188                     }
00189 
00190                     segment_type const &dereference() const
00191                     {
00192                         return value_;
00193                     }
00194 
00195                     bool equal(segment_index_iterator const &other) const
00196                     {
00197                         return map_ == other.map_ && current_.second == other.current_.second;
00198                     }
00199 
00200                     void increment()
00201                     {
00202                         std::pair<size_t,size_t> next = current_;
00203                         if(full_select_) {
00204                             next.first = next.second;
00205                             while(next.second < size()) {
00206                                 next.second++;
00207                                 if(valid_offset(next.second))
00208                                     break;
00209                             }
00210                             if(next.second == size())
00211                                 next.first = next.second - 1;
00212                         }
00213                         else {
00214                             while(next.second < size()) {
00215                                 next.first = next.second;
00216                                 next.second++;
00217                                 if(valid_offset(next.second))
00218                                     break;
00219                             }
00220                         }
00221                         update_current(next);
00222                     }
00223 
00224                     void decrement()
00225                     {
00226                         std::pair<size_t,size_t> next = current_;
00227                         if(full_select_) {
00228                             while(next.second >1) {
00229                                 next.second--;
00230                                 if(valid_offset(next.second))
00231                                     break;
00232                             }
00233                             next.first = next.second;
00234                             while(next.first >0) {
00235                                 next.first--;
00236                                 if(valid_offset(next.first))
00237                                     break;
00238                             }
00239                         }
00240                         else {
00241                             while(next.second >1) {
00242                                 next.second--;
00243                                 if(valid_offset(next.second))
00244                                     break;
00245                             }
00246                             next.first = next.second - 1;
00247                         }
00248                         update_current(next);
00249                     }
00250 
00251                 private:
00252 
00253                     void set_end()
00254                     {
00255                         current_.first  = size() - 1;
00256                         current_.second = size();
00257                         value_ = segment_type(map_->end(),map_->end(),0);
00258                     }
00259                     void set_begin()
00260                     {
00261                         current_.first = current_.second = 0;
00262                         value_ = segment_type(map_->begin(),map_->begin(),0);
00263                         increment();
00264                     }
00265 
00266                     void set(base_iterator p)
00267                     {
00268                         size_t dist=std::distance(map_->begin(),p);
00269                         index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
00270                         index_type::const_iterator 
00271                             boundary_point=std::upper_bound(b,e,break_info(dist));
00272                         while(boundary_point != e && (boundary_point->rule & mask_)==0)
00273                             boundary_point++;
00274 
00275                         current_.first = current_.second = boundary_point - b;
00276                         
00277                         if(full_select_) {
00278                             while(current_.first > 0) {
00279                                 current_.first --;
00280                                 if(valid_offset(current_.first))
00281                                     break;
00282                             }
00283                         }
00284                         else {
00285                             if(current_.first > 0)
00286                                 current_.first --;
00287                         }
00288                         value_.first = map_->begin();
00289                         std::advance(value_.first,get_offset(current_.first));
00290                         value_.second = value_.first;
00291                         std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
00292 
00293                         update_rule();
00294                     }
00295 
00296                     void update_current(std::pair<size_t,size_t> pos)
00297                     {
00298                         std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
00299                         std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
00300                         std::advance(value_.first,first_diff);
00301                         std::advance(value_.second,second_diff);
00302                         current_ = pos;
00303                         update_rule();
00304                     }
00305 
00306                     void update_rule()
00307                     {
00308                         if(current_.second != size()) {
00309                             value_.rule(index()[current_.second].rule);
00310                         }
00311                     }
00312                     size_t get_offset(size_t ind) const
00313                     {
00314                         if(ind == size())
00315                             return index().back().offset;
00316                         return index()[ind].offset;
00317                     }
00318 
00319                     bool valid_offset(size_t offset) const
00320                     {
00321                         return  offset == 0 
00322                                 || offset == size() // make sure we not acess index[size]
00323                                 || (index()[offset].rule & mask_)!=0;
00324                     }
00325                     
00326                     size_t size() const
00327                     {
00328                         return index().size();
00329                     }
00330                     
00331                     index_type const &index() const
00332                     {
00333                         return map_->index();
00334                     }
00335                 
00336                     
00337                     segment_type value_;
00338                     std::pair<size_t,size_t> current_;
00339                     mapping_type const *map_;
00340                     rule_type mask_;
00341                     bool full_select_;
00342                 };
00343                             
00344                 template<typename BaseIterator>
00345                 class boundary_point_index_iterator : 
00346                     public boost::iterator_facade<
00347                         boundary_point_index_iterator<BaseIterator>,
00348                         boundary_point<BaseIterator>,
00349                         boost::bidirectional_traversal_tag,
00350                         boundary_point<BaseIterator> const &
00351                     >
00352                 {
00353                 public:
00354                     typedef BaseIterator base_iterator;
00355                     typedef mapping<base_iterator> mapping_type;
00356                     typedef boundary_point<base_iterator> boundary_point_type;
00357                     
00358                     boundary_point_index_iterator() : current_(0),map_(0)
00359                     {
00360                     }
00361 
00362                     boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
00363                         map_(map),
00364                         mask_(mask)
00365                     {
00366                         if(is_begin)
00367                             set_begin();
00368                         else
00369                             set_end();
00370                     }
00371                     boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
00372                         map_(map),
00373                         mask_(mask)
00374                     {
00375                         set(p);
00376                     }
00377 
00378                     boundary_point_type const &dereference() const
00379                     {
00380                         return value_;
00381                     }
00382 
00383                     bool equal(boundary_point_index_iterator const &other) const
00384                     {
00385                         return map_ == other.map_ && current_ == other.current_;
00386                     }
00387 
00388                     void increment()
00389                     {
00390                         size_t next = current_;
00391                         while(next < size()) {
00392                             next++;
00393                             if(valid_offset(next))
00394                                 break;
00395                         }
00396                         update_current(next);
00397                     }
00398 
00399                     void decrement()
00400                     {
00401                         size_t next = current_;
00402                         while(next>0) {
00403                             next--;
00404                             if(valid_offset(next))
00405                                 break;
00406                         }
00407                         update_current(next);
00408                     }
00409 
00410                 private:
00411                     void set_end()
00412                     {
00413                         current_ = size();
00414                         value_ = boundary_point_type(map_->end(),0);
00415                     }
00416                     void set_begin()
00417                     {
00418                         current_ = 0;
00419                         value_ = boundary_point_type(map_->begin(),0);
00420                     }
00421 
00422                     void set(base_iterator p)
00423                     {
00424                         size_t dist =  std::distance(map_->begin(),p);
00425 
00426                         index_type::const_iterator b=index().begin();
00427                         index_type::const_iterator e=index().end();
00428                         index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
00429 
00430                         if(ptr==index().end())
00431                             current_=size()-1;
00432                         else
00433                             current_=ptr - index().begin();
00434 
00435                         while(!valid_offset(current_))
00436                             current_ ++;
00437 
00438                         std::ptrdiff_t diff = get_offset(current_) - dist;
00439                         std::advance(p,diff);
00440                         value_.iterator(p);
00441                         update_rule();
00442                     }
00443 
00444                     void update_current(size_t pos)
00445                     {
00446                         std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
00447                         base_iterator i=value_.iterator();
00448                         std::advance(i,diff);
00449                         current_ = pos;
00450                         value_.iterator(i);
00451                         update_rule();
00452                     }
00453 
00454                     void update_rule()
00455                     {
00456                         if(current_ != size()) {
00457                             value_.rule(index()[current_].rule);
00458                         }
00459                     }
00460                     size_t get_offset(size_t ind) const
00461                     {
00462                         if(ind == size())
00463                             return index().back().offset;
00464                         return index()[ind].offset;
00465                     }
00466 
00467                     bool valid_offset(size_t offset) const
00468                     {
00469                         return  offset == 0 
00470                                 || offset + 1 >= size() // last and first are always valid regardless of mark
00471                                 || (index()[offset].rule & mask_)!=0;
00472                     }
00473                     
00474                     size_t size() const
00475                     {
00476                         return index().size();
00477                     }
00478                     
00479                     index_type const &index() const
00480                     {
00481                         return map_->index();
00482                     }
00483                 
00484                     
00485                     boundary_point_type value_;
00486                     size_t current_;
00487                     mapping_type const *map_;
00488                     rule_type mask_;
00489                 };
00490 
00491 
00492             } // details
00493 
00495 
00496             template<typename BaseIterator>
00497             class segment_index;
00498 
00499             template<typename BaseIterator>
00500             class boundary_point_index;
00501             
00502 
00554 
00555             template<typename BaseIterator>
00556             class segment_index {
00557             public:
00558                 
00562                 typedef BaseIterator base_iterator;
00563                 #ifdef BOOST_LOCALE_DOXYGEN
00564 
00565 
00566 
00567 
00568 
00569 
00570 
00571 
00572 
00573 
00574 
00575 
00576 
00577 
00578                 typedef unspecified_iterator_type iterator;
00582                 typedef unspecified_iterator_type const_iterator;
00583                 #else
00584                 typedef details::segment_index_iterator<base_iterator> iterator;
00585                 typedef details::segment_index_iterator<base_iterator> const_iterator;
00586                 #endif
00587 
00588 
00589 
00590 
00591                 typedef segment<base_iterator> value_type;
00592 
00602                 segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
00603                 {
00604                 }
00609                 segment_index(boundary_type type,
00610                             base_iterator begin,
00611                             base_iterator end,
00612                             rule_type mask,
00613                             std::locale const &loc=std::locale()) 
00614                     :
00615                         map_(type,begin,end,loc),
00616                         mask_(mask),
00617                         full_select_(false)
00618                 {
00619                 }
00624                 segment_index(boundary_type type,
00625                             base_iterator begin,
00626                             base_iterator end,
00627                             std::locale const &loc=std::locale()) 
00628                     :
00629                         map_(type,begin,end,loc),
00630                         mask_(0xFFFFFFFFu),
00631                         full_select_(false)
00632                 {
00633                 }
00634 
00645                 segment_index(boundary_point_index<base_iterator> const &);
00656                 segment_index const &operator = (boundary_point_index<base_iterator> const &);
00657 
00658                 
00665                 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
00666                 {
00667                     map_ = mapping_type(type,begin,end,loc);
00668                 }
00669 
00679                 iterator begin() const
00680                 {
00681                     return iterator(true,&map_,mask_,full_select_);
00682                 }
00683 
00691                 iterator end() const
00692                 {
00693                     return iterator(false,&map_,mask_,full_select_);
00694                 }
00695 
00713                 iterator find(base_iterator p) const
00714                 {
00715                     return iterator(p,&map_,mask_,full_select_);
00716                 }
00717                
00721                 rule_type rule() const
00722                 {
00723                     return mask_;
00724                 }
00728                 void rule(rule_type v)
00729                 {
00730                     mask_ = v;
00731                 }
00732 
00745 
00746                 bool full_select()  const 
00747                 {
00748                     return full_select_;
00749                 }
00750 
00763 
00764                 void full_select(bool v) 
00765                 {
00766                     full_select_ = v;
00767                 }
00768                 
00769             private:
00770                 friend class boundary_point_index<base_iterator>;
00771                 typedef details::mapping<base_iterator> mapping_type;
00772                 mapping_type  map_;
00773                 rule_type mask_;
00774                 bool full_select_;
00775             };
00776 
00823 
00824 
00825             template<typename BaseIterator>
00826             class boundary_point_index {
00827             public:
00831                 typedef BaseIterator base_iterator;
00832                 #ifdef BOOST_LOCALE_DOXYGEN
00833 
00834 
00835 
00836 
00837 
00838 
00839 
00840 
00841 
00842 
00843 
00844 
00845 
00846 
00847                 typedef unspecified_iterator_type iterator;
00851                 typedef unspecified_iterator_type const_iterator;
00852                 #else
00853                 typedef details::boundary_point_index_iterator<base_iterator> iterator;
00854                 typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
00855                 #endif
00856 
00857 
00858 
00859 
00860                 typedef boundary_point<base_iterator> value_type;
00861                 
00871                 boundary_point_index() : mask_(0xFFFFFFFFu)
00872                 {
00873                 }
00874                 
00879                 boundary_point_index(boundary_type type,
00880                             base_iterator begin,
00881                             base_iterator end,
00882                             rule_type mask,
00883                             std::locale const &loc=std::locale()) 
00884                     :
00885                         map_(type,begin,end,loc),
00886                         mask_(mask)
00887                 {
00888                 }
00893                 boundary_point_index(boundary_type type,
00894                             base_iterator begin,
00895                             base_iterator end,
00896                             std::locale const &loc=std::locale()) 
00897                     :
00898                         map_(type,begin,end,loc),
00899                         mask_(0xFFFFFFFFu)
00900                 {
00901                 }
00902 
00913                 boundary_point_index(segment_index<base_iterator> const &other);
00924                 boundary_point_index const &operator=(segment_index<base_iterator> const &other);
00925 
00932                 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
00933                 {
00934                     map_ = mapping_type(type,begin,end,loc);
00935                 }
00936 
00946                 iterator begin() const
00947                 {
00948                     return iterator(true,&map_,mask_);
00949                 }
00950 
00960                 iterator end() const
00961                 {
00962                     return iterator(false,&map_,mask_);
00963                 }
00964 
00978                 iterator find(base_iterator p) const
00979                 {
00980                     return iterator(p,&map_,mask_);
00981                 }
00982                 
00986                 rule_type rule() const
00987                 {
00988                     return mask_;
00989                 }
00993                 void rule(rule_type v)
00994                 {
00995                     mask_ = v;
00996                 }
00997 
00998             private:
00999 
01000                 friend class segment_index<base_iterator>;
01001                 typedef details::mapping<base_iterator> mapping_type;
01002                 mapping_type  map_;
01003                 rule_type mask_;
01004             };
01005            
01007             template<typename BaseIterator>
01008             segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
01009                 map_(other.map_),
01010                 mask_(0xFFFFFFFFu),
01011                 full_select_(false)
01012             {
01013             }
01014             
01015             template<typename BaseIterator>
01016             boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
01017                 map_(other.map_),
01018                 mask_(0xFFFFFFFFu)
01019             {
01020             }
01021 
01022             template<typename BaseIterator>
01023             segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
01024             {
01025                 map_ = other.map_;
01026                 return *this;
01027             }
01028             
01029             template<typename BaseIterator>
01030             boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
01031             {
01032                 map_ = other.map_;
01033                 return *this;
01034             }
01036           
01037             typedef segment_index<std::string::const_iterator> ssegment_index;      
01038             typedef segment_index<std::wstring::const_iterator> wssegment_index;    
01039             #ifdef BOOST_HAS_CHAR16_T
01040             typedef segment_index<std::u16string::const_iterator> u16ssegment_index;
01041             #endif
01042             #ifdef BOOST_HAS_CHAR32_T
01043             typedef segment_index<std::u32string::const_iterator> u32ssegment_index;
01044             #endif
01045            
01046             typedef segment_index<char const *> csegment_index;                     
01047             typedef segment_index<wchar_t const *> wcsegment_index;                 
01048             #ifdef BOOST_HAS_CHAR16_T
01049             typedef segment_index<char16_t const *> u16csegment_index;              
01050             #endif
01051             #ifdef BOOST_HAS_CHAR32_T
01052             typedef segment_index<char32_t const *> u32csegment_index;              
01053             #endif
01054 
01055             typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;
01056             typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;
01057             #ifdef BOOST_HAS_CHAR16_T
01058             typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;
01059             #endif
01060             #ifdef BOOST_HAS_CHAR32_T
01061             typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;
01062             #endif
01063            
01064             typedef boundary_point_index<char const *> cboundary_point_index;       
01065             typedef boundary_point_index<wchar_t const *> wcboundary_point_index;   
01066             #ifdef BOOST_HAS_CHAR16_T
01067             typedef boundary_point_index<char16_t const *> u16cboundary_point_index;
01068             #endif
01069             #ifdef BOOST_HAS_CHAR32_T
01070             typedef boundary_point_index<char32_t const *> u32cboundary_point_index;
01071             #endif
01072 
01073 
01074 
01075         } // boundary
01076 
01077     } // locale
01078 } // boost
01079 
01086 
01087 #ifdef BOOST_MSVC
01088 #pragma warning(pop)
01089 #endif
01090 
01091 #endif
01092 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4