boost/algorithm/searching/boyer_moore_horspool.hpp
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
#define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
#include <iterator> // for std::iterator_traits
#include <boost/assert.hpp>
#include <boost/static_assert.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/algorithm/searching/detail/bm_traits.hpp>
#include <boost/algorithm/searching/detail/debugging.hpp>
// #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
namespace boost { namespace algorithm {
/*
A templated version of the boyer-moore-horspool searching algorithm.
Requirements:
* Random access iterators
* The two iterator types (patIter and corpusIter) must
"point to" the same underlying type.
* Additional requirements may be imposed by the skip table, such as:
** Numeric type (array-based skip table)
** Hashable type (map-based skip table)
http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
*/
/// \class boyer_moore_horspool
/// \brief Searcher object: built once from a pattern, then applied to any
///        number of corpora through the function call operator.
///
/// \tparam patIter  Iterator type of the pattern (Random Access Iterator)
/// \tparam traits   Supplies skip_table_t, the table mapping a corpus element
///                  to the distance the search window is shifted.
template <typename patIter, typename traits = detail::BM_traits<patIter> >
class boyer_moore_horspool {
    typedef typename std::iterator_traits<patIter>::difference_type difference_type;
public:
    /// \brief Remembers the pattern range and builds the skip table for it.
    ///
    /// \param first    The start of the pattern to search for (Random Access Iterator)
    /// \param last     One past the end of the pattern to search for
    ///
    /// Only the two iterators are stored, not a copy of the pattern, so the
    /// range [first, last) must stay valid for as long as this object is used.
    boyer_moore_horspool ( patIter first, patIter last )
        : pat_first ( first ), pat_last ( last ),
          k_pattern_length ( std::distance ( pat_first, pat_last )),
          // NOTE(review): skip_table_t is declared in detail/bm_traits.hpp (not
          // visible here); presumably the arguments are (size hint, default shift).
          skip_ ( k_pattern_length, k_pattern_length ) {
    //  Build the skip table: every pattern element except the last one is
    //  inserted, in pattern order, with its distance to the last pattern
    //  position. The counter has the same signed type as k_pattern_length so
    //  the subtraction involves no signed/unsigned conversion.
        difference_type i = 0;
        if ( first != last )    // 'last - 1' is only meaningful for a non-empty pattern
            for ( patIter iter = first; iter != last - 1; ++iter, ++i )
                skip_.insert ( *iter, k_pattern_length - 1 - i );
#ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
        skip_.PrintSkipTable ();
#endif
        }

    ~boyer_moore_horspool () {}

    /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last )
    /// \brief Searches the corpus for the pattern that was passed into the constructor
    ///
    /// \param corpus_first The start of the data to search (Random Access Iterator)
    /// \param corpus_last  One past the end of the data to search
    /// \return An iterator to the start of the first match found; corpus_first
    ///         if the pattern is empty; corpus_last if the corpus is empty, is
    ///         shorter than the pattern, or contains no match.
    ///
    template <typename corpusIter>
    corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
    //  Pattern and corpus must have exactly the same element type.
        BOOST_STATIC_ASSERT (( boost::is_same<
            typename std::iterator_traits<patIter>::value_type,
            typename std::iterator_traits<corpusIter>::value_type>::value ));

        if ( corpus_first == corpus_last ) return corpus_last;  // if nothing to search, we didn't find it!
        if (    pat_first ==    pat_last ) return corpus_first; // empty pattern matches at start

        const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
    //  If the pattern is larger than the corpus, we can't find it!
        if ( k_corpus_length < k_pattern_length )
            return corpus_last;

    //  Do the search
        return this->do_search ( corpus_first, corpus_last );
        }

private:
/// \cond DOXYGEN_HIDE
    patIter pat_first, pat_last;                // the pattern range (not owned)
    const difference_type k_pattern_length;     // std::distance ( pat_first, pat_last )
    typename traits::skip_table_t skip_;        // element -> window shift

    /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
    /// \brief Searches the corpus for the pattern that was passed into the constructor
    ///
    /// Called by operator() only after it has established that the pattern is
    /// non-empty and that the corpus is at least as long as the pattern.
    ///
    /// \param corpus_first The start of the data to search (Random Access Iterator)
    /// \param corpus_last  One past the end of the data to search
    /// \return An iterator to the start of the first match found, or corpus_last
    ///         if the loop finishes without a match.
    ///
    template <typename corpusIter>
    corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
        corpusIter curPos = corpus_first;
    //  Last window start at which the whole pattern still fits in the corpus.
        const corpusIter lastPos = corpus_last - k_pattern_length;
        while ( curPos <= lastPos ) {
        //  Do we match right where we are? Compare from the last pattern
        //  element towards the first.
            difference_type j = k_pattern_length - 1;
            while ( pat_first [j] == curPos [j] ) {
            //  We matched - we're done!
                if ( j == 0 )
                    return curPos;
                --j;
                }

        //  Shift the window by the table entry for the corpus element that is
        //  aligned with the last pattern position.
        //  NOTE(review): assumes the table yields either an inserted shift
        //  (1 .. k_pattern_length - 1) or the k_pattern_length given at
        //  construction, so curPos always advances and never passes corpus_last.
            curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
            }

        return corpus_last;
        }
/// \endcond
    };
/// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param pat_first The start of the pattern to search for (Random Access Iterator)
/// \param pat_last One past the end of the data to search for
///
template <typename patIter, typename corpusIter>
corpusIter boyer_moore_horspool_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last ) {
boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
return bmh ( corpus_first, corpus_last );
}
}}
#endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP