Boost C++ Libraries

"...one of the most highly regarded and expertly designed C++ library projects in the world." — Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

Prev | Up | Home | Next

MultiIndex to Bimap Path - Hashed indices

This is example 8 of Boost.MultiIndex.

Hashed indices can be used as an alternative to ordered indices when fast look-up is needed and sorting information is of no interest. The example features a word counter where duplicate entries are checked by means of a hashed index.

Boost.MultiIndex

Go to source code

#include <iostream>
#include <iomanip>

#include <boost/tokenizer.hpp>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/key_extractors.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/lambda/lambda.hpp>

using namespace boost::multi_index;
namespace bl = boost::lambda;

// word_counter keeps the occurrences of words inserted. A hashed
// index allows for fast checking of preexisting entries.

// One record of the word counter: a word together with the number of
// times it has been counted so far.
struct word_counter_entry
{
    std::string  word;          // the word itself
    unsigned int occurrences;   // how many times the word has been counted

    // Builds an entry for word_ with a count of zero.
    //
    // The constructor is intentionally not explicit: a std::string must
    // convert implicitly to an entry so that a plain string can be handed
    // to the container's insert(), as main() does.
    //
    // The argument is taken by const reference so the string is copied
    // only once (into the member) instead of twice.
    word_counter_entry( const std::string & word_ )
        : word(word_), occurrences(0) {}
};

// Container of word_counter_entry objects with two indices:
//   index 0 - ordered, non-unique, keyed on the `occurrences` member and
//             compared with std::greater<unsigned int>;
//   index 1 - hashed, unique, keyed on the `word` member.
typedef multi_index_container<
    word_counter_entry,
    indexed_by<
        ordered_non_unique<
            BOOST_MULTI_INDEX_MEMBER(word_counter_entry,unsigned int,occurrences),
            std::greater<unsigned int>
        >,
        hashed_unique<
            BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word)
        >
    >
> word_counter;

// Tokenizer type used to split the input text; it is parameterized on
// boost::char_separator<char>, which is given the delimiter set in main().
typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;

int main()
{
    std::string text=
        "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
        "...snip..."
        "...no se salga un punto de la verdad.";

    // feed the text into the container

    word_counter   wc;
    text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
    unsigned int   total_occurrences = 0;

    for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
         it != it_end ; ++it )
    {
        ++total_occurrences;
        word_counter::iterator wit = wc.insert(*it).first;
        wc.modify_key( wit, ++ bl::_1 );
    }

    // list words by frequency of appearance

    std::cout << std::fixed << std::setprecision(2);

    for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
         wit != wit_end; ++wit )
    {
        std::cout << std::setw(11) << wit->word << ": "
                  << std::setw(5)
                  << 100.0 * wit->occurrences / total_occurrences << "%"
                  << std::endl;
    }

    return 0;
}

Boost.Bimap

Go to source code

#include <iostream>
#include <iomanip>

#include <boost/tokenizer.hpp>

#include <boost/bimap/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/multiset_of.hpp>
#include <boost/bimap/support/lambda.hpp>

using namespace boost::bimaps;

// Empty tag types. They are used below as the names of the two sides of
// the bimap (via tagged<>), and to select a side with by<>(), map_by<>
// and get<>().
struct word        {};
struct occurrences {};

// Bidirectional map relating an occurrence count to a word:
//   first side  - tagged `occurrences`: a multiset_of<unsigned int>
//                 compared with std::greater<unsigned int>;
//   second side - tagged `word`: an unordered_set_of<std::string>.
typedef bimap<
    multiset_of< tagged<unsigned int,occurrences>, std::greater<unsigned int> >,
    unordered_set_of< tagged<std::string,word> >
> word_counter;

// Tokenizer type used to split the input text; it is parameterized on
// boost::char_separator<char>, which is given the delimiter set in main().
typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;

int main()
{

    std::string text=
        "Relations between data in the STL are represented with maps."
        "A map is a directed relation, by using it you are representing "
        "a mapping. In this directed relation, the first type is related to "
        "the second type but it is not true that the inverse relationship "
        "holds. This is useful in a lot of situations, but there are some "
        "relationships that are bidirectional by nature.";

    // feed the text into the container

    word_counter   wc;
    text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
    unsigned int   total_occurrences = 0;

    for( text_tokenizer::const_iterator it = tok.begin(), it_end = tok.end();
         it != it_end ; ++it )
    {
        ++total_occurrences;

        word_counter::map_by<occurrences>::iterator wit =
            wc.by<occurrences>().insert(
                 word_counter::map_by<occurrences>::value_type(0,*it)
            ).first;

        wc.by<occurrences>().modify_key( wit, ++_key);
    }

    // list words by frequency of appearance

    std::cout << std::fixed << std::setprecision(2);

    for( word_counter::map_by<occurrences>::const_iterator
            wit     = wc.by<occurrences>().begin(),
            wit_end = wc.by<occurrences>().end();

         wit != wit_end; ++wit )
    {
        std::cout << std::setw(15) << wit->get<word>() << ": "
                  << std::setw(5)
                  << 100.0 * wit->get<occurrences>() / total_occurrences << "%"
                  << std::endl;
    }

    return 0;
}


Prev | Up | Home | Next