...one of the most highly
regarded and expertly designed C++ library projects in the
world.
— Herb Sutter and Andrei
Alexandrescu, C++
Coding Standards
This is example 8 of Boost.MultiIndex.
#include <iostream> #include <iomanip> #include <boost/tokenizer.hpp> #include <boost/multi_index_container.hpp> #include <boost/multi_index/key_extractors.hpp> #include <boost/multi_index/ordered_index.hpp> #include <boost/multi_index/hashed_index.hpp> #include <boost/lambda/lambda.hpp> using namespace boost::multi_index; namespace bl = boost::lambda; // word_counter keeps the ocurrences of words inserted. A hashed // index allows for fast checking of preexisting entries. struct word_counter_entry { std::string word; unsigned int occurrences; word_counter_entry( std::string word_ ) : word(word_), occurrences(0) {} }; typedef multi_index_container < word_counter_entry, indexed_by < ordered_non_unique < BOOST_MULTI_INDEX_MEMBER( word_counter_entry,unsigned int,occurrences), std::greater<unsigned int> >, hashed_unique < BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word) > > > word_counter; typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer; int main() { std::string text= "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... " "...snip..." "...no se salga un punto de la verdad."; // feed the text into the container word_counter wc; text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-")); unsigned int total_occurrences = 0; for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end(); it != it_end ; ++it ) { ++total_occurrences; word_counter::iterator wit = wc.insert(*it).first; wc.modify_key( wit, ++ bl::_1 ); } // list words by frequency of appearance std::cout << std::fixed << std::setprecision(2); for( word_counter::iterator wit = wc.begin(), wit_end=wc.end(); wit != wit_end; ++wit ) { std::cout << std::setw(11) << wit->word << ": " << std::setw(5) << 100.0 * wit->occurrences / total_occurrences << "%" << std::endl; } return 0; }
#include <iostream> #include <iomanip> #include <boost/tokenizer.hpp> #include <boost/bimap/bimap.hpp> #include <boost/bimap/unordered_set_of.hpp> #include <boost/bimap/multiset_of.hpp> #include <boost/bimap/support/lambda.hpp> using namespace boost::bimaps; struct word {}; struct occurrences {}; typedef bimap < multiset_of< tagged<unsigned int,occurrences>, std::greater<unsigned int> >, unordered_set_of< tagged< std::string, word> > > word_counter; typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer; int main() { std::string text= "Relations between data in the STL are represented with maps." "A map is a directed relation, by using it you are representing " "a mapping. In this directed relation, the first type is related to " "the second type but it is not true that the inverse relationship " "holds. This is useful in a lot of situations, but there are some " "relationships that are bidirectional by nature."; // feed the text into the container word_counter wc; text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-")); unsigned int total_occurrences = 0; for( text_tokenizer::const_iterator it = tok.begin(), it_end = tok.end(); it != it_end ; ++it ) { ++total_occurrences; word_counter::map_by<occurrences>::iterator wit = wc.by<occurrences>().insert( word_counter::map_by<occurrences>::value_type(0,*it) ).first; wc.by<occurrences>().modify_key( wit, ++_key); } // list words by frequency of appearance std::cout << std::fixed << std::setprecision(2); for( word_counter::map_by<occurrences>::const_iterator wit = wc.by<occurrences>().begin(), wit_end = wc.by<occurrences>().end(); wit != wit_end; ++wit ) { std::cout << std::setw(15) << wit->get<word>() << ": " << std::setw(5) << 100.0 * wit->get<occurrences>() / total_occurrences << "%" << std::endl; } return 0; }