libs/regex/test/regress/parse.cpp
/*
*
* Copyright (c) 1998-2002
* Dr John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
*
* FILE parse.cpp
* VERSION see <boost/version.hpp>
*
* Input parsing functions for regress.
*
*/
#include <boost/regex.hpp>
#include "regress.h"
#ifndef BOOST_REGEX_NO_TEST
#ifndef BOOST_RE_ALGO_INCLUDED
#include <algorithm>
#endif
using namespace boost;
#ifdef BOOST_REGEX_V3
#define regex_constants regbase
#endif
//
// start by defining all our flag types:
flag_info flag_data[] = {
{ BOOST_RE_STR("REG_BASIC"), 9, REG_BASIC, 0 },
{ BOOST_RE_STR("REG_EXTENDED"), 12, REG_EXTENDED, 0 },
{ BOOST_RE_STR("REG_ESCAPE_IN_LISTS"), 19, REG_ESCAPE_IN_LISTS, 0 },
{ BOOST_RE_STR("REG_ICASE"), 9, REG_ICASE, 0 },
{ BOOST_RE_STR("REG_NOSUB"), 9, REG_NOSUB, 0 },
{ BOOST_RE_STR("REG_NEWLINE"), 11, REG_NEWLINE, 0 },
{ BOOST_RE_STR("REG_NOCOLLATE"), 13, REG_NOCOLLATE, 0 },
{ BOOST_RE_STR("REG_NOSPEC"), 10, REG_NOSPEC, 0 },
{ BOOST_RE_STR("REG_NEWLINE_ALT"), 15, REG_NEWLINE_ALT , 0 },
{ BOOST_RE_STR("REG_PERL"), 8, REG_PERL, 0 },
{ BOOST_RE_STR("REG_AWK"), 7, REG_AWK, 0 },
{ BOOST_RE_STR("REG_EGREP"), 9, REG_EGREP, 0 },
{ BOOST_RE_STR("REG_NOTBOL"), 10, REG_NOTBOL, 1 },
{ BOOST_RE_STR("REG_NOTEOL"), 10, REG_NOTEOL, 1 },
{ BOOST_RE_STR("REG_STARTEND"), 12, REG_STARTEND, 1 },
{ BOOST_RE_STR("basic"), 5, regex_constants::basic, 2 },
{ BOOST_RE_STR("escape_in_lists"), 15, regex_constants::escape_in_lists, 2 },
{ BOOST_RE_STR("char_classes"), 12, regex_constants::char_classes, 2 },
{ BOOST_RE_STR("intervals"), 9, regex_constants::intervals, 2 },
{ BOOST_RE_STR("limited_ops"), 11, regex_constants::limited_ops, 2 },
{ BOOST_RE_STR("newline_alt"), 11, regex_constants::newline_alt, 2 },
{ BOOST_RE_STR("bk_plus_qm"), 10, regex_constants::bk_plus_qm, 2 },
{ BOOST_RE_STR("bk_braces"), 9, regex_constants::bk_braces, 2 },
{ BOOST_RE_STR("bk_parens"), 9, regex_constants::bk_parens, 2 },
{ BOOST_RE_STR("bk_refs"), 7, regex_constants::bk_refs, 2 },
{ BOOST_RE_STR("bk_vbar"), 7, regex_constants::bk_vbar, 2 },
{ BOOST_RE_STR("use_except"), 10, regex_constants::use_except, 2 },
{ BOOST_RE_STR("literal"), 7, regex_constants::literal, 2 },
#ifndef BOOST_REGEX_V3
{ BOOST_RE_STR("nosubs"), 6, regex_constants::nosubs, 2 },
{ BOOST_RE_STR("optimize"), 8, regex_constants::optimize, 2 },
{ BOOST_RE_STR("perlex"), 6, regex_constants::perlex, 2 },
#endif
{ BOOST_RE_STR("normal"), 6, regex_constants::normal, 2 },
{ BOOST_RE_STR("basic"), 5, regex_constants::basic, 2 },
{ BOOST_RE_STR("extended"), 8, regex_constants::extended, 2 },
{ BOOST_RE_STR("perl"), 6, regex_constants::perl, 2 },
{ BOOST_RE_STR("match_default"), 13, match_default, 3 },
{ BOOST_RE_STR("match_not_bol"), 13, match_not_bol, 3 },
{ BOOST_RE_STR("match_not_eol"), 13, match_not_eol, 3 },
{ BOOST_RE_STR("match_not_bob"), 13, match_not_bob, 3 },
{ BOOST_RE_STR("match_not_eob"), 13, match_not_eob, 3 },
{ BOOST_RE_STR("match_not_bow"), 13, match_not_bow, 3 },
{ BOOST_RE_STR("match_not_eow"), 13, match_not_eow, 3 },
{ BOOST_RE_STR("match_not_dot_newline"), 21, match_not_dot_newline, 3 },
{ BOOST_RE_STR("match_not_dot_null"), 18, match_not_dot_null, 3 },
{ BOOST_RE_STR("match_prev_avail"), 16, match_prev_avail, 3 },
{ BOOST_RE_STR("match_any"), 9, match_any, 3 },
{ BOOST_RE_STR("match_not_null"), 14, match_not_null, 3 },
{ BOOST_RE_STR("match_continuous"), 16, match_continuous, 3 },
{ BOOST_RE_STR("match_partial"), 13, match_partial, 3 },
#ifndef BOOST_REGEX_V3
{ BOOST_RE_STR("match_nosubs"), 12, match_nosubs, 3 },
{ BOOST_RE_STR("match_single_line"), 17, match_single_line, 3 },
#endif
{ BOOST_RE_STR("format_all"), 10, format_all, 3 },
{ BOOST_RE_STR("format_sed"), 10, format_sed, 3 },
{ BOOST_RE_STR("format_perl"), 11, format_perl, 3 },
{ BOOST_RE_STR("format_no_copy"), 14, format_no_copy, 3 },
{ BOOST_RE_STR("format_first_only"), 17, format_first_only, 3 },
{ BOOST_RE_STR("REG_NO_POSIX_TEST"), 17, REG_NO_POSIX_TEST, 4 },
{ BOOST_RE_STR("REG_UNICODE_ONLY"), 16, REG_UNICODE_ONLY, 4 },
{ BOOST_RE_STR("REG_GREP"), 8, REG_GREP, 4 },
{ BOOST_RE_STR("REG_MERGE"), 9, REG_MERGE, 4 },
{ BOOST_RE_STR("REG_MERGE_COPY"), 14, REG_MERGE_COPY, 4 },
{ BOOST_RE_STR("REG_PARTIAL_MATCH"), 17, REG_PARTIAL_MATCH, 4 },
{ BOOST_RE_STR(""), 0, 0, 0 },
};
// basically we create a simple token parser
// using regular expressions
const char_t* expression_text = BOOST_RE_STR("(;.*)|") // comment
BOOST_RE_STR("(^[[:blank:]]*-)|") // -
BOOST_RE_STR("([^\"[:space:]][^[:space:]]*)|") // token
BOOST_RE_STR("(\"((\\\\\"|[^\"])*)\")") // "token"
;
typedef basic_regex<char_t> re_parse_t;
typedef match_results<string_type::const_iterator> parse_grep;
typedef string_type::const_iterator parse_iterator;
re_parse_t parse_expression(expression_text, regex_constants::normal);
//
// now define our grep predicate function object:
class parse_function
{
int mode;
public:
parse_function() : mode(0) {}
parse_function(const parse_function& o) : mode(o.mode) {}
bool operator()(const parse_grep& i);
};
bool parse_function::operator()(const parse_grep& g)
{
parse_iterator i, j;
// determine what caused the match:
if(g[1].matched)
{
// we have a comment:
return true;
}
else if(g[2].matched)
{
// we have the start of a line of flags
mode = -1;
for(int i = 0; i < 5; ++i)
flags[i] = 0;
return true;
}
else if(g[3].matched)
{
// token:
i = g[3].first;
j = g[3].second;
}
else
{
// token delimited by ""
i = g[5].first;
j = g[5].second;
}
// now we need to switch depending upon what mode we are in:
switch(mode)
{
case -1:
{
// parse the flag:
unsigned int id = 0;
while(flag_data[id].len != 0)
{
if(static_cast<unsigned int>(j - i) != flag_data[id].len)
{
++id;
continue;
}
if(std::equal(i, j, flag_data[id].name) == true)
{
flags[flag_data[id].id] |= flag_data[id].value;
return true;
}
++id;
}
cout << "Warning: Unknown flag: ";
string_type t(i, j);
cout << make_narrow(t).c_str();
cout << endl;
return true;
}
case 0:
// set the expression text:
expression = string_type(i, j);
do_test = true;
break;
case 1:
// set the text to match:
search_text = string_type(i, j);
jm_trace("Initial search text: " << make_narrow(search_text).c_str());
expand_escapes(search_text);
jm_trace("Search text after escapes expanded: " << make_narrow(search_text).c_str());
break;
case 2:
// maybe set format string:
if(flags[4] & REG_MERGE)
{
format_string = string_type(i, j);
break;
}
else
{
matches[mode - 2] = to_int(i, j);
break;
}
case 3:
// maybe set format result:
if(flags[4] & REG_MERGE)
{
merge_string = string_type(i, j);
expand_escapes(merge_string);
break;
}
else
{
matches[mode - 2] = to_int(i, j);
break;
}
default:
jm_assert(mode >= 2);
// set the relevent int value:
matches[mode - 2] = to_int(i, j);
}
++mode;
return true;
}
void parse_input_line(const string_type& s)
{
// set matches back to starting values:
for(int i = 0; i < MAX_MATCHES; ++i)
{
matches[i] = -2;
}
parse_function op;
do_test = false;
regex_grep(op, s.begin(), s.end(), parse_expression);
jm_trace("expression: " << make_narrow(expression).c_str());
jm_trace("search string: " << make_narrow(search_text).c_str());
}
int to_int(string_type::const_iterator i, string_type::const_iterator j)
{
int val = 0;
bool neg = false;
if((i != j) && (*i == BOOST_RE_STR('-')))
{
neg = true;
++i;
}
while (i != j)
{
val *= 10;
val += *i - BOOST_RE_STR('0');
++i;
}
if(neg)
val *= -1;
return val;
}
void expand_escapes(string_type& s)
{
for(unsigned int i = 0; i < s.size(); ++i)
{
if(s[i] == BOOST_RE_STR('\\'))
{
switch(s[i+1])
{
case BOOST_RE_STR('a'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\a');
break;
case BOOST_RE_STR('b'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\b');
break;
case BOOST_RE_STR('f'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\f');
break;
case BOOST_RE_STR('n'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\n');
break;
case BOOST_RE_STR('r'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\r');
break;
case BOOST_RE_STR('t'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\t');
break;
case BOOST_RE_STR('v'):
s.erase(s.begin() + i);
s[i] = BOOST_RE_STR('\v');
break;
default:
if( (s[i + 1] >= BOOST_RE_STR('0')) && (s[i + 1] <= BOOST_RE_STR('9')) )
{
int val = 0;
unsigned int pos = i;
++i;
while((i < s.size()) && (s[i] >= BOOST_RE_STR('0')) && (s[i] <= BOOST_RE_STR('9')))
{
val *= 10;
val += s[i] - BOOST_RE_STR('0');
++i;
}
s.erase(s.begin() + pos, s.begin() + i);
if(0 == val)
{
s.insert(s.begin()+pos, ' ');
s[pos] = 0;
}
else
s.insert(s.begin() + pos, (string_type::value_type)val);
i = pos;
}
else
{
s.erase(s.begin() + i);
}
}
}
}
}
#endif