Boost C++ Libraries

"...one of the most highly regarded and expertly designed C++ library projects in the world." — Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

This is the documentation for an old version of boost. Click here for the latest Boost documentation.

libs/regex/example/iso8859_1_regex_traits/iso8859_1_regex_traits.cpp

/*
 * (C) Copyright Christain Engstrom 2001.
 * Permission to copy, use, modify, sell and distribute this software
 * is granted provided this copyright notice appears in all copies.
 * This software is provided "as is" without express or implied
 * warranty, and with no claim as to its suitability for any purpose.
 */
 
#include "iso8859_1_regex_traits.hpp"

bool iso8859_1_regex_traits::is_class(char c, boost::uint_fast32_t f) const
{
  static const boost::uint_fast32_t cntrl =      base::char_class_cntrl;
  static const boost::uint_fast32_t space =      base::char_class_space;
  static const boost::uint_fast32_t blank =      base::char_class_blank;
  static const boost::uint_fast32_t digit =      base::char_class_digit;
  static const boost::uint_fast32_t xdigit =     base::char_class_xdigit;
  static const boost::uint_fast32_t underscore = base::char_class_underscore;
  static const boost::uint_fast32_t punct =      base::char_class_punct;
  static const boost::uint_fast32_t upper =      base::char_class_upper |
                                                 base::char_class_alpha;
  static const boost::uint_fast32_t lower =      base::char_class_lower |
                                                 base::char_class_alpha;


  static const boost::uint_fast32_t flag_table[UCHAR_MAX + 1] = {
//                            Hex  Oct  Dec  Char
  cntrl                  , //   0    0   0
  cntrl                  , //   1    1   1
  cntrl                  , //   2    2   2
  cntrl                  , //   3    3   3
  cntrl                  , //   4    4   4
  cntrl                  , //   5    5   5
  cntrl                  , //   6    6   6
  cntrl                  , //   7    7   7
  cntrl                  , //   8   10   8
  cntrl | space | blank  , //   9   11   9  <HT>
  cntrl | space          , //   a   12  10  <LF>
  cntrl | space          , //   b   13  11  <VT>
  cntrl | space          , //   c   14  12  <FF>
  cntrl | space          , //   d   15  13  <CR>
  cntrl                  , //   e   16  14
  cntrl                  , //   f   17  15
  cntrl                  , //  10   20  16
  cntrl                  , //  11   21  17
  cntrl                  , //  12   22  18
  cntrl                  , //  13   23  19
  cntrl                  , //  14   24  20
  cntrl                  , //  15   25  21
  cntrl                  , //  16   26  22
  cntrl                  , //  17   27  23
  cntrl                  , //  18   30  24
  cntrl                  , //  19   31  25
  cntrl                  , //  1a   32  26
  cntrl                  , //  1b   33  27
  cntrl                  , //  1c   34  28
  cntrl                  , //  1d   35  29
  cntrl                  , //  1e   36  30
  cntrl                  , //  1f   37  31
  space | blank          , //  20   40  32       |  Space
  punct                  , //  21   41  33  !
  punct                  , //  22   42  34  "
  punct                  , //  23   43  35  #
  punct                  , //  24   44  36  $
  punct                  , //  25   45  37  %
  punct                  , //  26   46  38  &
  punct                  , //  27   47  39  '
  punct                  , //  28   50  40  (
  punct                  , //  29   51  41  )
  punct                  , //  2a   52  42  *
  punct                  , //  2b   53  43  +
  punct                  , //  2c   54  44
  punct                  , //  2d   55  45  -
  punct                  , //  2e   56  46  .
  punct                  , //  2f   57  47  /
  digit | xdigit         , //  30   60  48  0
  digit | xdigit         , //  31   61  49  1
  digit | xdigit         , //  32   62  50  2
  digit | xdigit         , //  33   63  51  3
  digit | xdigit         , //  34   64  52  4
  digit | xdigit         , //  35   65  53  5
  digit | xdigit         , //  36   66  54  6
  digit | xdigit         , //  37   67  55  7
  digit | xdigit         , //  38   70  56  8
  digit | xdigit         , //  39   71  57  9
  punct                  , //  3a   72  58  :
  punct                  , //  3b   73  59  ;
  punct                  , //  3c   74  60  <
  punct                  , //  3d   75  61  =
  punct                  , //  3e   76  62  >
  punct                  , //  3f   77  63  ?
  punct                  , //  40  100  64  @
  upper | xdigit         , //  41  101  65  A
  upper | xdigit         , //  42  102  66  B
  upper | xdigit         , //  43  103  67  C
  upper | xdigit         , //  44  104  68  D
  upper | xdigit         , //  45  105  69  E
  upper | xdigit         , //  46  106  70  F
  upper                  , //  47  107  71  G
  upper                  , //  48  110  72  H
  upper                  , //  49  111  73  I
  upper                  , //  4a  112  74  J
  upper                  , //  4b  113  75  K
  upper                  , //  4c  114  76  L
  upper                  , //  4d  115  77  M
  upper                  , //  4e  116  78  N
  upper                  , //  4f  117  79  O
  upper                  , //  50  120  80  P
  upper                  , //  51  121  81  Q
  upper                  , //  52  122  82  R
  upper                  , //  53  123  83  S
  upper                  , //  54  124  84  T
  upper                  , //  55  125  85  U
  upper                  , //  56  126  86  V
  upper                  , //  57  127  87  W
  upper                  , //  58  130  88  X
  upper                  , //  59  131  89  Y
  upper                  , //  5a  132  90  Z
  punct                  , //  5b  133  91  [    |  Left square bracket
  punct                  , //  5c  134  92  \    |  Backslash
  punct                  , //  5d  135  93  ]    |  Right square bracket
  punct                  , //  5e  136  94  ^    |  Circumflex
  punct | underscore     , //  5f  137  95  _    |  Underscore
  punct                  , //  60  140  96  `
  lower | xdigit         , //  61  141  97  a
  lower | xdigit         , //  62  142  98  b
  lower | xdigit         , //  63  143  99  c
  lower | xdigit         , //  64  144 100  d
  lower | xdigit         , //  65  145 101  e
  lower | xdigit         , //  66  146 102  f
  lower                  , //  67  147 103  g
  lower                  , //  68  150 104  h
  lower                  , //  69  151 105  i
  lower                  , //  6a  152 106  j
  lower                  , //  6b  153 107  k
  lower                  , //  6c  154 108  l
  lower                  , //  6d  155 109  m
  lower                  , //  6e  156 110  n
  lower                  , //  6f  157 111  o
  lower                  , //  70  160 112  p
  lower                  , //  71  161 113  q
  lower                  , //  72  162 114  r
  lower                  , //  73  163 115  s
  lower                  , //  74  164 116  t
  lower                  , //  75  165 117  u
  lower                  , //  76  166 118  v
  lower                  , //  77  167 119  w
  lower                  , //  78  170 120  x
  lower                  , //  79  171 121  y
  lower                  , //  7a  172 122  z
  punct                  , //  7b  173 123  {
  punct                  , //  7c  174 124  |
  punct                  , //  7d  175 125  }
  punct                  , //  7e  176 126  ~
  cntrl                  , //  7f  177 127
  cntrl                  , //  80  200 128
  cntrl                  , //  81  201 129
  cntrl                  , //  82  202 130
  cntrl                  , //  83  203 131
  cntrl                  , //  84  204 132
  cntrl                  , //  85  205 133
  cntrl                  , //  86  206 134
  cntrl                  , //  87  207 135
  cntrl                  , //  88  210 136
  cntrl                  , //  89  211 137
  cntrl                  , //  8a  212 138
  cntrl                  , //  8b  213 139
  cntrl                  , //  8c  214 140
  cntrl                  , //  8d  215 141
  cntrl                  , //  8e  216 142
  cntrl                  , //  8f  217 143
  cntrl                  , //  90  220 144
  cntrl                  , //  91  221 145
  cntrl                  , //  92  222 146
  cntrl                  , //  93  223 147
  cntrl                  , //  94  224 148
  cntrl                  , //  95  225 149
  cntrl                  , //  96  226 150
  cntrl                  , //  97  227 151
  cntrl                  , //  98  230 152
  cntrl                  , //  99  231 153
  cntrl                  , //  9a  232 154
  cntrl                  , //  9b  233 155
  cntrl                  , //  9c  234 156
  cntrl                  , //  9d  235 157
  cntrl                  , //  9e  236 158
  cntrl                  , //  9f  237 159
  punct                  , //  a0  240 160  �    |  Non-breaking space
  punct                  , //  a1  241 161  �    |  Inverted exclamation mark
  punct                  , //  a2  242 162  �    |  Cent sign
  punct                  , //  a3  243 163  �    |  Pound sign
  punct                  , //  a4  244 164  �    |  Currency sign
  punct                  , //  a5  245 165  �    |  Yen sign
  punct                  , //  a6  246 166  �    |  Broken bar
  punct                  , //  a7  247 167  �    |  Section sign
  punct                  , //  a8  250 168  �    |  Diaeresis
  punct                  , //  a9  251 169  �    |  Copyright sign
  punct                  , //  aa  252 170  �    |  Feminine ordinal indicator
  punct                  , //  ab  253 171  �    |  Left-pointing double angle
  punct                  , //  ac  254 172  �    |  Not sign
  punct                  , //  ad  255 173  �    |  Soft hyphen
  punct                  , //  ae  256 174  �    |  Registered trademark sign
  punct                  , //  af  257 175  �    |  Macron
  punct                  , //  b0  260 176  �    |  Degree sign
  punct                  , //  b1  261 177  �    |  Plus-minus sign
  punct                  , //  b2  262 178  �    |  Superscript two
  punct                  , //  b3  263 179  �    |  Superscript three
  punct                  , //  b4  264 180  �    |  Acute accent
  punct                  , //  b5  265 181  �    |  Micro sign
  punct                  , //  b6  266 182  �    |  Pilcrow sign
  punct                  , //  b7  267 183  �    |  Middle dot
  punct                  , //  b8  270 184  �    |  Cedilla
  punct                  , //  b9  271 185  �    |  Superscript one
  punct                  , //  ba  272 186  �    |  Masculine ordinal indicator
  punct                  , //  bb  273 187  �    |  Right-pointing double angle
  punct                  , //  bc  274 188  �    |  Fraction one quarter
  punct                  , //  bd  275 189  �    |  Fraction one half
  punct                  , //  be  276 190  �    |  Fraction three quarters
  punct                  , //  bf  277 191  �    |  Inverted question mark
  upper                  , //  c0  300 192  �    |  "A" with grave accent
  upper                  , //  c1  301 193  �    |  "A" with acute accent
  upper                  , //  c2  302 194  �    |  "A" with circumflex
  upper                  , //  c3  303 195  �    |  "A" with tilde
  upper                  , //  c4  304 196  �    |  "A" with diaeresis
  upper                  , //  c5  305 197  �    |  "A" with ring above
  upper                  , //  c6  306 198  �    |  "AE" ligature
  upper                  , //  c7  307 199  �    |  "C" with cedilla
  upper                  , //  c8  310 200  �    |  "E" with grave accent
  upper                  , //  c9  311 201  �    |  "E" with acute accent
  upper                  , //  ca  312 202  �    |  "E" with circumflex
  upper                  , //  cb  313 203  �    |  "E" with diaeresis
  upper                  , //  cc  314 204  �    |  "I" with grave accent
  upper                  , //  cd  315 205  �    |  "I" with acute accent
  upper                  , //  ce  316 206  �    |  "I" with circumflex
  upper                  , //  cf  317 207  �    |  "I" with diaeresis
  upper                  , //  d0  320 208  �    |  Upper Icelandic D (Eth)
  upper                  , //  d1  321 209  �    |  "N" with tilde
  upper                  , //  d2  322 210  �    |  "O" with grave accent
  upper                  , //  d3  323 211  �    |  "O" with acute accent
  upper                  , //  d4  324 212  �    |  "O" with circumflex
  upper                  , //  d5  325 213  �    |  "O" with tilde
  upper                  , //  d6  326 214  �    |  "O" with diaeresis
  punct                  , //  d7  327 215  �    |  Multiplication sign
  upper                  , //  d8  330 216  �    |  "O" with stroke
  upper                  , //  d9  331 217  �    |  "U" with grave accent
  upper                  , //  da  332 218  �    |  "U" with acute accent
  upper                  , //  db  333 219  �    |  "U" with circumflex
  upper                  , //  dc  334 220  �    |  "U" with diaeresis
  upper                  , //  dd  335 221  �    |  "Y" with acute accent
  upper                  , //  de  336 222  �    |  Upper Icelandic T (Thorn)
  lower                  , //  df  337 223  �    |  German sharp s
  lower                  , //  e0  340 224  �    |  "a" with grave accent
  lower                  , //  e1  341 225  �    |  "a" with acute accent
  lower                  , //  e2  342 226  �    |  "a" with circumflex
  lower                  , //  e3  343 227  �    |  "a" with tilde
  lower                  , //  e4  344 228  �    |  "a" with diaeresis
  lower                  , //  e5  345 229  �    |  "a" with ring above
  lower                  , //  e6  346 230  �    |  Latin small ligature ae
  lower                  , //  e7  347 231  �    |  "c" with cedilla
  lower                  , //  e8  350 232  �    |  "e" with grave accent
  lower                  , //  e9  351 233  �    |  "e" with acute accent
  lower                  , //  ea  352 234  �    |  "e" with circumflex
  lower                  , //  eb  353 235  �    |  "e" with diaeresis
  lower                  , //  ec  354 236  �    |  "i" with grave accent
  lower                  , //  ed  355 237  �    |  "i" with acute accent
  lower                  , //  ee  356 238  �    |  "i" with circumflex
  lower                  , //  ef  357 239  �    |  "i" with diaeresis
  lower                  , //  f0  360 240  �    |  Lower Icelandic d (eth)
  lower                  , //  f1  361 241  �    |  "n" with tilde
  lower                  , //  f2  362 242  �    |  "o" with grave accent
  lower                  , //  f3  363 243  �    |  "o" with acute accent
  lower                  , //  f4  364 244  �    |  "o" with circumflex
  lower                  , //  f5  365 245  �    |  "o" with tilde
  lower                  , //  f6  366 246  �    |  "o" with diaeresis
  punct                  , //  f7  367 247  �    |  Division sign
  lower                  , //  f8  370 248  �    |  "o" with oblique bar
  lower                  , //  f9  371 249  �    |  "u" with grave accent
  lower                  , //  fa  372 250  �    |  "u" with acute accent
  lower                  , //  fb  373 251  �    |  "u" with circumflex
  lower                  , //  fc  374 252  �    |  "u" with diaeresis
  lower                  , //  fd  375 253  �    |  "y" with acute accent
  lower                  , //  fe  376 254  �    |  Lower Icelandic t (thorn)
  lower                    //  ff  377 255  �    |  "y" with diaeresis
  };

  return (flag_table[unsigned char(c)] & f) != 0;
}


//============================================================================
char iso8859_1_regex_traits::translate(char c, bool icase) const
{
//----------------------------------------------------------------------------
// Case folding for ISO 8859-1 (Latin-1).
//
//   c      The character to translate.
//   icase  When false, c is returned unchanged.
//          When true, c is looked up in the lower-case table below.
//
// The table maps:
//      "ABCDEFGHIJKLMNOPQRSTUVWXYZ" -> "abcdefghijklmnopqrstuvwxyz"
//  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ" -> "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ"
// Every other code (including 0xd7 multiplication sign, 0xdf sharp s and
// 0xff "y" with diaeresis) maps to itself.
//----------------------------------------------------------------------------
  // Stored as unsigned char: the entries run up to 255, which does not fit
  // in a signed char and would be a narrowing error in a braced initializer.
  static const unsigned char lower_case_map[UCHAR_MAX + 1] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
     64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
     96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
    240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
    240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
  };

  if (!icase)
    return c;

  // Plain char may be signed, so index with the unsigned value, then convert
  // the table entry back to char for the caller.
  // (A named cast is required here: the function-style form
  // "unsigned char(c)" is not valid standard C++ for a two-word type name.)
  return static_cast<char>(lower_case_map[static_cast<unsigned char>(c)]);
}