Boost C++ Libraries

...one of the most highly regarded and expertly designed C++ library projects in the world. Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

This is the documentation for an old version of Boost. Click here to view this page for the latest version.

boost/spirit/home/support/char_encoding/ascii.hpp

/*=============================================================================
    Copyright (c) 2001-2011 Hartmut Kaiser
    Copyright (c) 2001-2011 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
#define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <climits>
#include <boost/assert.hpp>
#include <boost/cstdint.hpp>

///////////////////////////////////////////////////////////////////////////////
// constants used to classify the single characters
///////////////////////////////////////////////////////////////////////////////
#define BOOST_CC_DIGIT    0x0001
#define BOOST_CC_XDIGIT   0x0002
#define BOOST_CC_ALPHA    0x0004
#define BOOST_CC_CTRL     0x0008
#define BOOST_CC_LOWER    0x0010
#define BOOST_CC_UPPER    0x0020
#define BOOST_CC_SPACE    0x0040
#define BOOST_CC_PUNCT    0x0080

namespace boost { namespace spirit { namespace char_encoding
{
    // The detection of isgraph(), isprint() and isblank() is done programmatically
    // to keep the character type table small. Additionally, these functions are
    // rather seldom used and the programmatic detection is very simple.

    ///////////////////////////////////////////////////////////////////////////
    // ASCII character classification table
    ///////////////////////////////////////////////////////////////////////////
    const unsigned char ascii_char_types[] =
    {
        /* NUL   0   0 */   BOOST_CC_CTRL,
        /* SOH   1   1 */   BOOST_CC_CTRL,
        /* STX   2   2 */   BOOST_CC_CTRL,
        /* ETX   3   3 */   BOOST_CC_CTRL,
        /* EOT   4   4 */   BOOST_CC_CTRL,
        /* ENQ   5   5 */   BOOST_CC_CTRL,
        /* ACK   6   6 */   BOOST_CC_CTRL,
        /* BEL   7   7 */   BOOST_CC_CTRL,
        /* BS    8   8 */   BOOST_CC_CTRL,
        /* HT    9   9 */   BOOST_CC_CTRL|BOOST_CC_SPACE,
        /* NL   10   a */   BOOST_CC_CTRL|BOOST_CC_SPACE,
        /* VT   11   b */   BOOST_CC_CTRL|BOOST_CC_SPACE,
        /* NP   12   c */   BOOST_CC_CTRL|BOOST_CC_SPACE,
        /* CR   13   d */   BOOST_CC_CTRL|BOOST_CC_SPACE,
        /* SO   14   e */   BOOST_CC_CTRL,
        /* SI   15   f */   BOOST_CC_CTRL,
        /* DLE  16  10 */   BOOST_CC_CTRL,
        /* DC1  17  11 */   BOOST_CC_CTRL,
        /* DC2  18  12 */   BOOST_CC_CTRL,
        /* DC3  19  13 */   BOOST_CC_CTRL,
        /* DC4  20  14 */   BOOST_CC_CTRL,
        /* NAK  21  15 */   BOOST_CC_CTRL,
        /* SYN  22  16 */   BOOST_CC_CTRL,
        /* ETB  23  17 */   BOOST_CC_CTRL,
        /* CAN  24  18 */   BOOST_CC_CTRL,
        /* EM   25  19 */   BOOST_CC_CTRL,
        /* SUB  26  1a */   BOOST_CC_CTRL,
        /* ESC  27  1b */   BOOST_CC_CTRL,
        /* FS   28  1c */   BOOST_CC_CTRL,
        /* GS   29  1d */   BOOST_CC_CTRL,
        /* RS   30  1e */   BOOST_CC_CTRL,
        /* US   31  1f */   BOOST_CC_CTRL,
        /* SP   32  20 */   BOOST_CC_SPACE,
        /*  !   33  21 */   BOOST_CC_PUNCT,
        /*  "   34  22 */   BOOST_CC_PUNCT,
        /*  #   35  23 */   BOOST_CC_PUNCT,
        /*  $   36  24 */   BOOST_CC_PUNCT,
        /*  %   37  25 */   BOOST_CC_PUNCT,
        /*  &   38  26 */   BOOST_CC_PUNCT,
        /*  '   39  27 */   BOOST_CC_PUNCT,
        /*  (   40  28 */   BOOST_CC_PUNCT,
        /*  )   41  29 */   BOOST_CC_PUNCT,
        /*  *   42  2a */   BOOST_CC_PUNCT,
        /*  +   43  2b */   BOOST_CC_PUNCT,
        /*  ,   44  2c */   BOOST_CC_PUNCT,
        /*  -   45  2d */   BOOST_CC_PUNCT,
        /*  .   46  2e */   BOOST_CC_PUNCT,
        /*  /   47  2f */   BOOST_CC_PUNCT,
        /*  0   48  30 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  1   49  31 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  2   50  32 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  3   51  33 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  4   52  34 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  5   53  35 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  6   54  36 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  7   55  37 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  8   56  38 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  9   57  39 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
        /*  :   58  3a */   BOOST_CC_PUNCT,
        /*  ;   59  3b */   BOOST_CC_PUNCT,
        /*  <   60  3c */   BOOST_CC_PUNCT,
        /*  =   61  3d */   BOOST_CC_PUNCT,
        /*  >   62  3e */   BOOST_CC_PUNCT,
        /*  ?   63  3f */   BOOST_CC_PUNCT,
        /*  @   64  40 */   BOOST_CC_PUNCT,
        /*  A   65  41 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  B   66  42 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  C   67  43 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  D   68  44 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  E   69  45 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  F   70  46 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
        /*  G   71  47 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  H   72  48 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  I   73  49 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  J   74  4a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  K   75  4b */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  L   76  4c */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  M   77  4d */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  N   78  4e */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  O   79  4f */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  P   80  50 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  Q   81  51 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  R   82  52 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  S   83  53 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  T   84  54 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  U   85  55 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  V   86  56 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  W   87  57 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  X   88  58 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  Y   89  59 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  Z   90  5a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
        /*  [   91  5b */   BOOST_CC_PUNCT,
        /*  \   92  5c */   BOOST_CC_PUNCT,
        /*  ]   93  5d */   BOOST_CC_PUNCT,
        /*  ^   94  5e */   BOOST_CC_PUNCT,
        /*  _   95  5f */   BOOST_CC_PUNCT,
        /*  `   96  60 */   BOOST_CC_PUNCT,
        /*  a   97  61 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  b   98  62 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  c   99  63 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  d  100  64 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  e  101  65 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  f  102  66 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
        /*  g  103  67 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  h  104  68 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  i  105  69 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  j  106  6a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  k  107  6b */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  l  108  6c */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  m  109  6d */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  n  110  6e */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  o  111  6f */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  p  112  70 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  q  113  71 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  r  114  72 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  s  115  73 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  t  116  74 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  u  117  75 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  v  118  76 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  w  119  77 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  x  120  78 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  y  121  79 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  z  122  7a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
        /*  {  123  7b */   BOOST_CC_PUNCT,
        /*  |  124  7c */   BOOST_CC_PUNCT,
        /*  }  125  7d */   BOOST_CC_PUNCT,
        /*  ~  126  7e */   BOOST_CC_PUNCT,
        /* DEL 127  7f */   BOOST_CC_CTRL,
    };

    ///////////////////////////////////////////////////////////////////////////
    //  Test characters for specified conditions (using ASCII)
    ///////////////////////////////////////////////////////////////////////////
    struct ascii
    {
        typedef char char_type;

        static bool
        isascii_(int ch)
        {
            return 0 == (ch & ~0x7f);
        }

        static bool
        ischar(int ch)
        {
            return isascii_(ch);
        }

        static int
        isalnum(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_ALPHA)
                || (ascii_char_types[ch] & BOOST_CC_DIGIT);
        }

        static int
        isalpha(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_ALPHA);
        }

        static int
        isdigit(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_DIGIT);
        }

        static int
        isxdigit(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_XDIGIT);
        }

        static int
        iscntrl(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_CTRL);
        }

        static int
        isgraph(int ch)
        {
            return ('\x21' <= ch && ch <= '\x7e');
        }

        static int
        islower(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_LOWER);
        }

        static int
        isprint(int ch)
        {
            return ('\x20' <= ch && ch <= '\x7e');
        }

        static int
        ispunct(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_PUNCT);
        }

        static int
        isspace(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_SPACE);
        }

        static int
        isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
        {
            return ('\x09' == ch || '\x20' == ch);
        }

        static int
        isupper(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return (ascii_char_types[ch] & BOOST_CC_UPPER);
        }

        ///////////////////////////////////////////////////////////////////////
        //  Simple character conversions
        ///////////////////////////////////////////////////////////////////////

        static int
        tolower(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return isupper(ch) ? (ch - 'A' + 'a') : ch;
        }

        static int
        toupper(int ch)
        {
            BOOST_ASSERT(isascii_(ch));
            return islower(ch) ? (ch - 'a' + 'A') : ch;
        }

        static ::boost::uint32_t
        toucs4(int ch)
        {
            return ch;
        }
    };

}}}

///////////////////////////////////////////////////////////////////////////////
// undefine macros
///////////////////////////////////////////////////////////////////////////////
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_PUNCT
#undef BOOST_CC_SPACE

#endif