cpp_token_stream.h

Go to the documentation of this file.
00001 #ifndef cpp_token_h_included
00002 #define cpp_token_h_included
00003 
00004 
00005 //
00006 // Copyright 2002, Lowell Boggs Jr.
00007 //
00008 // This file or directory, containing source code for a computer program,
00009 // is Copyrighted by Lowell Boggs, Jr.  987 Regency Drive, Lewisville
00010 // TX (USA), 75067.  You may use, copy, modify, and distribute this
00011 // source file without charge or obligation so long as you agree to
00012 // the following:
00013 //
00014 //  1.  You must indemnify Lowell Boggs against any and all financial
00015 //      obligations caused by its use, misuse, function, or malfunction.
00016 //      Further, you acknowledge that there is no warranty of any kind,
00017 //      whatsoever.
00018 //
00019 //  2.  You agree not to attempt to patent any portion of this original
00020 //      work -- though you may attempt to patent your own extensions to
00021 //      it if you so choose.
00022 //
00023 //  3.  You keep this copyright notice with the file and all copies
00024 //      of the file and do not change it anyway except language translation.
00025 //
00026 // You are responsible for enforcing your own compliance with these
00027 // conditions and may not use this source file if you cannot agree to the
00028 // above terms and conditions.
00029 
00030 
00031 
00032 
00047 //
00048 
00049 
00050 #include <fstream>
00051 #include <string>
00052 #include <portable_strstream.h>
00053 
00054 namespace cxxtls
00055 {
00056 
00057 
struct CPP_Token
  //
  // One lexical token together with the place it was read from.
  //
  // The struct has no constructors, so a default-constructed token has
  // indeterminate type_ and line_ until a token source fills it in.
  //
{
  // Values stored in type_.
  //
  // Single-character tokens are not listed here: their type is simply the
  // character value itself.  The spellings noted below are the ones that
  // CPP_Token_Stream::token_type maps to each enumerator.
  enum token_types_
  {
     eof = 0x000,  // end of input
     eql = 0x100,  // "=="
     neq = 0x101,  // "!="
     leq = 0x102,  // "<="
     geq = 0x103,  // ">="
     sle = 0x104,  // "<<="
     sre = 0x105,  // ">>="
     dve = 0x106,  // "/="
     ple = 0x107,  // "+="
     mne = 0x108,  // "-="
     tme = 0x109,  // "*="
     mde = 0x10a,  // "%="
     adr = 0x10b,  // NOTE(review): never produced by the tokenizer in this file -- meaning to be confirmed
     orr = 0x10c,  // "||"
     ore = 0x10d,  // "|="
     ade = 0x10e,  // "&="
     inc = 0x10f,  // "++"
     dec = 0x110,  // "--"
     shr = 0x111,  // ">>"
     shl = 0x112,  // "<<"
     lan = 0x113,  // "&&"
     der = 0x114,  // "::"
     teq = 0x115,  // "~="
     ceq = 0x116,  // "^="
     eli = 0x117,  // a run of two or more '.' characters
     ptr = 0x118,  // "->"
     nmb = 0x201,  // token that starts with a decimal digit
     aln = 0x202,  // token that starts with a letter, '_' or '$'
     inv = 0x300,  // anything the tokenizer could not classify
     special_tokens  // one past inv (0x301)
  };

  std::string text_;  // characters making up the token

  int         type_;  // a token_types_ value, or the character value
                      // for one-character operators

  int         line_;  // line number on which the token was found

  std::string file_;  // name of the file the token was read from

  // Lets a token be passed wherever a std::string const & is expected;
  // the result refers to text_ and lives only as long as the token.
  operator std::string const &() const
  {
    return this->text_;
  }

  // Returns a printable name for a type_ value (defined outside this header).
  static std::string type_name(int type);

};
00155 
00156 inline
00157 std::ostream&
00158 operator<<( std::ostream& o, CPP_Token const &r )
00160 {
00161 
00162   return o
00163          << "\"" << r.file_
00164          << "\", line " << r.line_
00165          << " token, '" << r.text_ << "', is of type "
00166          << CPP_Token::type_name(r.type_);
00167 
00168 }
00169 
00170 class CPP_Buffer_Token_Source; // see bottom of file
00171 
class CPP_Token_Stream_Prep
  //
  // Callback interface for preprocessor directives.
  //
  // A CPP_Token_Stream that was given a pointer to an object of this type
  // invokes it for every '#' directive that is not a line-number directive.
  // Derive from this class and implement operator() to receive them.
  //
{
public:

   virtual ~CPP_Token_Stream_Prep()
   {
   }

   CPP_Token_Stream_Prep() {}

   // Called once per directive.
   //
   //   text  the accumulated text of the directive
   //   file  the file name in effect when the directive was read
   //   line  the line number on which the directive started
   //
   // The function is const, so an implementation that needs to record
   // anything must do so through mutable or external state.
   virtual void operator() (std::string const &text,
                            std::string const &file,
                            int                line) const = 0;

};
00207 
00208 
00209 template<class Iterator>
00210 class CPP_Token_Stream
00211   //
00213   //
00245   //
00250   //
00251 {
00252   Iterator                     cur_;                   
00253   Iterator                     end_;                   
00254   int                          line_;                  
00255   std::string                  file_;                  
00256   bool                         initialized_;           
00257   CPP_Token_Stream_Prep const *preprocessor_handler_;  
00258   char                         char_type_[256];        
00259   friend class                 CPP_Buffer_Token_Source;
00260 
00261 public:
00262 
00263 
00264   CPP_Token_Stream(Iterator const& next,
00265                    Iterator const& end,
00266                    std::string filename="",
00267                    CPP_Token_Stream_Prep const *handler=0
00268                   )
00269   :
00270     cur_(next),
00271     end_(end),
00272     line_(1),
00273     file_(filename),
00274     initialized_(0),
00275     preprocessor_handler_(handler)
00276   {
00281   }
00282 
00283 
00284   void operator() (CPP_Token &token)
00290 
00291   {
00292 
00293     token.text_.resize(0);
00294     token.type_ = CPP_Token::inv;
00295 
00296     bool first_char = !initialized_;
00297 
00298     if(!initialized_)
00299     {
00300       initialize_char_type();
00301     }
00302 
00303     if(cur_ == end_)
00304     {
00305       token.type_ = CPP_Token::eof;
00306       token.line_ = line_;
00307       token.file_ = file_;
00308       return;
00309     }
00310 
00311 
00312     bool already_parsed = false;
00313 
00314     char c='@';
00315 
00316     // skip leading whitespace, comments, and #line directives
00317 
00318     if(first_char)
00319     {
00320       scan_till_first_nonblank();
00321         
00322       if(cur_ != end_ && *cur_ == '#')
00323         parse_preprocessor_directive();
00324     }
00325 
00326     while(cur_ != end_ &&
00327           ( char_type_[*cur_] == ' '  ||
00328             *cur_ == '\n' ||
00329             *cur_ == '/'  ||
00330             *cur_ == '\\'
00331           )
00332          )
00333     {
00334       c = *cur_;
00335 
00336       if(c == '\\')
00337       {
00338         if(cur_ == end_)    // handle line continuations
00339           c = ' ';
00340         else
00341         {
00342           ++cur_;
00343           c = *cur_;
00344         }
00345       }
00346 
00347       if(c == '/')
00348       {
00349         
00350         ++cur_;
00351         
00352         if(*cur_ == '/')
00353         {
00354           // handle end of line comments
00355         
00356           while(cur_ != end_ && *cur_ != '\n')
00357             ++cur_;
00358         
00359           if(cur_ != end_)
00360           {
00361             ++line_;
00362             ++cur_;
00363         
00364             scan_till_first_nonblank();
00365         
00366             if(cur_ != end_ && *cur_ == '#')
00367               parse_preprocessor_directive();
00368         
00369           }
00370         
00371           // now we should be at the beginning of the next line
00372         
00373         }
00374         else
00375         if( *cur_ == '*')
00376         {
00377           // handle old style C comments
00378         
00379           ++cur_;
00380         
00381           while(cur_ != end_)
00382           {
00383             // scan for the pair, "*/"
00384         
00385             c= *cur_;
00386         
00387             if(c == '\n')
00388             {
00389               ++line_;
00390             }
00391         
00392             ++cur_;
00393         
00394             if(cur_ == end_)
00395               break;
00396         
00397             if(c == '*' && *cur_ == '/')
00398             {
00399               ++cur_;
00400               break;
00401             }
00402           }
00403         
00404         
00405         }
00406         else
00407         {
00408           already_parsed = true;
00409         
00410           break;
00411         }
00412         
00413       }
00414       else
00415       if( c == '\n')
00416       {
00417         ++line_;
00418         
00419 
00420 
00421         if(cur_ != end_)
00422         {
00423           ++cur_;
00424 
00425           scan_till_first_nonblank();
00426         
00427           if(*cur_ == '#')
00428             parse_preprocessor_directive();
00429         }
00430       }
00431       else
00432       {
00433         // handle whitespace
00434         
00435         while( cur_ != end_ && char_type_[*cur_] == ' ' )
00436           ++cur_;
00437         
00438       }
00439 
00440 
00441     }
00442 
00443 
00444     // at this point we know we are not at the end_ of the stream, we can read and
00445     // classify the first character in the token
00446 
00447 
00448     if(cur_ == end_)
00449     {
00450       token.type_ = CPP_Token::eof;
00451       token.line_ = line_;
00452       token.file_ = file_;
00453       return;
00454     }
00455 
00456     if(!already_parsed)
00457     {
00458       c = *cur_;
00459       ++cur_;
00460     }
00461 
00462     char type = char_type_[c];
00463 
00464     token.text_ +=(c);
00465 
00466     token.line_ = line_;
00467     token.file_ = file_;
00468 
00469     // now, read any remaining characters in the token
00470 
00471     if(c == '\'')
00472     { // single character constants
00473 
00474        // the return value will contain the beginning and ending ' marks
00475 
00476        token.type_ = '\'';
00477 
00478        while(cur_ != end_)
00479        {
00480          char secondary_character = *cur_;
00481 
00482          if(secondary_character == '\\')
00483          {
00484            token.text_ +=('\\');
00485            ++cur_;
00486         
00487            if(cur_ != end_)
00488            {
00489              token.text_+=(*cur_);
00490              ++cur_;
00491            }
00492            continue;
00493          }
00494 
00495          token.text_+=(secondary_character);
00496          ++cur_;
00497         
00498          if(secondary_character == '\n')
00499          {
00500            ++line_;
00501            // no line number override should be occuring because
00502            // this is technically a bug in the use of the language that
00503            // I am trying to handle gracefully
00504          }
00505 
00506          if(secondary_character == '\'')
00507          {
00508            break;
00509          }
00510        }
00511 
00512     } // single character constants
00513     else
00514     if(c == '"')
00515     { // string constant
00516 
00517       // the return value will contain the leading and trailing double quotes
00518 
00519       token.type_ = '"';
00520 
00521        while(cur_ != end_)
00522        {
00523          char secondary_character = *cur_;
00524 
00525          if(secondary_character == '\\')
00526          {
00527            token.text_+=('\\');
00528            ++cur_;
00529         
00530            if(cur_ != end_)
00531            {
00532              token.text_+=(*cur_);
00533              ++cur_;
00534            }
00535            continue;
00536          }
00537 
00538          token.text_+=(secondary_character);
00539          ++cur_;
00540         
00541          if(secondary_character == '\n')
00542            ++line_;
00543            // no line number override should be occuring because
00544            // this is technically a bug in the use of the language that
00545            // I am trying to handle gracefully
00546         
00547 
00548          if(secondary_character == '"')
00549          {
00550            break;
00551          }
00552        }
00553 
00554     } // string constant
00555     else
00556     if( c == '.')
00557     {
00558       token.type_ = c;
00559 
00560       while(*cur_ == '.' && cur_ != end_)
00561       {
00562         token.type_ = CPP_Token::eli;
00563         token.text_ += c;
00564         c = *cur_;
00565         ++cur_;
00566       }
00567 
00568       return;
00569     }
00570     else
00571     if( c == '(' || c == ')' || c == '[' || c == ']' || c == '{' ||
00572         c == '}'
00573       )
00574     {
00575       token.type_ = c;
00576       return;
00577     }
00578     else
00579     {
00580       bool is_number=false;
00581 
00582       if(c >= '0' && c <= '9')
00583       {
00584         is_number = true;
00585         token.type_ = CPP_Token::nmb;
00586       }
00587       else
00588       if( (c >= 'a' && c <= 'z') ||
00589           (c >= 'A' && c <= 'Z') ||
00590           (c == '_')
00591         )
00592       {
00593         token.type_ = CPP_Token::aln;
00594       }
00595       else
00596       if( c == '-' && *cur_ == '>')
00597       {
00598         token.type_ = CPP_Token::ptr;
00599         token.text_ += '>';
00600         ++cur_;
00601         return;
00602       }
00603 
00604       while(cur_ != end_)
00605       { // main loop to aggregate characters of similar types
00606 
00607         char secondary_character = *cur_;
00608 
00609         while( secondary_character == '\\')
00610         {
00611           // handle line continuations
00612         
00613           ++cur_;
00614         
00615           if(cur_ != end_)
00616             secondary_character = *cur_;
00617           else
00618             secondary_character = ' ';
00619         
00620           if(secondary_character == '\n')
00621           {
00622             ++cur_;
00623             ++line_;
00624         
00625             if(cur_ != end_)
00626               secondary_character = *cur_;
00627             else
00628               secondary_character = ' ';
00629           }
00630         
00631         }
00632 
00633         if(secondary_character == '.' && !is_number && type == 'A')
00634           break;  // do not include '.' in names
00635 
00636 
00637         char secondary_type = char_type_[secondary_character];
00638         
00639         if(secondary_type != type)
00640         {
00641           if( !  (secondary_character == '=' && type == '>')  )
00642           {
00643             break;
00644           }
00645         
00646         }
00647 
00648         token.text_+=(secondary_character);
00649 
00650         ++cur_;
00651 
00652       } // main loop to aggregate characters of similar types
00653 
00654       token.type_ = token_type(token.text_);
00655 
00656     }
00657 
00658   }
00659 
00660   int token_type(std::string const &s)
00661 
00662   
00663 
00664 
00665 
00666 
00667 
00668 
00669 
00670   {
00671 
00672     char c = s[0];
00673 
00674     int size = s.size();
00675 
00676     switch(c)
00677     {
00678     case '0': case '1': case '2': case '3': case '4': case '5':
00679     case '6': case '7': case '8': case '9':
00680       return CPP_Token::nmb;
00681 
00682     case '=':
00683       if(size == 2 && s[1] == '=')
00684         return CPP_Token::eql;
00685         
00686       break;
00687 
00688     case ':':
00689       if(size == 2 && s[1] == ':')
00690         return CPP_Token::der;
00691         
00692       break;
00693 
00694     case '&':
00695       if(size == 1)
00696         return '&';
00697       else
00698       if(size == 2)
00699       {
00700         c = s[1];
00701         
00702         if(c == '=')
00703           return CPP_Token::ade;
00704         
00705         if(c == '&')
00706           return CPP_Token::lan;
00707         
00708       }
00709       return CPP_Token::inv;
00710 
00711     case '|':
00712       if(size == 1)
00713         return '|';
00714       else
00715       if(size == 2)
00716       {
00717         c = s[1];
00718         
00719         if(c == '=')
00720           return CPP_Token::ore;
00721         
00722         if(c == '|')
00723           return CPP_Token::orr;
00724         
00725       }
00726       return CPP_Token::inv;
00727 
00728     case '!':
00729       if(size == 1)
00730         return '!';
00731       else
00732       if(size == 2)
00733       {
00734         c = s[1];
00735         
00736         if(c == '=')
00737           return CPP_Token::neq;
00738         
00739       }
00740       return CPP_Token::inv;
00741 
00742     case '^':
00743       if(size == 1)
00744         return '^';
00745       else
00746       if(size == 2)
00747       {
00748         c = s[1];
00749         
00750         if(c == '=')
00751           return CPP_Token::ceq;
00752         
00753       }
00754       return CPP_Token::inv;
00755 
00756     case '%':
00757       if(size == 1)
00758         return '%';
00759       else
00760       if(size == 2)
00761       {
00762         c = s[1];
00763         
00764         if(c == '=')
00765           return CPP_Token::mde;
00766         
00767       }
00768       return CPP_Token::inv;
00769 
00770     case '/':
00771       if(size == 1)
00772         return '/';
00773       else
00774       if(size == 2)
00775       {
00776         c = s[1];
00777         
00778         if(c == '=')
00779           return CPP_Token::dve;
00780         
00781       }
00782       return CPP_Token::inv;
00783 
00784     case '~':
00785       if(size == 1)
00786         return '~';
00787       else
00788       if(size == 2)
00789       {
00790         c = s[1];
00791         
00792         if(c == '=')
00793           return CPP_Token::teq;
00794         
00795       }
00796       return CPP_Token::inv;
00797 
00798     case '*':
00799       if(size == 1)
00800         return '*';
00801       else
00802       if(size == 2)
00803       {
00804         c = s[1];
00805         
00806         if(c == '=')
00807           return CPP_Token::tme;
00808         
00809       }
00810       return CPP_Token::inv;
00811 
00812     case '+':
00813       if(size == 1)
00814         return '+';
00815       else
00816       if(size == 2)
00817       {
00818         c = s[1];
00819         
00820         if(c == '+')
00821           return CPP_Token::inc;
00822         
00823         if(c == '=')
00824           return CPP_Token::ple;
00825         
00826       }
00827       return CPP_Token::inv;
00828 
00829     case '-':
00830       if(size == 1)
00831         return '-';
00832       else
00833       if(size == 2)
00834       {
00835         c = s[1];
00836         
00837         if(c == '-')
00838           return CPP_Token::dec;
00839         
00840         if(c == '=')
00841           return CPP_Token::mne;
00842         
00843       }
00844       return CPP_Token::inv;
00845 
00846     case '>':
00847       if(size == 1)
00848         return '>';
00849       else
00850       if(size == 2)
00851       {
00852         c = s[1];
00853         
00854         if(c == '>')
00855           return CPP_Token::shr;
00856         
00857         if(c == '=')
00858           return CPP_Token::geq;
00859         
00860       }
00861       else
00862       if(size == 3)
00863       {
00864         return CPP_Token::sre;
00865       }
00866       return CPP_Token::inv;
00867 
00868     case '<':
00869       if(size == 1)
00870         return '<';
00871       else
00872       if(size == 2)
00873       {
00874         c = s[1];
00875         
00876         if(c == '<')
00877           return CPP_Token::shl;
00878         
00879         if(c == '=')
00880           return CPP_Token::leq;
00881         
00882       }
00883       else
00884       if(size == 3)
00885       {
00886         return CPP_Token::sle;
00887       }
00888       return CPP_Token::inv;
00889         
00890     default:
00891 
00892       if( (c >= 'a' && c <= 'z') ||
00893           (c >= 'A' && c <= 'Z') ||
00894           c == '_'               ||
00895           c == '$'
00896         )
00897         return CPP_Token::aln;
00898         
00899       break;
00900     }
00901 
00902     if(size == 1)
00903       return s[0];
00904 
00905     return CPP_Token::inv;
00906 
00907   }
00908 
00909 private:
00910 
00911   void initialize_char_type()
00912     //
00916     //
00917   {
00918     initialized_ = true;
00919 
00920     unsigned i;
00921 
00922     // first, give every character its own unique type
00923 
00924     for(i=0; i < sizeof(char_type_); ++i)  char_type_[i] = i;
00925 
00926     // next define classes of characters
00927 
00928     for(i=0; i < ' '; ++i)   char_type_[i] = 1;   // type 1 is an invalid character
00929     for(i=128; i < 256; ++i) char_type_[i] = 1;
00930 
00931     char_type_[' '] = ' ';  // a blank is a blank
00932     char_type_['\t']= ' ';
00933     char_type_['\r']= ' ';
00934     char_type_[0x0c]= ' ';  // \f not supported everywhere
00935 
00936     char_type_['\\']= '\\'; // this is a special case
00937 
00938 
00939     for(i='A'; i <= 'Z'; ++i) char_type_[i] = 'A';  // type 'A' is alphanumerics
00940     for(i='a'; i <= 'z'; ++i) char_type_[i] = 'A';  // underscore and '.' are also
00941     for(i='0'; i <= '9'; ++i) char_type_[i] = 'A';  // treated as alphanumeric -- you must
00942     char_type_['_'] = 'A';                          // split the returned tokens yourself
00943     char_type_['.'] = 'A';                          // if you don't want them treated this way.
00944     char_type_['$'] = 'A';
00945 
00946     char_type_['+'] = '=';  // type '=' refers to operators that can combine with '='
00947     char_type_['-'] = '=';
00948     char_type_['*'] = '=';
00949     char_type_['/'] = '=';
00950     char_type_['%'] = '=';
00951     char_type_['&'] = '=';
00952     char_type_['|'] = '=';
00953     char_type_['~'] = '=';
00954     char_type_['^'] = '=';
00955     char_type_['='] = '=';
00956     char_type_['<'] = '>';
00957     char_type_['>'] = '>';
00958     char_type_['!'] = '=';
00959 
00960   }
00961 
00962 
00963   void parse_preprocessor_directive()
00964     //
00975     //
00976     //
00977   {
00978     // the stream should contain '# [line] number ["file"] [stackdepth]'
00979 
00980     while(cur_ != end_ && *cur_ == '#')
00981     {
00982 
00983       ++cur_;
00984 
00985       // skip white spaces before the 'line' keyword or the number
00986 
00987       while(cur_ != end_ && *cur_ == ' ')
00988         ++cur_;
00989 
00990       if(cur_ == end_)
00991         return;
00992 
00993       if(*cur_ == 'l')
00994       {
00995         // assume this is a line keyword
00996 
00997         while(cur_ != end_ && *cur_ != ' ')
00998           ++cur_;
00999 
01000         // skip white spaces after the 'line' keyword
01001 
01002         while(cur_ != end_ && *cur_ == ' ')
01003           ++cur_;
01004         
01005         if(cur_ == end_)
01006           return;
01007       }
01008 
01009       char c = *cur_;
01010 
01011       if(c < 0 || c > '9')
01012       {
01013         // this is a pragma or other ignorable pre-processor directive
01014         // handle line continuations using either \ or comments that start
01015         // in the macro body and don't complete on the same line
01016         
01017         std::string directive;       // could also be named 'accumulator'
01018         int         start_line=line_;
01019 
01020         char slash        = ' ';
01021         bool in_a_comment = false;
01022 
01023         while(cur_ != end_ && *cur_ != '\n')
01024         {
01025           slash = *cur_;
01026           ++cur_;
01027         
01028           if(!in_a_comment)
01029             directive += slash;
01030         
01031           if(cur_ != end_)
01032           {
01033             if(*cur_ == '*' && slash == '/')
01034               in_a_comment = true;
01035             else
01036             if(*cur_ == '/' && slash == '*')
01037               in_a_comment = false;
01038           }
01039         
01040         }
01041         
01042         if(cur_ != end_)
01043         {
01044           ++cur_;  // skip the '\n'
01045           ++line_; // keep the line_ variable correct
01046 
01047           if(!in_a_comment)
01048             directive += '\n';
01049 
01050           scan_till_first_nonblank();
01051         
01052           while(slash == '\\' || in_a_comment)
01053           {
01054             // the next line is a continuation line
01055         
01056             while(cur_ != end_ && *cur_ != '\n')
01057             {
01058               slash = *cur_;
01059               ++cur_;
01060         
01061               if(!in_a_comment)
01062                 directive += slash;
01063         
01064               if(cur_ != end_)
01065               {
01066                 if(*cur_ == '*' && slash == '/')
01067                   in_a_comment = true;
01068                 else
01069                 if(*cur_ == '/' && slash == '*')
01070                   in_a_comment = false;
01071               }
01072               else
01073               {
01074                 in_a_comment=false;
01075               }
01076         
01077             }
01078         
01079             if(cur_ != end_)
01080             {
01081               ++cur_;  // skip the '\n'
01082               ++line_; // keep the line_ variable correct
01083         
01084               if(!in_a_comment)
01085                 directive += '\n';
01086 
01087               if(scan_till_first_nonblank())
01088                 slash=' ';
01089 
01090             }
01091             else
01092             {
01093              slash=' ';
01094              in_a_comment=false;
01095             }
01096           }
01097         
01098         }
01099 
01100         if(preprocessor_handler_)
01101           (*preprocessor_handler_)(directive, file_, start_line);
01102 
01103         continue;
01104 
01105       }
01106 
01107       // ok, so we should have 'number ["file"]'
01108 
01109       long line=0;
01110 
01111       while(cur_ != end_)
01112       {
01113         c = *cur_;
01114 
01115         if(c < '0' || c > '9')
01116           break;
01117         
01118         ++cur_;
01119 
01120         line *= 10;
01121         line += c - '0';
01122       }
01123 
01124       line_ = line;
01125 
01126       // so we have eaten the line number, now see if a file name
01127       // is specified
01128 
01129       // skip white spaces after the line number
01130 
01131       while(cur_ != end_ && *cur_ == ' ')
01132         ++cur_;
01133 
01134       if(cur_ == end_)
01135         return;
01136 
01137       if(*cur_ == '\n')
01138       {
01139         ++cur_;
01140         while(cur_ != end_ && char_type_[*cur_] == ' ')
01141          ++cur_;
01142         continue;
01143       }
01144 
01145       // ok we have a filename specified
01146 
01147       ++cur_;  // skip the leading "
01148 
01149       file_.resize(0);
01150 
01151       while(cur_ != end_ && (c = *cur_) != '"' && c != '\n' )
01152       {
01153         file_ += c;
01154         ++cur_;
01155       }
01156 
01157       while(cur_ != end_ && *cur_ != '\n')
01158         ++cur_;
01159 
01160       if(cur_ != end_)
01161       {
01162         ++cur_;  // skip the \n
01163         while(cur_ != end_ && char_type_[*cur_] == ' ')
01164          ++cur_;
01165       }
01166 
01167     }
01168 
01169   }
01170 
01171   bool scan_till_first_nonblank()
01175   {
01176     bool rv = false;
01177 
01178     for(;;)
01179     {
01180       // skip blanks and blank lines
01181 
01182       while(cur_ != end_ && char_type_[*cur_] == ' ')
01183         ++cur_;
01184         
01185       if(cur_ == end_ || *cur_ != '\n')
01186         return rv;
01187         
01188       // *cur_ == '\n'
01189 
01190       ++cur_;
01191       ++line_;
01192       rv = true;
01193 
01194         
01195     }
01196 
01197   }
01198 
01199 };
01200 
01201 struct CPP_Token_Source
01209 {
01210   virtual void operator()(CPP_Token&) = 0;
01212 
01213   virtual ~CPP_Token_Source() {}
01215 };
01216 
01217 
01218 
01219 class CPP_Stream_Token_Source
01220 : public CPP_Token_Source
01228 //
01229 {
01230 public:
01231 
01232   CPP_Token_Stream<std::istreambuf_iterator<char> > *stream_;
01234 
01235   CPP_Stream_Token_Source(std::istream &istr,
01236                           std::string filename="",
01237                           CPP_Token_Stream_Prep const* h=0)
01238   {
01242 
01243      std::istreambuf_iterator<char> cur(istr);
01244      std::istreambuf_iterator<char> end;
01245 
01246      stream_ = new CPP_Token_Stream<std::istreambuf_iterator<char> >(cur,end,filename,h);
01247         
01248   }
01249 
01250   ~CPP_Stream_Token_Source()
01252   {
01253     delete stream_;
01254   }
01255 
01256 
01257   void operator() (CPP_Token &token)
01259   {
01260     (*stream_)(token);
01261   }
01262 
01263 };
01264 
01265 class CPP_File_Token_Source
01266 : public CPP_Stream_Token_Source
01267 //
01269 //
01270 {
01271   std::ifstream *input_file_; 
01272 
01273 public:
01274 
01275   CPP_File_Token_Source(char const *f = "", CPP_Token_Stream_Prep const* h=0)
01276   : CPP_Stream_Token_Source( *( input_file_ = new std::ifstream(f) ), f, h )
01277   {
01280   }
01281 
01282   CPP_File_Token_Source(std::string const &f, CPP_Token_Stream_Prep const* h=0)
01283   : CPP_Stream_Token_Source( *( input_file_ = new std::ifstream(f.c_str()) ), f, h )
01284   {
01287 
01288   }
01289 
01290   ~CPP_File_Token_Source()
01291   {
01293 
01294     delete input_file_;
01295   }
01296 
01297   operator bool() const { return bool(*input_file_); } 
01298 
01299 };
01300 
01301 
01302 class CPP_String_Token_Source
01303 : public CPP_Token_Source
01304 {
01305 
01306   std::string copy_; 
01307 
01308   CPP_Token_Stream<std::string::const_iterator> stream_; 
01309 
01310 public:
01311 
01312   CPP_String_Token_Source(std::string s)
01313   : copy_(s),
01314     stream_(copy_.begin(), copy_.end())
01315   {
01320   }
01321 
01322   void operator() (CPP_Token &token)
01326   {
01327     stream_(token);
01328   }
01329 
01330 };
01331 
01332 
01333 class CPP_Buffer_Token_Source
01334 : public CPP_Token_Source
01338 {
01339 
01340   CPP_Token_Stream<char const *> stream_; 
01341 
01342 public:
01343 
01344   CPP_Buffer_Token_Source(char const *begin,
01345                           char const *end,
01346                           CPP_Token_Stream_Prep const *h=0)
01347   : stream_(begin, end,"",h)
01348   {
01356   }
01357 
01358   CPP_Buffer_Token_Source(char const *begin,
01359                           char const *end,
01360                           std::string f,
01361                           CPP_Token_Stream_Prep const *h=0)
01362   : stream_(begin, end,f,h)
01363   {
01373   }
01374 
01375 
01376   void operator() (CPP_Token &token)
01380   {
01381     stream_(token);
01382   }
01383 
01384   char const * cur() const { return stream_.cur_; } 
01385   char const * end() const { return stream_.end_; } 
01386 
01387 
01388 
01389 };
01390 
01391 } // namespace cxxtls
01392 #endif