cpptagdb.cxx

Go to the documentation of this file.
00001 
00002 //
00003 // Copyright 2002-2011, Lowell Boggs Jr.
00004 //
00005 // This file or directory, containing source code for a computer program,
00006 // is Copyrighted by Lowell Boggs, Jr.  987 Regency Drive, Lewisville
00007 // TX (USA), 75067.  You may use, copy, modify, and distribute this
00008 // source file without charge or obligation so long as you agree to
00009 // the following:
00010 //
00011 //  1.  You must indemnify Lowell Boggs against any and all financial
00012 //      obligations caused by its use, misuse, function, or malfunction.
00013 //      Further, you acknowledge that there is no warranty of any kind,
00014 //      whatsoever.
00015 //
00016 //  2.  You agree not to attempt to patent any portion of this original
00017 //      work -- though you may attempt to patent your own extensions to
00018 //      it if you so choose.
00019 //
00020 //  3.  You keep this copyright notice with the file and all copies
00021 //      of the file and do not change it anyway except language translation.
00022 //
00023 // You are responsible for enforcing your own compliance with these
00024 // conditions and may not use this source file if you cannot agree to the
00025 // above terms and conditions.
00026 
00027 
00075 
00076 
00077 #include <cxxtls/cpp_token_stream.h>
00078 #include <cstdlib>
00079 #include <fstream>
00080 #include <iostream>
00081 #include <cxxtls/file.h>
00082 #include <algorithm>
00083 #include <portable_strstream.h>
00084 #include <portable_io.h>
00085 #include <list>
00086 #include <cxxtls/options.h>
00087 #include <portable_io.h>
00088 #include <string.h>
00089 #include <ctype.h>
00090 
00091 using namespace std;
00092 using namespace cxxtls;
00093 
00094 static void parse_file(FileName const &);
00095 
00096 static ostreambuf_iterator<char>* output;  // where log_symbol() writes its records; main() points it at cout
00097 
00098 static bool read_filenames_from_stdin = false;  // -stdin:  take the file names from stdin, one per line
00099 static bool log_all_tokens_to_stderr  = false;  // -tokens: next_token() echoes every token to stderr
00100 static bool log_filenames_to_stderr   = false;  // -files:  parse_file() echoes each file name to stderr
00101 static bool grep_style_output         = false;  // -grep:   log_symbol() writes "file:line: type name" records
00102 static bool log_resumes               = false;  // -resume: ~ScopeBinder() logs the scope being resumed
00103 
00104 int main(int argc, char **argv, char **environ)
00105 {
00106 
00107 # ifdef _MSC_VER
00108   
00109     // force the output to be a binary stream and we'll control eht end of line sequencing our
00110     // selves
00111   
00112     cout.flush();
00113   
00114     setmode(1, O_BINARY);
00115 
00116 # endif
00117 
00118   output = new ostreambuf_iterator<char>(cout);
00119 
00120   //
00121   // parse the options
00122   //
00123 
00124   ProgramOptions ops("-v,-grep,-files,-tokens,-stdin,-resume,-help,-h,--help,-dcl;",
00125                      argc,
00126                      argv,
00127                      environ
00128                     );
00129 
00130   if(ops.option("-v"))
00131   {
00132      // print the version number and quit.
00133 
00134      cout << "cpptagdb version 2.7" << endl;
00135 
00136      exit(1);
00137 
00138      // version 2.7 a.  Fix bugs in the handling of template specializations and the inline keyword
00139      //                 as applied to declarations.
00140      // version 2.6 a.  added support for class template specializations.
00141      //
00142      // version 2.5 a.  Added support a common boost source code paradigm:  macros to generate class
00143      //                 names:
00144      //
00145      //                   struct MACRO(parm) { /* class body */ };
00146      //
00147      // version 2.3 a.  Eliminated the need for the -dcl option.
00148      //
00149      // version 2.2 a.  Fixed major bug:  Now it is possible to correctly detect the
00150      //                 following class name:
00151      //
00152      //                     class CursorWindow::Dialog::Impl { ... }
00153      //
00154      // version 2.1  a.  Added the -dcl option that lets the user add
00155      //                  synonyms for __declspec().  
00156      //
00157      //              b.  Added script cpptagdb_find_cdecl_synonyms,
00158      //                  which figures out what the parameters for -dcl
00159      //                  ought to be.
00160      //
00161      //              c.  Added builtin synonyms for boost macros and
00162      //                  MS visual studio 2010 macros.
00163 
00164      // Prior to version 2.1, I was not keeping track of the versions.
00165 
00166 
00167 
00168   }
00169 
00170 
00171   grep_style_output         = ops.option("-grep");
00172   log_filenames_to_stderr   = ops.option("-files");
00173   read_filenames_from_stdin = ops.option("-stdin");
00174   log_all_tokens_to_stderr  = ops.option("-tokens");
00175   log_resumes               = ops.option("-resume");
00176 
00177 
00178   if(ops.option("-help")  ||
00179      ops.option("--help") ||
00180      ops.option("-h")
00181     )
00182   {
00183     FileName program(ops.argv[0]);
00184 
00185     cerr << "Usage:" << endl
00186          << "  "     << program.basename()
00187          << "  [options] [.c and .h file names]" << endl
00188          << "Options:" << endl
00189          << "  -v       print program version and quit" << endl
00190          << "  -help    print this help message" << endl
00191          << "  -h       ditto" << endl
00192          << "  --help   ditto" << endl
00193          << "  -stdin   read the .c and .h file names from stdin" << endl
00194          << "           note:  one filename per line" << endl
00195          << "  -tokens  log all tokens to stderr" << endl
00196          << "  -files   log process files to stderr" << endl
00197          << "  -grep    use grep style output of the symbols" << endl
00198          << "  -resume  log current scope name when ending classes" << endl
00199          ;
00200 
00201     exit(0);
00202   }
00203 
00204   ops.argv.erase(ops.argv.begin());  // remove the program name from the
00205                                      // non-optional program arguments
00206 
00207 
00208   // get the list of file names to work on
00209 
00210   list<FileName> files;
00211 
00212   if(read_filenames_from_stdin)
00213   {
00214     string tmp;
00215 
00216     while(!cin.eof())
00217     {
00218       tmp.resize(0);
00219 
00220       getline(cin, tmp);
00221 
00222       if(tmp.size() == 0)
00223         break;
00224         
00225         
00226       files.push_back(tmp);
00227 
00228     }
00229   }
00230   else
00231   {
00232     size_t i;
00233 
00234     for(i=0; i < ops.argv.size(); ++i)
00235     {
00236       files.push_back(ops.argv[i]);
00237     }
00238 
00239   }
00240 
00241   // parse the named files
00242 
00243   list<FileName>::iterator l = files.begin();
00244   list<FileName>::iterator g = files.end();
00245 
00246 
00247   for(; l != g; ++l )
00248   {
00249     if(!l->exists())
00250     {
00251       cerr << *l << " error, file does not exist" << endl;
00252     }
00253     else
00254     if(!l->is_dir())
00255       parse_file(*l);
00256   }
00257 
00258   exit(0);
00259 }
00260 
00261 static void parse_declarations(CPP_Buffer_Token_Source &s);
00262 static void log_symbol(string type, string name, string file, int line, bool include_scope=true);
00263 
00264 
00265 class DefineHandler
00266 : public CPP_Token_Stream_Prep
00272 {
00273 public:
00274 
00275   void operator() (string const &define, string const &file, int line) const
00276   {
00277     string::const_iterator f(define.begin());
00278     string::const_iterator g(define.end());
00279 
00280     // only handle #defines
00281 
00282     if( f == g || *f != 'd')
00283       return;
00284 
00285     // skip the word 'define'
00286 
00287     while(f != g && !isspace(*f) ) ++f;
00288 
00289     // skip blanks after define
00290 
00291     while(f != g && ( isspace(*f) || *f == '\n') ) ++f;
00292 
00293     if(f == g)
00294       return;  // missing define variable name
00295 
00296     // skip past the text in the define variable name
00297 
00298     string::const_iterator l(f);
00299 
00300     while(l != g &&
00301           (
00302             (*l >= 'a' && *l <= 'z') ||
00303             (*l >= 'A' && *l <= 'Z') ||
00304             (*l >= '0' && *l <= '9') ||
00305             (*l == '_')
00306           )
00307          ) ++l;
00308 
00309     // f,l now defines the name of the define variable
00310 
00311     static string define_name("define");
00312 
00313     log_symbol(define_name, string(f,l), file, line, false);
00314 
00315   }
00316 };
00317 
00318 
00319 
00320 static void parse_file(FileName const& name)
00321 //
00322 // Parse a file and print out the definitions contained
00323 // therein.
00324 //
00325 {
00326   if(!grep_style_output)
00327     cout << '+' << name << endl;
00328 
00329   FileContents buffer;
00330 
00331   name.slurp(&buffer);
00332 
00333   if(!buffer.ok())
00334   {
00335     cerr << "Error opening " << name << ", " << buffer.error() << endl << flush;
00336     exit(1);
00337   }
00338 
00339   DefineHandler dh;
00340 
00341   if(log_filenames_to_stderr)
00342     cerr << name << endl;
00343 
00344   CPP_Buffer_Token_Source s(buffer.begin(), buffer.end(), name, &dh);
00345 
00346   parse_declarations(s);
00347 
00348 }
00349 
00350 static CPP_Token                token;   // the current token; refreshed by next_token()
00351 static CPP_Buffer_Token_Source *stream;  // the token source being read; set by parse_declarations()
00352 
00353 inline static void next_token()
00354 //
00355 // get the next token from the stream and leave it 'token'
00356 //
00357 {
00358   (*stream)(token);
00359 
00360   if(token.type_ == CPP_Token::aln &&
00361      token.text_[0] == 'o' &&
00362      token.text_ == "operator"
00363     )
00364   {
00365     (*stream)(token);
00366 
00367     if(token.type_ == '(')
00368     {
00369       token.type_ = CPP_Token::aln;
00370 
00371       token.text_ = "operator " + token.text_;
00372 
00373       CPP_Token tmp;
00374 
00375       (*stream)(tmp);
00376 
00377       token.text_ += tmp.text_;
00378     }
00379     else
00380     {
00381       token.type_ = CPP_Token::aln;
00382 
00383       token.text_ = "operator " + token.text_;
00384     }
00385 
00386 
00387 
00388   }
00389 
00390 
00391   if(log_all_tokens_to_stderr)
00392     cerr << token << endl;
00393 
00394 }
00395 
00396 inline bool eof()
00397 //
00398 // determine if we are at the end of the token stream
00399 //
00400 {
00401    return token.type_ == CPP_Token::eof;
00402 }
00403 
00404 static bool parse_declaration(bool log_func_forward_decls=false);
00405 
00406 string outer_scope;  // scope prefix that log_symbol() puts in front of names; "::" at file level
00407 
00408 static void parse_declarations(CPP_Buffer_Token_Source &s)
00409 //
00410 // parse and print out all definitions in a file
00411 // (don't use this for parsing nested stuff)
00412 {
00413   stream = &s;
00414 
00415   for(next_token(); !eof(); )
00416   {
00417     outer_scope = "::";
00418 
00419     if(parse_declaration())  // automatically calls next_token()
00420       break;
00421   }
00422 
00423   if(!eof())
00424   {
00425     cerr << token << ", error:  expected end of file" << endl;
00426     cerr << __FILE__ << ":" << __LINE__ << endl;
00427   }
00428 
00429 }
00430 
// ASSUMED_TOKEN(type):  verify that the current token has the given type.
// When it does not, report the offending token, the expected token type,
// and the source location of the check on stderr, and make the enclosing
// function return true (the parsers' "stop parsing" result).
//
// The body is wrapped in do { ... } while(0) so that the macro followed
// by a semicolon is exactly one statement and can safely be used as the
// branch of an if/else.  Usage:  ASSUMED_TOKEN(CPP_Token::aln);
#define ASSUMED_TOKEN(type)                                               \
  do                                                                      \
  {                                                                       \
    if(token.type_ != (type))                                             \
    {                                                                     \
      cerr << token << ", error:  expected "                              \
           << CPP_Token::type_name(type) << endl;                         \
      cerr << __FILE__ << ":" << __LINE__ << endl;                        \
      return true;                                                        \
    }                                                                     \
  } while(0)
00432 
00433 
00434 
00435 // all parsing functions leave the token that terminated them
00436 // in the input stream.
00437 
00438 static bool parse_class(bool handling_typedefs=false);
00439 static bool parse_namespace();
00440 static bool parse_typedef();
00441 
00442 static bool parse_enum(bool handling_typedefs=false);
00443 static bool parse_extern();
00444 static bool parse_using();
00445 static bool parse_funcvar(bool log_forward_funcs_defs=false,
00446                           bool handling_typedefs=false);
00447 static void eat_template_parms();
00448 static void eat_function_body();
00449 
00450 static bool parse_declaration(bool log_func_forward_decls)
00451 {
00452   if(token.type_ == ';' ) // allow randomly placed semicolons
00453   {
00454     next_token();
00455     return false;
00456   }
00457 
00458   if(token.type_ == '{')
00459   {
00460     // allow randomly placed {} blocks
00461 
00462     eat_function_body();
00463     next_token();
00464     return false;
00465 
00466   }
00467 
00468   if(token.type_ == CPP_Token::eof)
00469     return true;
00470 
00471   if(token.type_ == '=')  // handle oddly placed variable initializations
00472   {
00473      while(token.type_ != ';')
00474      {
00475        if(token.type_ == '{')
00476          eat_function_body();
00477 
00478        next_token();
00479      }
00480 
00481      return false;
00482   }
00483 
00484 
00485 //  if(token.type_ != CPP_Token::der)
00486 //    ASSUMED_TOKEN(CPP_Token::aln);
00487 
00488   if(token.text_ == "template")
00489   {
00490     // eat template<parms>
00491 
00492     next_token();  // eat 'template'
00493 
00494     eat_template_parms();
00495 
00496     if(token.type_ == '>')
00497       next_token();
00498 
00499     // now all references to the template keyword and its parms are
00500     // gone fromthe stream
00501 
00502   }
00503 
00504   if(token.type_ == CPP_Token::aln && token.text_[0] == 'i' &&
00505      token.text_ == "inline")
00506      next_token();
00507 
00508   if(token.text_ == "class"  || token.text_ == "struct" || token.text_ == "union")
00509     return parse_class();
00510   else
00511   if(token.text_ == "typedef" )
00512     return parse_typedef();
00513   else
00514   if(token.text_ == "enum" )
00515     return parse_enum();
00516   else
00517   if(token.text_ == "namespace" )
00518     return parse_namespace();
00519   else
00520   if(token.text_ == "using" )
00521     return parse_using();
00522   else
00523   if(token.text_ == "extern")
00524     return parse_extern();
00525   else
00526   if(token.text_ == "friend")
00527   {
00528     next_token(); // eat the friend keyword
00529 
00530     if((token.type_ == CPP_Token::aln) &&
00531        (token.text_ == "class" ||
00532         token.text_ == "struct"
00533        )
00534       )
00535     {
00536       // prevent a misnaming of the scope caused by the
00537       // parsing of friend declarations.
00538 
00539       while(token.type_ != ';')
00540         next_token();
00541     }
00542 
00543     return parse_declaration(log_func_forward_decls);
00544 
00545   }
00546   else
00547   if(token.type_ == CPP_Token::aln &&
00548      token.text_[0] == 'p'         &&
00549        (token.text_ == "public"    ||
00550         token.text_ == "private"   ||
00551         token.text_ == "protected"
00552        )
00553     )
00554   {
00555     //
00556     // eat the public:, private:, and protected: syntax so as not
00557     // to confuse the 'public: typedef int x' as a variable definition.
00558     //
00559     next_token();
00560 
00561     if(token.type_ == ':')
00562       next_token();
00563 
00564     return false;
00565   }
00566   else
00567     return parse_funcvar(log_func_forward_decls);
00568 
00569 }
00570 
00571 
00572 static void log_symbol(string type, string name, string file, int line, bool include_scope)
00573 {
00574   // the file parameter is unused unless we are doing grep style output
00575 
00576   if(name.size() != 0)
00577   {
00578     if(grep_style_output)
00579     {
00580       char buffer[40];
00581 
00582       sprintf(buffer, "%d", line);
00583 
00584       copy(file.begin(), file.end(), *output);
00585       *(*output)++ = ':';
00586 
00587       copy(buffer, buffer + strlen(buffer), *output);
00588       *(*output)++ = ':';
00589 
00590       *(*output)++ = ' ';
00591 
00592       copy(type.begin(), type.end(), *output);
00593       *(*output)++ = ' ';
00594 
00595       if(include_scope)
00596       {
00597         if(name.begin() != name.end() && *name.begin() != ':')
00598             copy(outer_scope.begin(), outer_scope.end(), *output);  // only symbols not in the global scope,
00599                                                                     // prefix them with their scope
00600       }
00601         
00602       copy(name.begin(), name.end(), *output);
00603 
00604 
00605       *output = copy_end_of_line(*output);
00606 
00607     }
00608     else
00609     {
00610 
00611       *(*output)++ = '-';
00612 
00613       copy(type.begin(), type.end(), *output);
00614       *(*output)++ = ' ';
00615 
00616       if(include_scope)
00617       {
00618         if(name.begin() != name.end() && *name.begin() != ':')
00619              copy(outer_scope.begin(), outer_scope.end(), *output); // only symbols not in the global scope,
00620                                                                     // only symbols not in the global scope,   
00621       }
00622         
00623       copy(name.begin(), name.end(), *output);
00624       *(*output)++ = ' ';
00625 
00626       char buffer[40];
00627 
00628       sprintf(buffer, "%d", line);
00629 
00630       char *scan = buffer;
00631 
00632       while(*scan)
00633         *(*output)++ = *scan++;
00634 
00635       *output = copy_end_of_line(*output);
00636 
00637     }
00638   }
00639 }
00640 
00641 struct ScopeBinder
00642 //
00646 //
00647 {
00648   string saved_scope; 
00649 
00650   ScopeBinder(string new_scope)
00653   {
00654     saved_scope = outer_scope;
00655     outer_scope = new_scope;
00656   }
00657 
00658   ~ScopeBinder()
00660   {
00661     outer_scope = saved_scope;
00662 
00663     if(log_resumes)
00664     {
00665       string tmp = outer_scope;
00666 
00667       int l = tmp.size();
00668 
00669       if(l > 2 && tmp[l-1] == ':' && tmp[l-2] == ':')
00670       {
00671         tmp.erase( tmp.begin() + (l-2), tmp.end() );
00672       }
00673 
00674       log_symbol("resume", tmp, token.file_, token.line_, false);
00675     }
00676   }
00677 
00678 };
00679 
00680 
00681 static void parse_variables_defined(bool log_func_forward_decls=false,
00682                                     bool handling_typedefs=false
00683                                    )
00684 // parse and log the names of variables defined as part of a struct
00685 // declaration:
00686 //
00687 //   struct x { ... }  var1, *var2, function(..), array[...], ... ;
00688 {
00689 
00690   if(token.type_ != ';')
00691     parse_funcvar(log_func_forward_decls, handling_typedefs);
00692 
00693   return;
00694 
00695 }
00696 static void eat_function_parms(string *parmtext=0);
00697 
00698 
00699 
00700 static bool parse_class(bool handling_typedefs)
00701 //
00702 //  parse class, union, and struct declarations and keep track
00703 //  of the name of the 'scope' of each nested class
00704 //
      //  On entry the current token is the class/struct/union keyword that
      //  made the caller dispatch here.  Besides real class definitions the
      //  function copes with plain C code that uses such a word as an
      //  ordinary identifier, with functions and variables whose type is
      //  written 'class Name', and with macros that decorate or generate
      //  the class name.
      //
      //  handling_typedefs is only forwarded to parse_variables_defined().
      //
      //  Returns true when parsing has to stop (end of file was reached,
      //  ASSUMED_TOKEN reported an unexpected token, or a nested
      //  parse_declaration() returned true); false otherwise.
      //
00705 {
00706 
00707   string class_type = token.text_;  // the keyword itself; logged as the symbol type
00708 
00709   next_token();  // eat the 'struct' or 'class' keyword
00710 
00711   string name = token.text_;  // save the class name
00712   string file = token.file_;
00713   int    line = token.line_;
00714   bool   maybeFunc = false;   // set once a second identifier follows the first one
00715 
00716   if(token.type_ == CPP_Token::der)
00717   {
00718       // handle this:
00719       //
00720       //  class ::SomeClass { ... }
00721 
00722       next_token();
00723 
00724       ASSUMED_TOKEN(CPP_Token::aln);
00725 
00726       name += token.text_;
00727       line  = token.line_;
00728 
00729 
00730   }
00731 
00732 
00733   if(token.type_ == '{')
00734   {
00735     name="unnamedstruct"; // handle struct { ... }
00736   }
00737   else
00738   {
00739       if(token.type_ == '[')
00740       {
00741         // someone in plain old c has defined int class[470];
00742         
00743         int depth=0;
00744         
00745         while(!eof())   // eat the contents of the arrays
00746         {
00747           if(token.type_ == '[')
00748             ++depth;
00749           else
00750           if(token.type_ == ']')
00751           {
00752             --depth;
00753             if(depth == 0)
00754               break;
00755           }
00756           next_token();
00757         }
00758 
00759         next_token();  // eat the closing ]
00760         
00761       }
00762       else if(token.type_ == '(')
00763       {
00764         // oops, someone in plain old C has done this:  int class() ...
00765         
00766         eat_function_parms();
00767         next_token();
00768       }
00769 
00770 
00771       if(token.type_ == ';' || token.type_ == ',' )
00772       {
00773           // we are not parsing a real class definition
00774           // but rather a plain old c style variable
00775           // declaration like 'int class;'
00776           next_token();
00777           return false;
00778       }
00779 
00780     ASSUMED_TOKEN(CPP_Token::aln);
00781 
      // walk over a run of identifiers; the last one seen becomes the name
00782     while(token.type_ == CPP_Token::aln)
00783     {
00784 
00785         next_token(); // should get here at least once
00786 
00787         if(token.type_ == CPP_Token::aln)
00788         {
00789            maybeFunc=true;
00790 
00791            // Assume we are parsing something like this:
00792            //
00793            //   class MACRO ClassName {... }
00794            //               ^
00795            //
00796            // But we also have to support this:
00797            //
00798            //   class SomeClass  function() { return SomeClass(); }
00799            //                    ^
00800            //
00801            // And this
00802            // 
00803            //   class MACRO RealClassName { ... }
00804            //               ^
00805 
00806            name = token.text_;  // save the class name
00807            file = token.file_;
00808            line = token.line_;
00809         }
00810         else
00811         if( maybeFunc && token.type_ == ';' )
00812         {
      // two identifiers followed by ';' -- 'class Type name;' declares a variable
00813            log_symbol("variable", name, file, line);
00814            return false;
00815         }
00816 
00817     }
00818 
00819     while(token.type_ == CPP_Token::der)
00820     {
00821        // handle NAME::NAME::NAME... as the class name, as in
00822        //
00823        //   class CursorWindow::Dialog::Impl { ... }
00824 
00825        name += token.text_;
00826        line  = token.line_;
00827 
00828        next_token();
00829 
00830        if(token.type_ != CPP_Token::aln)
00831          break;
00832 
00833        name += token.text_;
00834        line  = token.line_;
00835        next_token();
00836 
00837     }
00838 
00839   }
00840 
00841 
00842   if(token.type_ == ';')   // nothing more to this declaration; the ';' is left for the caller
00843     return false;
00844 
00845   if(token.type_ == CPP_Token::aln ||
00846      token.type_ == '&'            ||
00847      token.type_ == '*'
00848     )
00849   {
00850     // this is a variable declaration such as when someone does this
00851     // in plain old c   typedef int class;   class &v1, *v2, x;
00852 
00853     parse_variables_defined(false, handling_typedefs);
00854 
00855     return false;
00856 
00857   }
00858 
00859   if(token.type_ == '(')
00860   {
00861       // we have something like this:
00862       //
00863       //   class something(...) 
00864       
00865       eat_function_parms(); 
00866 
00867       if(token.type_ == ')')
00868       {
00869         next_token();
00870 
00871         if(maybeFunc)
00872         {
00873             if(token.type_ == '{' ||
00874                token.type_ == ';' ||
00875                (token.type_ == CPP_Token::aln && token.text_ == "const")
00876               )
00877             {
00878                 // we have something like this:
00879                 //
00880                 //  class  ClassName  functName() const { ... }
00881                 //                                ^     ^
00882                 // we are not defining a class, we are defining a function
00883 
00884                 while(   token.type_  != CPP_Token::eof
00885                       && token.type_  != ';'
00886                       && token.type_  != '{'
00887                       && token.type_  != '}'
00888                      )
00889                 {
00890                    next_token(); // skip till { or end of declaration
00891 
00892                 }
00893 
00894                 if(token.type_ == CPP_Token::eof)
00895                    return true;
00896 
00897                 if(token.type_ == '{')
00898                 {
      // only a definition (one that has a body) is logged here
00899                     log_symbol("function", name, file, line);
00900                    
00901                 }
00902 
00903                 return false;
00904 
00905             }
00906              
00907         }
00908 
00909       }
00910       
00911       // assume we are seeing something like this:
00912       //
00913       //   class compiler_directive(parms)   name { ...
00914 
00915       if(token.type_ == CPP_Token::aln || token.type_ == CPP_Token::der)
00916       {      
00917           name = token.text_;  // save the class name
00918           file = token.file_;
00919           line = token.line_;
00920       }
00921       
00922       if(token.type_ == CPP_Token::der)
00923       {
00924           // we are seeing this:
00925           //
00926           //   class directive(parms) ::name ...
00927 
00928           next_token();
00929 
00930           if(token.type_ == CPP_Token::aln)
00931           {
00932              name += token.text_;
00933              line  = token.line_;
00934           }
00935 
00936           next_token();
00937 
00938       }
00939       else
00940       {
00941 
00942         if(token.type_ == '<')
00943         {
00944            eat_template_parms();
00945            if(token.type_ == '>')
00946               next_token();
00947         }
00948 
00949         if(token.type_ == '{' || token.type_ == ':' )
00950           {
00951             // user has done something like this
00952             //   class function(parms) { .. }
00953             //                         ^
00954             // Normally, this is a syntax error, but if function
00955             // is really a macro that computes the class name, then, maybe
00956             // we want to not complain about it...
00957 
00958             if(token.type_ == ':')
00959               {
00960                 while(   token.type_ != ';'
00961                       && token.type_ != '{'
00962                       && token.type_ != CPP_Token::eof
00963                      )
00964                   {
00965                     next_token(); // consume tokens until { or ;
00966                   }
00967               }
00968 
00969           }
00970         else
00971         if(token.type_ == ';')
00972         {
00973              next_token();
00974              return false; // forward decl of badly formed class def
00975         }
00976         else
00977         if(token.type_ == CPP_Token::der)
00978         {
00979            // we are seeing something like this:
00980            //   struct Macro(parms)<parms>  ::result<templateParms> : baseMacro(parms) {};
00981            //                               ^
00982            //
00983            // this is a multi-level specialization
00984 
00985            name=""; // we are going to reconstruct the name to refer to result
00986 
00987            while(token.type_ == CPP_Token::der)
00988            {
00989               next_token();
00990               
00991               ASSUMED_TOKEN(CPP_Token::aln);
00992 
00993               name+= token.text_;
00994               line = token.line_;
00995               file = token.file_;
00996 
00997               next_token();
00998            }
00999 
01000         }
01001         else
01002         {
01003             ASSUMED_TOKEN(CPP_Token::aln);
01004             next_token();
01005         }
01006 
01007       }
01008 
      // append any further ::name qualifiers to the name
01009       while(token.type_ == CPP_Token::der)
01010       {
01011          name += token.text_;
01012          line  = token.line_;
01013          next_token();
01014 
01015          if(token.type_ != CPP_Token::aln)
01016             break;
01017 
01018          name += token.text_;
01019          line  = token.line_;
01020 
01021          next_token();
01022 
01023       }
01024 
01025       // At this point, we are here:
01026       //
01027       //   class directive(name) ::name::name::name ...
01028       //                                            ^
01029 
01030   }
01031   else
01032   if(token.type_ == '[' || token.type_ == '=')
01033   {
01034      // have syntax like this:
01035      //
01036      //   struct T array[40];
01037      //                 ^
01038 
01039      log_symbol("variable", name, file, line);
01040 
01041      while(    token.type_ != CPP_Token::eof 
01042            &&  token.type_ != ';'
01043           )
01044      {
01045          next_token();
01046      }
01047 
01048      return false;
01049 
01050   }
01051   else
01052   if(token.type_ == '<')
01053   {
01054       // we have something like
01055       //
01056       //   template<> struct Class<instanceParms> { ... };
01057 
01058       eat_template_parms();
01059       if(token.type_ == '>')
01060          next_token();
01061      
01062   }
01063 
01064 
01065 
01066   // now we know that we really are in a normal c++ class
01067   // union, or struct declaration.  The classname now gets
01068   // added to the scope.
01069 
01070 
01071   log_symbol(class_type, name, file, line);
01072 
01073   string newScope;
01074 
01075   static string colon_colon("::");
01076 
01077 
      // a name that starts with ':' is already fully qualified
01078   if(name[0] != ':')
01079   {
01080       newScope = outer_scope + name + colon_colon;
01081   }
01082   else
01083   {
01084      newScope = name + colon_colon;
01085   }
01086 
01087   ScopeBinder saved_scope(newScope);
01088 
      // skip anything between the name and the body (or the ';')
01089   while(!eof() && token.type_ != '{' && token.type_ != ';')
01090     next_token();
01091 
01092   if(eof())
01093     return true;
01094 
01095   if(token.type_ != ';')
01096     {
01097       next_token(); // eat {
01098 
      // parse the members, one declaration at a time, up to the closing }
01099   while(token.type_ != '}')
01100   {
01101     if(token.type_ == '~')   // destructors mess up the parse_declaration logic
01102     {
01103       next_token();
01104       
01105       if(token.type_ == CPP_Token::aln)
01106       {
01107           static string tilde("~");
01108 
      // glue the '~' onto the name so the destructor is parsed as "~Name"
01109           token.text_ = tilde + token.text_;
01110       }
01111 
01112     }
01113 
01114     if(parse_declaration(true))
01115       return true;
01116   }
01117     }
01118 
01119   next_token();  // eat the closing } (or the ; when there was no body)
01120 
      // back to the enclosing scope before logging anything declared after the body
01121   outer_scope = saved_scope.saved_scope;
01122 
01123   parse_variables_defined(false,handling_typedefs);  // as part of this class declaration
01124 
01125   return false;
01126 
01127 }
01128 
01129 static void eat_type_name()
01130 //
01131 // parse and discard a typename -- particularly it handles
01132 // the unsigned type's bizare variants.
01133 //
01134 {
01135   while(token.type_ == CPP_Token::aln &&
01136         ( (token.text_ == "typename") || (token.text_ == "const") )
01137        )
01138   {
01139     next_token();
01140   }
01141 
01142   if(token.text_ == "unsigned")
01143   {
01144     next_token();
01145 
01146     if(token.type_ == CPP_Token::aln &&
01147        (token.text_ == "int"    ||
01148         token.text_ == "short"  ||
01149         token.text_ == "long"   ||
01150         token.text_ == "char"
01151        )
01152       )
01153         next_token();
01154   }
01155   else
01156   {
01157     do
01158     {
01159       next_token();
01160     }
01161     while(token.type_ == CPP_Token::der);
01162 
01163   }
01164 
01165   eat_template_parms();  // if this is a template type name eat the parms
01166 
01167   while(   (!eof() && token.type_ == '*') 
01168         || (token.type_ == '&' )
01169         || (token.type_ == CPP_Token::aln && token.text_ == "const")
01170        )
01171   {
01172     next_token(); // eat normal type modifiers
01173   }
01174 
01175 }
01176 
01177 
01178 static bool parse_typedef()
01179 {
01180   next_token();  // eat the word 'typedef'
01181 
01182   if(token.text_ == "class" || token.text_ == "struct" || token.text_ == "union")
01183     return parse_class(true);
01184 
01185   if(token.text_ == "enum")
01186     return parse_enum(true);
01187 
01188   eat_type_name();
01189 
01190   parse_variables_defined(false, true);
01191                                         
01192 
01193   return false;
01194 }
01195 
01196 static bool parse_enum(bool handling_typedefs)
01197 //
01198 //  parse and log enumeration declarations
01199 //
01200 //    enum { ... } variables
01201 //    enum name {  ... } variables
01202 //    enum name variables
01203 //    enum [name] { name [=value,]... } [variables]
01204 //
01205 {
01206 
01207   next_token();  // eat 'enum'
01208 
01209   CPP_Token save = token;
01210 
01211   if(token.type_ != '{')
01212      next_token();
01213 
01214   if(token.type_ == '{')
01215   {
01216     static string enum_type("enum");
01217     static string enum_value("enumeration");
01218 
01219     if(save.text_[0] != '{')
01220         log_symbol(enum_type, save.text_, save.file_, save.line_);
01221 
01222     // parse { name [=value, ...] }
01223 
01224     next_token();  // eat '{'
01225 
01226     while(token.type_ != '}')
01227     {
01228        if(token.type_ == CPP_Token::aln)
01229        {
01230          log_symbol(enum_value, token.text_, token.file_, token.line_);
01231         
01232          next_token();
01233         
01234          if(token.type_ == '=')
01235          {
01236            // eat initializer
01237         
01238            while(!eof() && token.type_ != ',' && token.type_ != '}')
01239              next_token();
01240         
01241          }
01242         
01243        }
01244        else
01245          next_token();
01246     }
01247 
01248     next_token();  // eat the closing '}'
01249 
01250   }
01251 
01252   parse_variables_defined(false, handling_typedefs);
01253 
01254   return false;
01255 }
01256 static bool parse_namespace()
01257   //
01258   // handle namespace [optional name] ;
01259   //
01260 {
01261   next_token(); // skip the namespace keyword
01262 
01263   ScopeBinder old_scope(outer_scope);
01264 
01265   if(token.type_ == CPP_Token::aln)
01266   {
01267     log_symbol("namespace", token.text_, token.file_, token.line_, true);
01268 
01269     outer_scope += token.text_ + "::" ;
01270     next_token();
01271   }
01272   else
01273   {
01274     outer_scope = "<filescope>::";
01275   }
01276 
01277   if(token.type_ == '{')
01278   {
01279      next_token();
01280 
01281      while(!eof() && token.type_ != '}')
01282      {
01283        if(parse_declaration())
01284          return true;
01285      }
01286 
01287      next_token();
01288 
01289      return false;
01290 
01291   }
01292 
01293   return parse_declaration();
01294 
01295 }
01296 static bool parse_using()
01297 //
01298 // handle using namespace ;
01299 // or     using namespace::identifier ;
01300 //
01301 {
01302   next_token();  // eat 'using'
01303 
01304   bool is_namespace(false);
01305 
01306   if(token.text_ == "namespace")
01307   {
01308     next_token();
01309     is_namespace = true;
01310   }
01311 
01312   CPP_Token tmp(token);
01313 
01314   tmp.text_.resize(0);
01315 
01316   while(!eof() && token.type_ != ';')
01317   {
01318     if(token.type_ == CPP_Token::aln ||
01319        token.type_ == CPP_Token::der
01320       )
01321     {
01322       tmp.text_ += token.text_;
01323     }
01324 
01325     next_token();
01326   }
01327 
01328   next_token(); // eat the semicolon
01329 
01330   static string used_symbol("used");
01331 
01332   if(!is_namespace && tmp.text_.size())
01333     log_symbol(used_symbol, tmp.text_, tmp.file_, tmp.line_);
01334 
01335   return false;
01336 }
01337 
01338 static void eat_function_body()
01339 //
01340 // ignore all tokens between matching {}'s.
01341 // (leaves '}' in the stream)
01342 //
01343 {
01344   int depth=0;
01345 
01346   while(!eof())
01347   {
01348      if(token.type_ == '{')
01349        ++depth;
01350      else
01351      if(token.type_ == '}')
01352      {
01353        --depth;
01354        if(depth == 0)
01355          return;
01356      }
01357 
01358      next_token();
01359   }
01360 
01361 }
01362 
01363 static void eat_function_parms(string *parmtext)
01364 //
01365 // ignore all tokens between matching ()'s.
01366 // (leaves ')' in the stream).  The first alphanumeric
01367 // token between the paren's is stuck in parmtext if
01368 // non-zero
01369 //
01370 {
01371   int depth=0;
01372 
01373   while(!eof())
01374   {
01375      if(token.type_ == '(')
01376        ++depth;
01377      else
01378      if(token.type_ == ')')
01379      {
01380        --depth;
01381        if(depth == 0)
01382          return;
01383      }
01384 
01385      if(parmtext && depth == 1 && token.type_ == CPP_Token::aln)
01386      {
01387        *parmtext = token.text_;
01388        parmtext = 0;
01389      }
01390 
01391      next_token();
01392   }
01393 
01394 }
01395 
01396 static void eat_template_parms()
01397   // parse and discard template parms
01398   //
01399   // that is:   < stuff, more_stuff< a, b, c>, ... >
01400   // Also handles:
01401   //
01402   //   class templatename< ... >
01403   //
01404 {
01405 
01406   int depth=0;
01407 
01408   if(token.type_ == CPP_Token::aln &&
01409      token.text_ == "class"
01410     )
01411   {
01412     next_token();  // eat 'class' keyword
01413     next_token();  // eat template specialization name
01414   }
01415 
01416   if(token.type_ != '<')
01417     return;
01418 
01419   while(!eof())
01420   {
01421      if(token.type_ == '<')
01422        ++depth;
01423      else
01424      if(token.type_ == '>')
01425      {
01426        --depth;
01427        if(depth == 0)
01428          return;
01429      }
01430 
01431      next_token();
01432   }
01433 
01434 
01435 }
01436 
01437 static bool parse_funcvar(bool log_forward_func_declarations,
01438                           bool handling_typedefs
01439                          )
01440 //
01441 // parse all function and variable declarations -- and handle
01442 // any leftover trash that occurs whenever you are parsing
01443 // something that is not strictly syntactically correct.
01444 //
01445 // So instead of defining a function like this:
01446 //
01447 //    int function(parms) { body }
01448 //
01449 // This code will accept
01450 //
01451 //    trash ... trash int trash trash ... function(parms) trash .. trash { ... }
01452 //
01453 // It will also parse handle things like this:
01454 //
01455 //    trash struct { members } varname ;
01456 //
01457 // The 'trash' that is excepted includes pretty much any random tokens
01458 // and when '[', '{', the text up to the corresponding closing token
01459 // is simply ignored.
01460 //
01461 {
01462   // scan tokens until you first encounter one of
01463   //  (, [, {, struct/class/union/template or ;
01464   // that tells you that you are defining.
01465 
01466   CPP_Token tmp; // save token just before terminator
01467 
01468   if(token.text_ == "static")
01469     next_token();
01470 
01471   do
01472   {
01473 
01474 
01475 
01476     if(token.type_ == '{')
01477     {
01478       // some bizarre code fragment we misunderstood
01479 
01480       eat_function_body();
01481 
01482       next_token();  // eat the trailing '}'
01483 
01484 
01485       if(token.type_ == ',')
01486       {
01487           static string unknown_type("unknown");
01488         
01489           next_token();
01490           log_symbol(unknown_type, tmp.text_, tmp.file_, tmp.line_);
01491           return parse_funcvar(log_forward_func_declarations, handling_typedefs);
01492       }
01493       return false;
01494 
01495     }
01496 
01497     if(token.type_ == CPP_Token::aln)
01498     {
01499       if(token.text_ == "struct"   ||
01500          token.text_ == "class"    ||
01501          token.text_ == "union"    ||
01502          token.text_ == "template" ||
01503          token.text_ == "extern"
01504         )
01505         return parse_declaration();
01506 
01507       tmp = token;
01508 
01509       next_token();
01510 
01511       while(token.type_ == CPP_Token::der)
01512       {
01513         tmp.text_ += token.text_;
01514         next_token();
01515 
01516         if(token.type_ == CPP_Token::aln || token.type_ == '~')
01517         {
01518            tmp.text_ += token.text_;
01519            tmp.line_  = token.line_;
01520            tmp.file_  = token.file_;
01521 
01522            if(token.type_ == '~')
01523              {
01524                next_token();
01525                if(token.type_ == CPP_Token::aln)
01526                  {
01527                    tmp.text_ += token.text_;
01528                    tmp.line_  = token.line_;
01529                    tmp.file_  = token.file_;
01530 
01531                  }
01532              }
01533 
01534            next_token();
01535         }
01536 
01537       }
01538 
01539 
01540 
01541       if(token.type_ == '<')
01542       {
01543         eat_template_parms();
01544       }
01545 
01546     }
01547     else
01548     if(token.type_ == '=')
01549     {
01550       // scoop up and ignore a variable initializer
01551 
01552       while(token.type_ != ',' &&
01553             token.type_ != ';' &&
01554             !eof()
01555            )
01556       {
01557         if(token.type_ == '{')
01558           eat_function_body();
01559         else
01560         if(token.type_ == '(')
01561           eat_function_parms();
01562         
01563         next_token();
01564       }
01565 
01566 
01567     }
01568     else
01569       if(token.type_ == ';')
01570         return false; // not end of file (I think)
01571     else
01572       next_token();
01573 
01574     if(token.type_ == '~')
01575     {
01576       next_token();
01577 
01578       if(token.type_ == CPP_Token::aln)
01579       {
01580         static string tilde("~");
01581         
01582         token.text_ = tilde + token.text_;
01583       }
01584 
01585     }
01586 
01587     while( token.type_ == CPP_Token::der) // ::
01588     {
01589       next_token();
01590 
01591       if(token.type_ == '~')
01592       {
01593         next_token();
01594 
01595         if(token.type_ == CPP_Token::aln)
01596         {
01597           static string tilde("~");
01598         
01599           token.text_ = tilde + token.text_;
01600 
01601         }
01602 
01603       }
01604 
01605       if(token.type_ != CPP_Token::aln)
01606         break;
01607         
01608       tmp.text_ += "::";
01609       tmp.text_ += token.text_;
01610       tmp.line_  = token.line_;
01611       next_token();
01612     }
01613 
01614   }
01615   while(!eof()             &&
01616         token.type_ != '(' &&
01617         token.type_ != ';' &&
01618         token.type_ != ',' &&
01619         token.type_ != '['
01620        );
01621 
01622   bool is_function= (token.type_ == '(');
01623 
01624   string parmtext;
01625   string file = token.file_;
01626   int    line = token.line_;
01627 
01628 
01629   if(token.type_ == '(')
01630   {
01631     eat_function_parms(&parmtext);
01632     next_token();
01633   }
01634   else
01635   if(token.type_ == '[')
01636   {
01637      // eat until matching ']' -- and eat it too
01638 
01639      int depth = 0;
01640 
01641      while(!eof())
01642      {
01643        if(token.type_ == '[')
01644          ++depth;
01645        else
01646        if(token.type_ == ']')
01647        {
01648          --depth;
01649          if(depth == 0)
01650            break;
01651        }
01652 
01653        next_token();
01654 
01655      }
01656 
01657      next_token();
01658 
01659      // we should see either ';' or ',' as the token there
01660 
01661   }
01662 
01663 
01664   if(token.type_ == '(')
01665   {
01666     // we are dealing with something like a pointer to a function
01667     // where the following syntax was found:
01668     //
01669     //  int (*f)(parms) {}
01670 
01671     tmp.text_ = parmtext;
01672     tmp.file_ = file;
01673     tmp.line_ = line;
01674 
01675     eat_function_parms(&parmtext);
01676     next_token();
01677 
01678   }
01679   else
01680   if(token.type_ == ':')
01681   {
01682     // ignore the base class initalizers in a constructor's name
01683 
01684     while(!eof() && token.type_ != '{')
01685       next_token();
01686 
01687   }
01688 
01689   switch(token.type_)
01690   {
01691     case CPP_Token::eof:
01692          return true;
01693 
01694     case ';':
01695 
01696          {
01697            static string variable_type("variable");
01698            static string typename_type("typename");
01699            static string function_type("function");
01700         
01701            if(!is_function || log_forward_func_declarations)
01702             log_symbol( handling_typedefs 
01703                          ? typename_type 
01704                          : (is_function 
01705                             ? function_type
01706                             : variable_type
01707                            ),
01708                        tmp.text_,
01709                        tmp.file_,
01710                        tmp.line_
01711                       );
01712         
01713            next_token();
01714          }
01715          return false;
01716         
01717     case ',':
01718 
01719          {
01720            static string variable_type("variable");
01721            static string typename_type("typename");
01722 
01723            // do not log functions at this point -- the best you can have
01724            // in this code section is a forward declaration and we do not
01725            // log them -- only bodies.
01726 
01727 
01728            if(!is_function)
01729             log_symbol(handling_typedefs ? typename_type 
01730                                           : variable_type
01731                        , 
01732                        tmp.text_, 
01733                        tmp.file_, 
01734                        tmp.line_);
01735         
01736            next_token();
01737            return parse_funcvar(log_forward_func_declarations, handling_typedefs);
01738          }
01739     default:
01740 
01741          {
01742            static string function_type("function");
01743            static string variable_type("variable");
01744            static string typename_type("typename");
01745 
01746            if(is_function || (token.type_ != '=') )
01747                log_symbol(function_type, tmp.text_, tmp.file_, tmp.line_);
01748            else
01749                log_symbol(handling_typedefs ? typename_type : variable_type, 
01750                           tmp.text_, 
01751                           tmp.file_, 
01752                           tmp.line_);
01753 
01754          }
01755         
01756          // handle a goofy g++ variant syntax
01757         
01758            if(token.text_ == "return")
01759            {
01760              while(!eof() && token.type_ != '{')
01761                next_token();
01762            }
01763         
01764          // end goofy g++ variant syntax
01765 
01766          while(!eof() && 
01767                token.type_ != ';' && 
01768                token.type_ != '{' &&
01769                token.type_ != ','
01770               )
01771          {
01772            if(token.type_ == '(')
01773            {
01774              string trash;
01775              eat_function_parms(&trash);
01776              next_token();
01777            }
01778            else
01779              next_token();
01780          }
01781 
01782          if(token.type_ == '{')
01783          {
01784            eat_function_body();
01785            next_token();
01786         
01787            while(token.type_ == ':')
01788            {
01789              // presume we are passing a class definition with #if's in it
01790              // like this:
01791              //
01792              //   class X
01793              //   {
01794              //     X()
01795              //     #if 1
01796              //        : base1() {}
01797              //     #else
01798              //        : base2() {}   <-- the parser is now sitting at the ':'
01799              //     #endif
01800         
01801              while(token.type_ != '{')
01802                next_token();
01803         
01804              eat_function_body();
01805         
01806              next_token(); // eat the closing }
01807         
01808         
01809            }
01810         
01811          }
01812          else
01813              if(token.type_ == ',')
01814              {
01815                  next_token();
01816                  return parse_funcvar(log_forward_func_declarations, handling_typedefs);
01817              }
01818           else
01819                next_token();
01820         
01821          break;
01822         
01823   }
01824 
01825 
01826   return false;
01827 }
01828 
01829 static bool parse_extern()
01830 //
01831 //  parse extern variables and extern code definitions:
01832 //
01833 //    extern type varname;
01834 //    extern type funcname;
01835 //    extern "c" declaration;
01836 //    extern "c" { declarations ... }
01837 //
01838 {
01839   next_token();  // eat 'extern'
01840 
01841   if(token.type_ == '"')
01842   {
01843      next_token();
01844 
01845      if(token.type_ != '{')
01846      {
01847        return parse_declaration();
01848      }
01849 
01850      next_token();
01851 
01852      while(token.type_ != '}')
01853      {
01854        if(parse_declaration())
01855          return true;
01856      }
01857 
01858      next_token();
01859 
01860   }
01861   else
01862   {
01863      return parse_declaration();
01864   }
01865 
01866   return false;
01867 }
Generated on Wed Feb 29 22:50:04 2012 for CXXUtilities by  doxygen 1.6.3