muSED.h

Go to the documentation of this file.
00001 #include <cxxtls/foreach.h>
00002 #include <cxxtls/simple_regex.h>
00003 #include <cxxtls/charSetTranslator.h>
00004 #include <list>
00005 #include <memory>
00006 #include <map>
00007 #include <portable_io.h>
00008 
00012 
00013 namespace cxxtls
00014 {
00015 
00016 namespace muSED
00029 {
00030     using namespace cxxtls;
00031 
00032     // Micro sed engine.  Sed is the "stream editor" program on unix.
00033     // muSED lets you execute simplified sed scripts on string containers (vectors,
00034     // arrays, lists, deques, etc.).
00035 
00036 
00037 
00038     struct ScriptRuntimeContext;  // compiled script plus state variables and buffers
00039 
00040 //========================================================================================
00041      
00042      
// Advances firstChar past leading blanks, stopping at lastChar.
//
// A blank is any character whose value lies in the range [0, ' '], i.e. the
// space character and every control character below it (including NUL).
//
// firstChar  in/out: on return, refers to the first non-blank character, or
//            compares equal to lastChar when only blanks remained.
// lastChar   one-past-the-end position; never dereferenced.
template<class StringIterator1, class StringIterator2>
void skipBlanks(StringIterator1 &firstChar,
                StringIterator2 &lastChar
               )
{
    while(firstChar != lastChar)
    {
        // Check both ends of the range: where plain char is signed, bytes
        // >= 0x80 are negative and a bare "<= ' '" test would skip them as
        // if they were blanks, silently eating non-ASCII text.
        if(*firstChar < 0 || *firstChar > ' ')
            break;

        ++firstChar;
    }
}
00060 
00061 //========================================================================================
00062 
// Declaration only; the definition is not part of this header.
// Takes the string by non-const reference and returns nothing, so the result
// is presumably written back into `input`.
// NOTE(review): the call sites in this file describe the effect as "replace \n
// with newline, and so on" -- confirm the full set of escapes against the
// implementation.
00063      void expandEscapes(std::string &input)
00066          ;
00067 
00068 //========================================================================================
// Collects characters from [firstChar, lastChar) up to, but not including,
// the next unescaped occurrence of delim, and appends them to output.
//
// A backslash escapes the character after it: both the backslash and that
// character are copied verbatim, so an escaped delimiter does not end the
// string.  (The escape sequences are left unexpanded here.)  A NUL character
// also ends the scan, which lets NUL-terminated buffers be parsed.
//
// delim      the terminating delimiter character.
// output     receives the collected text (appended); untouched on failure.
// firstChar  in/out: on success it refers to the terminating delimiter, so
//            the caller can step over it with one ++; on failure it is
//            restored to its value on entry.
// lastChar   one-past-the-end position; never dereferenced.
//
// Returns true when a terminating delimiter was found.  Returns false when
// the input ends right after a backslash, or when the input ends (or a NUL
// is reached) before the delimiter.  Reporting a missing delimiter as
// success would let callers increment firstChar past lastChar.
template<class StringIterator1, class StringIterator2>
bool parseEscapedString(char            delim,
                        std::string    &output,
                        StringIterator1 &firstChar,
                        StringIterator2 &lastChar
                       )
{
    const StringIterator1 start = firstChar;

    // Build into a scratch string so that a failed parse leaves output clean.
    std::string collected;

    while(firstChar != lastChar)
    {
        const char c = *firstChar;

        if(c == 0 || c == delim)
            break;

        ++firstChar;

        if(c == '\\')
        {
            if(firstChar == lastChar)
            {
                // dangling backslash at end of input
                firstChar = start;
                return false;
            }

            collected += '\\';
            collected += *firstChar;
            ++firstChar;
        }
        else
        {
            collected += c;
        }
    }

    if(firstChar == lastChar || *firstChar != delim)
    {
        // ran out of input (or hit NUL) without seeing the delimiter
        firstChar = start;
        return false;
    }

    output += collected;
    return true;

}  // parseEscapedString()
00128 
00129 //========================================================================================
00130 
00131 
00132 
00133     struct Filter
00134 
00146 
00147     {
00148         // line number, regex, lastLine or other filter
00149 
00150         enum type { none, regex, lineNumber, lastLine };
00151 
00152         type type_;
00153 
00154         SimpleRegex regex_;
00155 
00156         size_t line_;
00157 
00158 
00159         Filter()
00160         : type_(none)
00161         , line_(0)
00162         {
00163         }
00164 
00165         Filter &operator= (Filter const &rhs)
00166         {
00167            type_ = rhs.type_;
00168 
00169            line_ = rhs.line_;
00170 
00171            if(type_ == regex)
00172            {
00173               // slight optimization, don't ccopy the regex unnecessarily
00174 
00175               regex_ = rhs.regex_;
00176            }
00177 
00178            return *this;
00179 
00180         }
00181 
00182         bool passes(size_t lineNo, bool isLast, std::string const &line)
00194             ;
00195 
00196         template<class StringIterator>
00197         bool parse(StringIterator &first, StringIterator const &last)
00211         {
00212             skipBlanks(first, last);
00213 
00214             if(first == last)
00215                return false;
00216 
00217             type_ = none;
00218 
00219             StringIterator savedFirst = first;
00220 
00221             char c = *first;
00222 
00223             switch(c)
00224             {
00225                case '/':
00226                    {
00227                       //
00228                       //  parse a regex, return false if not found (also restore cp
00229                       //
00230 
00231                       std::string buf;
00232 
00233                       ++first;
00234 
00235                       if(!parseEscapedString('/', buf, first, last))
00236                       {
00237                          first = savedFirst;
00238                          return false;
00239                       }
00240 
00241                       type_ = regex;
00242                       regex_ = buf;
00243                       ++first;  // skip the trailing /
00244 
00245                    }
00246                    return true; // if we get here!
00247 
00248                case '$':
00249                    type_ = lastLine;
00250                    ++first;
00251                    return true;
00252 
00253                default:
00254                    if(!isdigit(c))
00255                       return false;   // does not indicate an error
00256                    {
00257                       
00258                       char buffer[256];
00259 
00260                       char *p = &buffer[0];
00261                       char *e = &buffer[sizeof(buffer)-1];
00262 
00263                       while(   (p != e) 
00264                             && (first != last)
00265                             && (c = *first)
00266                             && isdigit(c)
00267                            )
00268                       {
00269                           *p++ = c;
00270                           ++first;
00271                       }
00272 
00273                       *p++ = 0;
00274 
00275                       int lineNo = -1;
00276 
00277                       sscanf(buffer, "%d", &lineNo);
00278                       
00279                       if(lineNo == -1)
00280                       {
00281                          first = savedFirst;
00282                          return false;
00283                       }
00284 
00285                       line_ = lineNo;
00286                       type_ = lineNumber;
00287 
00288                    }
00289                    return true; // if we get here
00290             }
00291 
00292             return true;
00293         }
00294 
00295         virtual std::string debugString() const;
00296 
00297 
00298 
00299 
00300     };  // struct Filter
00301 //========================================================================================
// Forward declaration: ActivationMap below is keyed by Statement pointers.
00302     struct Statement;
00303 
// State values stored per statement in an ActivationMap.
// NOTE(review): the meanings noted on the enumerators are inferred from their
// names only -- confirm against the code that reads and writes the map.
00304     enum LineRangeActivationState
00308     {
00309         neverExecuted=0,   // presumably: the line range has not been entered yet
00310         inRange,           // presumably: currently inside the line range
00311         lastLine,          // presumably: on the final line of the line range
00312     };
00313 
00314 
// Maps a statement to its activation state.
00315     typedef std::map<Statement*, LineRangeActivationState> ActivationMap;
00316 
00317 //========================================================================================
00318 
00319     struct Statement
00413     {   // struct Statement
00414 
00415          std::list<Statement *>  childStatements_;
00416          ActivationMap           childStatementActivationMap_;
00417 
00418          Statement *parentStatement_;  // parent owns me, not the other way around
00419 
00420          Statement(Statement *parentStatement)  // null parent only ok for very top level statement
00421          : parentStatement_(parentStatement)
00422          {
00423          }
00424 
00425          LineRangeActivationState activationState() ;
00426 
00427          void setActivationState(LineRangeActivationState newState);
00428 
00429          virtual bool execute(ScriptRuntimeContext *context) = 0; 
00433 
00434          void clear()
00435          {
00436             CXXTLS_FOREACH(Statement *cur, childStatements_)
00437             {
00438                delete cur;
00439             }
00440 
00441             childStatements_.clear();
00442          }
00443 
00444          void adoptChild(Statement *newbie)
00449          {
00450              childStatements_.push_back(newbie);
00451          }
00452 
00453 
00454          virtual ~Statement()
00455          {
00456             clear();
00457          }
00458 
00459          std::string debugPrefix() const
00460          {
00461              char buffer[256];
00462 
00463              snprintf(buffer, sizeof(buffer), "ST[%p]=", this);
00464 
00465              return buffer;
00466          }
00467 
00468 
00469          virtual std::string debugString() const
00470          {
00471             return "<invalid>";
00472          }
00473 
00474          static void debugPrint(Statement *p)
00475          {
00476             printf("%s\n", p->debugString().c_str());
00477             fflush(stdout);
00478          }
00479 
00480 
00481     
00482     };  // struct statement
00483 //========================================================================================
00484     struct OuterStatement
00485     :  public Statement
00489     {
00490         OuterStatement()
00491         :  Statement(0)
00492         {
00493         }
00494 
00495 
00496         virtual bool execute(ScriptRuntimeContext *context);
00497 
00498 
00499          virtual std::string debugString() const
00500          {
00501             return "<Outer>";
00502          }
00503 
00504 
00505 
00506 
00507     };
00508 //========================================================================================
00509 
00510     struct FilterableStatement
00511     :  public Statement
00544     {  // struct FilterableStatement
00545 
00546        virtual ~FilterableStatement(){}
00547 
00548        int filters_;
00549 
00550        Filter filter1_;
00551        Filter filter2_;
00552        bool   inverted_;
00553 
00554 
00555        FilterableStatement(Statement*         parent,
00556                            int                fc, 
00557                            Filter const      &f1, 
00558                            Filter const      &f2,
00559                            bool               inverted
00560                           )
00561         :  Statement(parent)
00562         ,  filters_(fc)
00563         ,  filter1_(f1)
00564         ,  filter2_(f2)
00565         ,  inverted_(inverted)
00566         {
00567         }
00568 
00569 
00570         bool active(ScriptRuntimeContext *context, Statement *statement);  
00589 
00590         virtual std::string debugStringPrefix() const;
00591 
00592             
00593     }; // struct FilterableStatement
00594 
00595 //========================================================================================
00596 
00597     struct BlockStatement
00598     : public FilterableStatement
00624     {
00625 
00626         std::auto_ptr<SimpleRegex>  whileCondition_;
00627 
00628         bool execute(ScriptRuntimeContext *context);
00629 
00630         BlockStatement(Statement *parent,
00631                        int filterCount,
00632                        Filter &fc1,
00633                        Filter &fc2,
00634                        bool    filterInverted,
00635                        SimpleRegex *condition
00636                       )
00637         :  FilterableStatement(parent, filterCount, fc1, fc2, filterInverted)
00638         ,  whileCondition_(condition)
00639         {
00640         }
00641            
00642          virtual std::string debugString() const;
00643 
00644          // note that there is no parse() function here because the compileStream() method, below(),
00645          // parses the block statement for you.
00646 
00647 
00648     }; // BlockStatement
00649 
00650 //========================================================================================
00651 
00652     struct SubstStatement
00653     :  public FilterableStatement
00680     {
00681 
00682         virtual ~SubstStatement(){}
00683 
00684         int count_;  // number of substitutions
00685 
00686         SimpleRegex target_;
00687         SimpleRegex replacement_;
00688 
00689         SubstStatement(Statement         *parent,
00690                        int                fc, 
00691                        Filter const      &f1,
00692                        Filter const      &f2,
00693                        bool               filtersInverted,
00694                        std::string const &tgt,
00695                        std::string const &rep,
00696                        int                count,
00697                        bool               insensitive
00698                       )
00699         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
00700         , count_(count)
00701         , target_(tgt, (insensitive? "i" : ""))
00702         , replacement_(rep)
00703         {
00704         }
00705 
00706         bool execute(ScriptRuntimeContext *context);
00712             
00713         template<class StringIterator1, class StringIterator2, class CompiledScript>
00714         static Statement* parse(Statement             *parent,
00715                                 int                   filterCount, 
00716                                 Filter const          &f1, 
00717                                 Filter const          &f2, 
00718                                 bool                   inverted,
00719                                 StringIterator1       &firstChar,
00720                                 StringIterator2 const &lastChar,
00721                                 size_t                 line,
00722                                 CompiledScript        &script
00723                                )
00744         {
00745         
00746             // parse the sed 's' command
00747         
00748             ++firstChar;
00749         
00750             if(firstChar == lastChar)
00751             {
00752                script.fail("missing / in s command", line);
00753                return 0;
00754             }
00755         
00756             char c = *firstChar++;
00757         
00758             if(    (c <= ' ') 
00759                ||  (c >= 'a' && c <= 'z')
00760                ||  (c >= 'A' && c <= 'Z')
00761               )
00762             {
00763                std::string tmp = "invalid s command delimiter '";
00764                tmp += c;
00765                tmp += "'";
00766         
00767                script.fail(tmp, line);
00768                return 0;
00769         
00770             }
00771         
00772             char delim = c;
00773         
00774             std::string target;
00775             std::string replacement;
00776             int count=0;
00777         
00778             // we are currently here in the parsing of an s command:
00779             //
00780             //   s/target/replacement/1
00781             //     ^
00782         
00783             if(!parseEscapedString(delim, target, firstChar, lastChar))
00784             {
00785                script.fail("badly formed LHS in s command", line);
00786                return 0;
00787             }
00788 
00789 
00790         
00791             ++firstChar;
00792         
00793             // we are currently here in the parsing of an s command:
00794             //
00795             //   s/target/replacement/1
00796             //            ^
00797         
00798             
00799             if(!parseEscapedString(delim, replacement, firstChar, lastChar))
00800             {
00801                script.fail("badly formed RHS in s command", line);
00802                return 0;
00803             }
00804         
00805             ++firstChar;
00806         
00807             if(   (firstChar == lastChar)
00808                || (   (*firstChar != '1')
00809                    && (*firstChar != 'g')
00810                   )
00811               )
00812             {
00813                script.fail("badly formed count in s command -- expected 1 or g", line);
00814                return 0;
00815                 
00816             }
00817         
00818             if(*firstChar == '1')
00819                count=1;
00820         
00821             // hah!  good syntax
00822         
00823             ++firstChar;  // skip the g or the 1
00824 
00825             bool insensitive=false;
00826 
00827             if(firstChar != lastChar && *firstChar == 'i')
00828             {
00829                ++firstChar;
00830                insensitive = true;
00831             }
00832 
00833         
00834             expandEscapes(target);       // replace \n in target with newline, and so on.
00835             expandEscapes(replacement);  // replace \n in target with newline, and so on.
00836 
00837 
00838             return new SubstStatement(   parent,
00839                                          filterCount,
00840                                          f1,
00841                                          f2,
00842                                          inverted,
00843                                          target,
00844                                          replacement,
00845                                          count,
00846                                          insensitive
00847                                      );
00848         }
00849 
00850          virtual std::string debugString() const;
00851 
00852 
00853           
00854     }; // struct SubstStatement
00855 
00856 //========================================================================================
00857 
00858     struct TranslateStatement
00859     :  public FilterableStatement
00887     {
00888 
00889         virtual ~TranslateStatement(){}
00890 
00891         std::string target_;
00892         std::string replacement_;
00893 
00894         CharSetTranslator translator_;
00895 
00896         TranslateStatement(Statement         *parent,
00897                        int                fc, 
00898                        Filter const      &f1,
00899                        Filter const      &f2,
00900                        bool               filtersInverted,
00901                        std::string const &tgt,
00902                        std::string const &rep
00903                       )
00904         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
00905         , target_(tgt)
00906         , replacement_(rep)
00907         , translator_(tgt,rep)
00908         {
00909         }
00910 
00911         bool execute(ScriptRuntimeContext *context);
00919             
00920         template<class StringIterator1, class StringIterator2, class CompiledScript>
00921         static Statement* parse(Statement             *parent,
00922                                 int                   filterCount, 
00923                                 Filter const          &f1, 
00924                                 Filter const          &f2, 
00925                                 bool                   inverted,
00926                                 StringIterator1       &firstChar,
00927                                 StringIterator2 const &lastChar,
00928                                 size_t                 line,
00929                                 CompiledScript        &script
00930                                )
00951         {
00952         
00953             // parse the sed 'y' command
00954         
00955             ++firstChar;
00956         
00957             if(firstChar == lastChar)
00958             {
00959                script.fail("missing / in s command", line);
00960                return 0;
00961             }
00962         
00963             char c = *firstChar++;
00964         
00965             if(    (c <= ' ') 
00966                ||  (c >= 'a' && c <= 'z')
00967                ||  (c >= 'A' && c <= 'Z')
00968               )
00969             {
00970                std::string tmp = "invalid y command delimiter '";
00971                tmp += c;
00972                tmp += "'";
00973         
00974                script.fail(tmp, line);
00975                return 0;
00976         
00977             }
00978         
00979             char delim = c;
00980         
00981             std::string target;
00982             std::string replacement;
00983         
00984             // we are currently here in the parsing of an y command:
00985             //
00986             //   y/target/replacement/
00987             //     ^
00988         
00989             if(!parseEscapedString(delim, target, firstChar, lastChar))
00990             {
00991                script.fail("badly formed LHS in 'y' command", line);
00992                return 0;
00993             }
00994         
00995             ++firstChar;
00996         
00997             // we are currently here in the parsing of an s command:
00998             //
00999             //   y/target/replacement/
01000             //            ^
01001         
01002             
01003             if(!parseEscapedString(delim, replacement, firstChar, lastChar))
01004             {
01005                script.fail("badly formed RHS in 'y' command", line);
01006                return 0;
01007             }
01008         
01009             ++firstChar;
01010         
01011         
01012             // hah!  good syntax
01013         
01014             expandEscapes(target);       // replace \n in target with newline, and so on.
01015             expandEscapes(replacement);  // replace \n in target with newline, and so on.
01016 
01017 
01018             return new TranslateStatement(   parent,
01019                                              filterCount,
01020                                              f1,
01021                                              f2,
01022                                              inverted,
01023                                              target,
01024                                              replacement
01025                                          );
01026         }
01027 
01028          virtual std::string debugString() const;
01029 
01030 
01031           
01032     }; // struct TranslateStatement
01033 
01034 //========================================================================================
01035 
01036     struct PrintStatement
01037     :  public FilterableStatement
01075     {
01076 
01077         virtual ~PrintStatement(){}
01078 
01079         bool firstLineOnly_;   
01080 
01081 
01082 
01083 
01084         PrintStatement(Statement         *parent,
01085                        int                fc,
01086                        Filter const      &f1,
01087                        Filter const      &f2,
01088                        bool               filtersInverted,
01089                        bool               firstLineOnly
01090                       )
01091         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01092         , firstLineOnly_(firstLineOnly)
01093         {
01094         }
01095 
01096 
01097         bool execute(ScriptRuntimeContext *context);
01099 
01100         template<class StringIterator1, class StringIterator2, class CompiledScript>
01101         static Statement* parse(Statement             *parent,
01102                                 int                   filterCount, 
01103                                 Filter const          &f1, 
01104                                 Filter const          &f2, 
01105                                 bool                   inverted,
01106                                 StringIterator1       &firstChar,
01107                                 StringIterator2 const &lastChar,
01108                                 size_t                 line,
01109                                 CompiledScript        &script
01110                                )
01111         {
01112 
01113             bool firstLineOnly = *firstChar == 'P';
01114 
01115             ++firstChar;  // skip the p
01116 
01117             return new PrintStatement(   parent,
01118                                          filterCount,
01119                                          f1,
01120                                          f2,
01121                                          inverted,
01122                                          firstLineOnly
01123                                      );
01124         }
01125 
01126          virtual std::string debugString() const;
01127 
01128 
01129 
01130           
01131     }; // struct PrintStatement
01132 
01133 //========================================================================================
01134 
01135     struct SwapStatement
01136     :  public FilterableStatement
01147     {
01148 
01149         virtual ~SwapStatement(){}
01150 
01151         SwapStatement(Statement         *parent,
01152                        int                fc,
01153                        Filter const      &f1,
01154                        Filter const      &f2,
01155                        bool               filtersInverted
01156                       )
01157         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01158         {
01159         }
01160 
01161 
01162         bool execute(ScriptRuntimeContext *context);
01164 
01165         template<class StringIterator1, class StringIterator2, class CompiledScript>
01166         static Statement* parse(Statement             *parent,
01167                                 int                   filterCount, 
01168                                 Filter const          &f1, 
01169                                 Filter const          &f2, 
01170                                 bool                   inverted,
01171                                 StringIterator1       &firstChar,
01172                                 StringIterator2 const &lastChar,
01173                                 size_t                 line,
01174                                 CompiledScript        &script
01175                                )
01176         {
01177 
01178             ++firstChar;  // skip the p
01179 
01180             return new SwapStatement(   parent,
01181                                          filterCount,
01182                                          f1,
01183                                          f2,
01184                                          inverted
01185                                      );
01186         }
01187 
01188          virtual std::string debugString() const;
01189 
01190 
01191           
01192     }; // struct SwapStatement
01193 
01194 
01195 //========================================================================================
01196 
01197     struct DeleteStatement
01198     :  public FilterableStatement
01213     {
01214 
01215         virtual ~DeleteStatement(){}
01216 
01217         DeleteStatement(Statement         *parent,
01218                        int                fc,
01219                        Filter const      &f1,
01220                        Filter const      &f2,
01221                        bool               filtersInverted
01222                       )
01223         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01224         {
01225         }
01226 
01227 
01228         bool execute(ScriptRuntimeContext *context);
01230 
01231         template<class StringIterator1, class StringIterator2, class CompiledScript>
01232         static Statement* parse(Statement             *parent,
01233                                 int                   filterCount, 
01234                                 Filter const          &f1, 
01235                                 Filter const          &f2, 
01236                                 bool                   inverted,
01237                                 StringIterator1       &firstChar,
01238                                 StringIterator2 const &lastChar,
01239                                 size_t                 line,
01240                                 CompiledScript        &script
01241                                )
01242         {
01243 
01244             ++firstChar;  // skip the d
01245 
01246             return new DeleteStatement(   parent,
01247                                          filterCount,
01248                                          f1,
01249                                          f2,
01250                                          inverted
01251                                      );
01252         }
01253 
01254          virtual std::string debugString() const;
01255 
01256 
01257           
01258     }; // struct DeleteStatement
01259 
01260 
01261 //========================================================================================
01262 
01263     struct QuitStatement
01264     :  public FilterableStatement
01280     {
01281         bool printCurrent_;             
01282 
01283 
01284         virtual ~QuitStatement(){}
01285 
01286         QuitStatement(Statement         *parent,
01287                        int                fc,
01288                        Filter const      &f1,
01289                        Filter const      &f2,
01290                        bool               filtersInverted,
01291                        bool               printCurrent
01292                       )
01293         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01294         , printCurrent_(printCurrent)
01295         {
01296         }
01297 
01298 
01299         bool execute(ScriptRuntimeContext *context);
01301 
01302         template<class StringIterator1, class StringIterator2, class CompiledScript>
01303         static Statement* parse(Statement             *parent,
01304                                 int                   filterCount, 
01305                                 Filter const          &f1, 
01306                                 Filter const          &f2, 
01307                                 bool                   inverted,
01308                                 StringIterator1       &firstChar,
01309                                 StringIterator2 const &lastChar,
01310                                 size_t                 line,
01311                                 CompiledScript        &script
01312                                )
01313         {
01314             bool printCurrent = (*firstChar) == 'q';
01315 
01316             ++firstChar;  // skip the q
01317 
01318             return new QuitStatement(   parent,
01319                                          filterCount,
01320                                          f1,
01321                                          f2,
01322                                          inverted,
01323                                          printCurrent
01324                                      );
01325         }
01326 
01327          virtual std::string debugString() const;
01328 
01329 
01330           
01331     }; // struct QuitStatement
01332 
01333 
01334 //========================================================================================
01335 
01336     struct HoldStatement
01337     :  public FilterableStatement
01350 
01351     {
01352 
01353         bool append_;
01354 
01355         virtual ~HoldStatement(){}
01356 
01357         HoldStatement(Statement         *parent,
01358                        int                fc,
01359                        Filter const      &f1,
01360                        Filter const      &f2,
01361                        bool               filtersInverted,
01362                        bool               append=false
01363                       )
01364         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01365         , append_(append)
01366         {
01367         }
01368 
01369 
01370         bool execute(ScriptRuntimeContext *context);
01372 
01373         template<class StringIterator1, class StringIterator2, class CompiledScript>
01374         static Statement* parse(Statement             *parent,
01375                                 int                   filterCount, 
01376                                 Filter const          &f1, 
01377                                 Filter const          &f2, 
01378                                 bool                   inverted,
01379                                 StringIterator1       &firstChar,
01380                                 StringIterator2 const &lastChar,
01381                                 size_t                 line,
01382                                 CompiledScript        &script,
01383                                 bool                   append
01384                                )
01385         {
01386 
01387             ++firstChar;  // skip the p
01388 
01389             return new HoldStatement(   parent,
01390                                          filterCount,
01391                                          f1,
01392                                          f2,
01393                                          inverted,
01394                                          append
01395                                      );
01396         }
01397 
01398 
01399          virtual std::string debugString() const;
01400 
01401 
01402           
01403     }; // struct HoldStatement
01404 
01405 
01406 //========================================================================================
01407 
01408     struct GetStatement
01409     :  public FilterableStatement
01422 
01423     {
01424 
01425         bool append_;
01426 
01427         virtual ~GetStatement(){}
01428 
01429         GetStatement(Statement         *parent,
01430                        int                fc,
01431                        Filter const      &f1,
01432                        Filter const      &f2,
01433                        bool               filtersInverted,
01434                        bool               append=false
01435                       )
01436         : FilterableStatement(parent, fc, f1, f2, filtersInverted)
01437         , append_(append)
01438         {
01439         }
01440 
01441 
01442         bool execute(ScriptRuntimeContext *context);
01444 
01445         template<class StringIterator1, class StringIterator2, class CompiledScript>
01446         static Statement* parse(Statement             *parent,
01447                                 int                   filterCount, 
01448                                 Filter const          &f1, 
01449                                 Filter const          &f2, 
01450                                 bool                   inverted,
01451                                 StringIterator1       &firstChar,
01452                                 StringIterator2 const &lastChar,
01453                                 size_t                 line,
01454                                 CompiledScript        &script,
01455                                 bool                   append
01456                                )
01457         {
01458 
01459             ++firstChar;  // skip the p
01460 
01461             return new GetStatement(   parent,
01462                                          filterCount,
01463                                          f1,
01464                                          f2,
01465                                          inverted,
01466                                          append
01467                                      );
01468         }
01469 
01470          virtual std::string debugString() const;
01471 
01472 
01473           
01474     }; // struct GetStatement
01475 
01476 
01477 //========================================================================================
01478 
01479     struct CompiledScript
01490 
01491     {
01492 
01493          OuterStatement  outerStatement_; // this statement alone has a null parent
01494 
01495          void clear()
01496          {
01497              outerStatement_.clear();
01498          }
01499 
01500          ~CompiledScript()
01501          {
01502             clear();
01503          }
01504 
01505 
01506          bool ok_;  // script has valid contents.
01507 
01508          std::string error_;
01509 
01510 
01511          template<class StringContainer>
01512          CompiledScript(StringContainer const &script)
01513          : ok_(false)
01514          , error_("Error:  no script specified")
01515          {
01516 
01517              compile(script.begin(), script.end());
01518 
01519          }
01520 
01521          bool ok() const { return ok_; }
01522 
01523          std::string const &error() const { return error_; }
01524 
01525          bool execute(ScriptRuntimeContext *context)
01526          {
01527              // execute all statements on the specified line
01528 
01529              if(!ok_)
01530                 return true; // can't execute trash
01531 
01532 
01533              return outerStatement_.execute(context);
01534          }
01535 
01536 
01537          void fail(std::string const &errormsg, int line)
01539          {
01540             ok_    = false;
01541 
01542             char buffer[40];
01543 
01544             snprintf(buffer, sizeof(buffer), " at line %d",  line);
01545 
01546 
01547             error_ = errormsg + buffer;
01548 
01549             clear();  // it must all compile or none of it does.
01550             
01551          }
01552 
01553          typedef std::pair<std::string::const_iterator&, std::string::const_iterator&> CharRange;
01554 
01555 
01556          template<class LineRange>
01557          static void compileStream(  LineRange lineRange,                  // pair of REFERENCES to iterators
01558                                      CharRange charRange,                  // pair of REFERENCES to iterators
01559                                      Statement *parent,                    // owner of statements to be created
01560                                      int &line,
01561                                      CompiledScript *script
01562                                   )
01563          {
01564 
01568 
01569 
01573 
01574              std::string::const_iterator &firstChar = charRange.first;   // aliases to charRange references
01575              std::string::const_iterator &lastChar  = charRange.second;
01576 
01577 
01578              while(lineRange.first != lineRange.second)
01579              {
01580                 // parse all lines in the script
01581 
01582                 ++line;
01583 
01584 
01585                 Filter filter1;
01586                 Filter filter2;
01587                 int    filterCount=0;
01588 
01589                 while(firstChar != lastChar)
01590                 {
01591                     // process all characters on the current line, with the
01592                     // possibility that if we hit a new line within an unclosed
01593                     // curly-brace block that the current line will be replaced
01594                     // with the next line, ad infinitum, until we hit the end
01595                     // of the curly block and all it's nested components...
01596 
01597 
01598                     skipBlanks(firstChar, lastChar);
01599 
01600                     if(firstChar == lastChar)
01601                        break;
01602 
01603 
01604                     if( *firstChar == ';' || *firstChar == '\n')
01605                     {
01606                        ++firstChar;
01607                        continue;
01608                     }
01609 
01610 
01611                     if(filterCount == 0)
01612                     {
01613                         if(filter1.parse(firstChar, lastChar))
01614                         {
01615                            ++filterCount;
01616 
01617                            if(firstChar != lastChar && *firstChar == ',')
01618                            {
01619                                ++firstChar;  // skip the comma
01620 
01621                                if(filter2.parse(firstChar, lastChar))
01622                                   ++filterCount;
01623 
01624                                if(filterCount != 2)
01625                                {
01626                                   script->fail("Error, expected filter expression after ','", line);
01627                                }
01628                            }
01629                         }
01630                     }
01631 
01632 
01633 
01634                     // filterCount will tell us if there are any filters
01635                     // at all.
01636 
01637                     // now parse the statement
01638 
01639                     skipBlanks(firstChar, lastChar);
01640 
01641                     bool filterInverted = false;
01642 
01643                     if(firstChar != lastChar && *firstChar == '!')
01644                     {
01645                         filterInverted = true;
01646                         ++firstChar;
01647                         skipBlanks(firstChar, lastChar);
01648                     }
01649 
01650 
01651                     if(firstChar != lastChar)
01652                     { // there is a command still on this line
01653 
01654 
01655                        // we have a statement, now parse it!
01656 
01657                        char cmd = *firstChar;
01658 
01659                        // NOTE:  the following code is stupid.
01660                        // Since all sed commands have a unique 1 character identifier,
01661                        // we should have a table matching that character with the
01662                        // kind of statement it matches, and let the parsing code
01663                        // reside in the statement itself.  As it is, this function is
01664                        // too big and is coupled to statement contstructors...
01665 
01666 
01667                        if(cmd == '#')
01668                        {
01669                           // this line is a comment
01670                           firstChar = lastChar;
01671                        }
01672                        else
01673                        if(cmd ==  's')
01674                        {
01675 
01676                          std::auto_ptr<Statement> s(SubstStatement::parse(parent,
01677                                                                           filterCount,
01678                                                                           filter1,
01679                                                                           filter2,
01680                                                                           filterInverted,
01681                                                                           firstChar,
01682                                                                           lastChar,
01683                                                                           line,
01684                                                                           *script
01685                                                     )
01686                                               );
01687                          if(s.get())
01688                          {
01689                              parent->adoptChild(s.release());
01690                          }  
01691                          else
01692                          {
01693                             return;
01694                          }
01695 
01696                        }
01697                        else
01698                        if(cmd ==  'y')
01699                        {
01700 
01701                          std::auto_ptr<Statement> s(TranslateStatement::parse(parent,
01702                                                                           filterCount,
01703                                                                           filter1,
01704                                                                           filter2,
01705                                                                           filterInverted,
01706                                                                           firstChar,
01707                                                                           lastChar,
01708                                                                           line,
01709                                                                           *script
01710                                                     )
01711                                               );
01712                          if(s.get())
01713                          {
01714                              parent->adoptChild(s.release());
01715                          }  
01716                          else
01717                          {
01718                             return;
01719                          }
01720 
01721                        }
01722                        else
01723                        if( cmd == 'p' || cmd == 'P')
01724                        {
01725                                                  
01726                            std::auto_ptr<Statement> s(PrintStatement::parse(parent,
01727                                                                             filterCount,
01728                                                                             filter1,
01729                                                                             filter2,
01730                                                                             filterInverted,
01731                                                                             firstChar,
01732                                                                             lastChar,
01733                                                                             line,
01734                                                                             *script
01735                                                                             )
01736                                                      );
01737                            
01738                           if(s.get())
01739                           {
01740                               parent->adoptChild(s.release());
01741                           }
01742                           else
01743                           {
01744                               return;
01745                           }
01746 
01747                        }
01748                        else
01749                        if( cmd == 'd')
01750                        {
01751                                                  
01752                            std::auto_ptr<Statement> s(DeleteStatement::parse(parent,
01753                                                                             filterCount,
01754                                                                             filter1,
01755                                                                             filter2,
01756                                                                             filterInverted,
01757                                                                             firstChar,
01758                                                                             lastChar,
01759                                                                             line,
01760                                                                             *script
01761                                                                             )
01762                                                      );
01763                            
01764                           if(s.get())
01765                           {
01766                               parent->adoptChild(s.release());
01767                           }
01768                           else
01769                           {
01770                               return;
01771                           }
01772 
01773                        }
01774                        else
01775                        if( cmd == 'q' || cmd == 'Q' )
01776                        {
01777                                                  
01778                            std::auto_ptr<Statement> s(QuitStatement::parse(parent,
01779                                                                             filterCount,
01780                                                                             filter1,
01781                                                                             filter2,
01782                                                                             filterInverted,
01783                                                                             firstChar,
01784                                                                             lastChar,
01785                                                                             line,
01786                                                                             *script
01787                                                                             )
01788                                                      );
01789                            
01790                           if(s.get())
01791                           {
01792                               parent->adoptChild(s.release());
01793                           }
01794                           else
01795                           {
01796                               return;
01797                           }
01798 
01799                        }
01800                        else
01801                        if( cmd == 'h'  || cmd == 'H' )
01802                        {
01803                                                  
01804                            std::auto_ptr<Statement> s(HoldStatement::parse(parent,
01805                                                                             filterCount,
01806                                                                             filter1,
01807                                                                             filter2,
01808                                                                             filterInverted,
01809                                                                             firstChar,
01810                                                                             lastChar,
01811                                                                             line,
01812                                                                             *script,
01813                                                                             cmd == 'H'
01814                                                                             )
01815                                                      );
01816                            
01817                           if(s.get())
01818                           {
01819                               parent->adoptChild(s.release());
01820                           }
01821                           else
01822                           {
01823                               return;
01824                           }
01825 
01826                        }
01827                        else
01828                        if( cmd == 'g'  || cmd == 'G' )
01829                        {
01830                                                  
01831                            std::auto_ptr<Statement> s(GetStatement::parse(parent,
01832                                                                             filterCount,
01833                                                                             filter1,
01834                                                                             filter2,
01835                                                                             filterInverted,
01836                                                                             firstChar,
01837                                                                             lastChar,
01838                                                                             line,
01839                                                                             *script,
01840                                                                             cmd == 'G'
01841                                                                             )
01842                                                      );
01843                            
01844                           if(s.get())
01845                           {
01846                               parent->adoptChild(s.release());
01847                           }
01848                           else
01849                           {
01850                               return;
01851                           }
01852 
01853                        }
01854                        else
01855                        if( cmd == 'x')
01856                        {
01857                                                  
01858                            std::auto_ptr<Statement> s(SwapStatement::parse(parent,
01859                                                                             filterCount,
01860                                                                             filter1,
01861                                                                             filter2,
01862                                                                             filterInverted,
01863                                                                             firstChar,
01864                                                                             lastChar,
01865                                                                             line,
01866                                                                             *script
01867                                                                             )
01868                                                      );
01869                            
01870                           if(s.get())
01871                           {
01872                               parent->adoptChild(s.release());
01873                           }
01874                           else
01875                           {
01876                               return;
01877                           }
01878 
01879                        }
01880                        else
01881                        if(cmd == '{' || cmd == 'W')
01882                        {
01883                           // handle block statements here, recursively
01884 
01885                          std::auto_ptr<SimpleRegex> regex;
01886 
01887                          if(cmd == 'W')
01888                          {
01889                              // parse the regex
01890 
01891                              ++firstChar;  // skip the W
01892 
01893                              skipBlanks(firstChar, lastChar);
01894 
01895                              char c = ' ';
01896 
01897                              if(firstChar != lastChar)
01898                                  c = *firstChar;
01899 
01900                              if(    (c <= ' ') 
01901                                 ||  (c >= 'a' && c <= 'z')
01902                                 ||  (c >= 'A' && c <= 'Z')
01903                                )
01904                              {
01905                                 std::string tmp = "invalid W command regex delimiter '";
01906                                 tmp += c;
01907                                 tmp += "'";
01908                          
01909                                 script->fail(tmp, line);
01910                                 return ;
01911                          
01912                              }
01913 
01914                              std::string target;
01915                              
01916                              // we are currently here in the parsing of an s command:
01917                              //
01918                              //     /regex/
01919                              //     ^
01920                              
01921                              ++firstChar;
01922                              if(!parseEscapedString(c, target, firstChar, lastChar))
01923                              {
01924                                 script->fail("badly formed W command regex", line);
01925                                 return;
01926                              }
01927 
01928                              ++firstChar;
01929 
01930                              // we are currently here in the parsing of an s command:
01931                              //
01932                              //     /regex/
01933                              //            ^
01934 
01935                              skipBlanks(firstChar, lastChar);
01936 
01937                              if(firstChar == lastChar || *firstChar != '{')
01938                              {
01939                                 script->fail("Missing '{' after 'W' command regex", line);
01940                                 return ;
01941                              }
01942 
01943                              expandEscapes(target);
01944                              regex.reset( new SimpleRegex(target) );
01945 
01946                          }
01947 
01948 
01949                          ++firstChar;
01950 
01951                          Statement *childBlock;
01952 
01953                          parent->adoptChild( childBlock =
01954                                              new BlockStatement(parent,
01955                                                               filterCount,
01956                                                               filter1,
01957                                                               filter2,
01958                                                               filterInverted,
01959                                                               regex.release()
01960                                                              )
01961                                                 );
01962 
01963                          // note:  lineRange and charRange are pairs of references to
01964                          // iterators, so the call below passes this function's input
01965                          // references to its child.  
01966 
01967                          compileStream(lineRange, charRange, childBlock, line, script);
01968 
01969                          if( (firstChar == lastChar) || *firstChar != '}')
01970                          {
01971                             if(script->ok())
01972                                 script->fail("block statement missing }", line);
01973 
01974                             return;
01975                          }
01976 
01977                          ++firstChar;
01978 
01979 
01980 
01981                        }
01982                        else
01983                        if(cmd == '}')
01984                        {
01985                           // although this might be an error, treat the }
01986                           // character as an end of script flag.  This is
01987                           // needed or at least helpful to recursive block
01988                           // parsing.
01989 
01990                           return;
01991 
01992                        }
01993                        else
01994                        {
01995                            std::string tmp = "unexpected command: ";
01996 
01997                            tmp += cmd;
01998 
01999                            script->fail(tmp, line);
02000 
02001                            return;
02002                           
02003                        }
02004 
02005 
02006 
02007                     } // there is a command still on this line
02008 
02009 
02010                     // by this point, the statement on the currentline has been processed,
02011                     // but we must deal with whatever comes after the valid syntax on the line.
02012                     // we will handle it by skipping blanks and then interpreting what we 
02013                     // see.
02014 
02015                     skipBlanks(firstChar, lastChar);
02016 
02017                     if(firstChar != lastChar)
02018                     { // there may be more on this same line
02019 
02020                         if(*firstChar == ';')
02021                         {
02022                            ++firstChar;     // the skip the ';'
02023                            filterCount=0;
02024 
02025                         }
02026                         else
02027                         if(*firstChar == '#')
02028                         {
02029                            firstChar = lastChar;  // end of line comment
02030                         }
02031 
02032                        
02033                     } // there may be more on this same line
02034 
02035 
02036                     
02037                 }  // process the current line
02038 
02039 
02040 
02041 
02042 
02043 
02044                 ++lineRange.first;  // step to next line
02045 
02046                 if(lineRange.first != lineRange.second)
02047                 {
02048                    firstChar = lineRange.first->begin();
02049                    lastChar  = lineRange.first->end();
02050                 }
02051 
02052 
02053              }
02054 
02055              
02056          }
02057 
02058 
02059          template<class StringIterator>
02060          void compile(StringIterator first, StringIterator last)
02061          {
02062              if(first == last)
02063                return;  // leave the CompiledScript in the current
02064                         // error state
02065 
02066              ok_    = true;
02067              error_ = "";
02068 
02069              int line=0;
02070 
02071              std::string::const_iterator firstChar = first->begin(),
02072                                          lastChar  = first->end();
02073 
02074              typedef std::pair<StringIterator&, StringIterator&>                           LineRange;
02075 
02076              compileStream( LineRange(first,last), 
02077                             CharRange(firstChar,lastChar),
02078                             &outerStatement_, 
02079                             line, 
02080                             this 
02081                           );
02082 
02083          }
02084 
02085 
02086     };  // struct CompiledScript
02087 
02088 //========================================================================================
02089 
02090     struct ScriptRuntimeContext
02116     {
02117        CompiledScript &script_;
02118 
02119        bool running_;                               
02120 
02121 
02122        std::list<std::string> *output_;             
02123 
02124 
02125        size_t lineNumber_;                          
02126                                                                                                     
02127        bool   isLast_;                              
02128 
02129 
02130 
02131        std::map<Statement*, bool>  activationMap_;  
02132 
02133 
02134 
02135 
02136 
02137 
02138 
02139 
02140 
02141        std::string buffer1_, buffer2_;              
02142 
02143        std::string *pattern_;                       
02144 
02145 
02146        int maxWhileIterations_;                     
02147 
02148 
02149 
02150        ScriptRuntimeContext(CompiledScript &s)
02151        : script_(s)
02152        , running_(false)
02153        , output_(0)
02154        , lineNumber_(0)
02155        , isLast_(false)
02156        , pattern_(&buffer1_)
02157        , maxWhileIterations_(1000 * 1000)
02158        {
02159        }
02160 
02161        void stop() 
02162        {
02163           running_ = false;  // the apply() functions set this to true
02164        }
02165 
02166        void start() 
02167        {
02168           running_ = true;
02169        }
02170 
02171        bool running() const
02172        {
02173            return running_;
02174        }
02175 
02176 
02177        bool ok() const { return script_.ok(); }     
02178 
02179 
02180        std::string const &error() const { return script_.error(); }
02183 
02184        std::string *patternBuffer()
02189        {
02190            return pattern_;
02191        }
02192 
02193        std::string *holdBuffer()
02195        {
02196 
02197 
02198            if(pattern_ == &buffer1_)
02199               return &buffer2_;
02200 
02201            return &buffer1_;
02202        }
02203 
02204        void bufferSwap()
02206        {
02208 
02209           if(pattern_ == &buffer1_)
02210              pattern_ = &buffer2_;
02211           else
02212              pattern_ = &buffer1_;
02213          
02214        }
02215 
02216        void hold()
02218        {
02219            *holdBuffer() = *patternBuffer();
02220        }
02221 
02222        void Hold()
02224        {
02225           // a script will typically not execute H until it has first executed h
02226 
02227           *holdBuffer() += '\n';
02228           *holdBuffer() += *patternBuffer();
02229        }
02230 
02231        void get()
02233        {
02234           *patternBuffer() = *holdBuffer();
02235        }
02236 
02237        void Get()
02239        {
02240           *patternBuffer() += '\n';
02241           *patternBuffer() += *holdBuffer();
02242        }
02243 
02244        int maxWhileIterations() const { return maxWhileIterations_; }
02248 
02249        void setMaxWhileIterations(int count) { maxWhileIterations_ = count; }
02255 
02256 
02257 
02258 
02259        void print(bool firstLineOnly);  
02260 
02261 
02262 
02263 
02264 
02265 
02266        bool execute(std::string const &currentLine, 
02267                     size_t             lineNumber, 
02268                     bool               isLast, 
02269                     std::list<std::string> &tmp,
02270                     ActivationMap          *activationMap
02271                    )
02272        {
02273           if(!ok())
02274               return false;
02275 
02276           output_          = &tmp;
02277 
02278           *patternBuffer() = currentLine;
02279           lineNumber_      = lineNumber;
02280           isLast_          = isLast;
02281 
02282           running_ = true;  // the q command will call stop() to set this to false.
02283 
02284           return script_.execute(this);
02285        }
02286 
02287 
02288     };  // struct ScriptRuntimeContext
02289 
02290 //========================================================================================
02291 
02292     template<class StringContainer1, class StringContainer2>
02293     std::string apply(CompiledScript          &script,
02294                       StringContainer1 const  &input,
02295                       StringContainer2        &output,
02296                       int                      maxIter=0
02297                      )
02298     {
02316 
02317 
02318 
02319       ScriptRuntimeContext context(script);
02320       ActivationMap        activationMap;
02321 
02322       if(!context.ok())
02323           return context.error(); //  check the script's error if you get false here
02324       
02325 
02326       if(maxIter)
02327          context.setMaxWhileIterations(maxIter);
02328 
02329       std::string currentLine;
02330 
02331       size_t lineNo = 1;
02332 
02333 
02334 
02335       output.clear();
02336 
02337 
02338       typename StringContainer1::const_iterator first =input.begin(),
02339                                                 last  = input.end();
02340 
02341       if(first == last)
02342          return std::string();  // success
02343 
02344       context.start();
02345 
02346 
02347       do
02348       {
02349           currentLine =  *first++;
02350 
02351           bool eof = (first == last);
02352 
02353           if(apply(context, currentLine, lineNo, eof, output, &activationMap))
02354           {
02355              if(!context.running())
02356                   break;
02357           }
02358 
02359           ++lineNo;
02360 
02361       }
02362       while(first != last);
02363 
02364       return std::string();  // success
02365 
02366     }  // apply()
02367 
02368 //========================================================================================
02369 
02370     template<class ScriptContainer, class StringContainer1, class StringContainer2>
02371     std::string apply(ScriptContainer const  &scriptText, 
02372                       StringContainer1 const &input,
02373                       StringContainer2       &output,
02374                       int                     maxIter=0
02375                      )
02389 
02390     {
02391         CompiledScript script(scriptText);
02392 
02393         if(!script.ok())
02394           return script.error();
02395 
02396         apply(script, input, output, maxIter);
02397 
02398         return std::string();
02399 
02400     }
02401 
02402 //========================================================================================
02403 
02404     template<class StringContainer1, class StringContainer2>
02405     std::string oneLiner(std::string      const &scriptText, 
02406                          StringContainer1 const &input,
02407                          StringContainer2       &output,
02408                          int                     maxIter=0
02409                         )
02420 
02421     {
02422         std::list<std::string>  scriptContainer;
02423 
02424         scriptContainer.push_back(scriptText);
02425 
02426         CompiledScript script(scriptContainer);
02427 
02428         if(!script.ok())
02429             return script.error();
02430 
02431         apply(scriptContainer, input, output, maxIter);
02432 
02433         return std::string();
02434 
02435     }
02436 
02437 //========================================================================================
02438 
02439    template<class StringContainer>
02440    bool apply(ScriptRuntimeContext &context, 
02441               std::string const    &currentLine,
02442               size_t                lineNumber,
02443               bool                  isLast,
02444               StringContainer      &output,
02445               ActivationMap        *activationMap
02446              )
02474    {
02475        
02476        std::list<std::string> tmp;
02477 
02478        bool rv = context.execute(currentLine, lineNumber, isLast, tmp, activationMap);
02479 
02480        CXXTLS_FOREACH(std::string const &outputString, tmp)
02481        {
02482           output.push_back(outputString);
02483        }
02484 
02485        return rv;
02486 
02487    }
02488 
02489 
02490 
02491 
02492 
02493 
02494 
02495 
02496 
02497 }; // namespace muSED
02498 
02499 }; // namespace cxxtls
Generated on Wed Feb 29 22:50:04 2012 for CXXUtilities by  doxygen 1.6.3