00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <cxxtls/simple_regex.h>
00027 #include <regex.h>
00028 #include <memory.h>
00029 #include <string>
00030
00031 #ifdef RE_DUP_MAX
00032 # undef RE_DUP_MAX
00033 #endif
00034
00035 #include <algorithm>
00036
00037 using namespace std;
00038
00039 namespace cxxtls
00040 {
00041
00042
00043
00044
00045
00046
/// Human-readable messages for regex status codes.
///
/// The table is indexed directly with the integer status handed back by the
/// regex routines: compile_helper() indexes it with the regcomp() result and
/// the match operator indexes it with the regexec1() result.  Slot 0 (the
/// success status) is a null pointer, so "no error" can be tested with a
/// plain pointer comparison.
///
/// NOTE(review): the ordering presumably mirrors the error-code enumeration
/// in <regex.h> -- confirm against the header in use before reordering.
static char const *errors[] =
{
    0,                                                  //  0
    "no match",                                         //  1
    "invalid pattern",                                  //  2
    "collation error",                                  //  3
    "Invalid character class name",                     //  4
    "Trailing backslash",                               //  5
    "Invalid back reference",                           //  6
    "Unmatched left bracket",                           //  7
    "Parenthesis imbalance",                            //  8
    "Unmatched \\{",                                    //  9
    "Invalid contents of \\{\\}",                       // 10
    "Invalid range end",                                // 11
    "Out of memory",                                    // 12
    "No preceding re for repetition operator",          // 13
    "Premature end",                                    // 14
    "Compiled pattern bigger than 2^16 bytes",          // 15
    "Unmatched ) or \\); not returned from regcomp"     // 16
};
00067
00068 static char const * compile_helper(char const *string,
00069 regex_t *compiledPattern,
00070 std::string const &options
00071 )
00072
00073 {
00074 static int initialized=0;
00075
00076 if(!initialized)
00077 {
00078 initialized=1;
00079 re_set_syntax(RE_SYNTAX_GREP);
00080 }
00081
00082 int opts=0;
00083
00084 if(options.find_first_of('i') < options.size())
00085 {
00086 opts |= REG_ICASE;
00087 }
00088
00089 int err = regcomp(compiledPattern, string, opts);
00090
00091 return errors[err];
00092 }
00093
00094
00095
00096
00097
00098 SimpleRegex::
00099 SimpleRegex(string const &r, string const &options)
00100 : expression_(r)
00101 , options_(options)
00102 {
00103 match_count_ = 0;
00104
00105 if(r.size() == 2 && r[0] != '.' )
00106 {
00107
00108
00109
00110
00111 if(r[1] == '*' )
00112 {
00113 expression_ = ".";
00114 }
00115 }
00116
00117 error_ = compile_helper(expression_.c_str(),
00118 &compiled_expression,
00119 options_.c_str()
00120 );
00121
00122 ok_ = (error_ == 0);
00123
00124 }
00125
00126 SimpleRegex::
00127 SimpleRegex(char const *r, char const *options)
00128 : expression_(r)
00129 , options_(options)
00130 {
00131 match_count_ = 0;
00132
00133 if(expression_.size() == 2)
00134 {
00135 if(r[1] == '*' )
00136 {
00137 expression_ = ".";
00138 }
00139 }
00140
00141 error_ = compile_helper(expression_.c_str(),
00142 &compiled_expression,
00143 options_.c_str()
00144 );
00145
00146 ok_ = (error_ == 0);
00147
00148 }
00149
00150 SimpleRegex::
00151 SimpleRegex(SimpleRegex const &r)
00152 : expression_(r.expression_)
00153 , options_(r.options_)
00154 {
00155 match_count_ = 0;
00156
00157 error_ = compile_helper(expression_.c_str(),
00158 &compiled_expression,
00159 options_.c_str()
00160 );
00161
00162 ok_ = (error_ == 0);
00163
00164 }
00165
00166
00167
00168
00169
00170
00171 SimpleRegex &
00172 SimpleRegex::
00173 operator=(SimpleRegex const &r)
00174 {
00175 match_count_ = 0;
00176 expression_ = r.expression_;
00177 options_ = r.options_;
00178
00179 error_ = compile_helper(expression_.c_str(),
00180 &compiled_expression,
00181 options_.c_str()
00182 );
00183
00184 return *this;
00185 }
00186
00187 SimpleRegex &
00188 SimpleRegex::
00189 operator=(char const * r)
00190 {
00191 match_count_ = 0;
00192 expression_ = r;
00193 options_="";
00194
00195 error_ = compile_helper(expression_.c_str(),
00196 &compiled_expression,
00197 options_.c_str()
00198 );
00199
00200 return *this;
00201
00202 }
00203
00204 SimpleRegex &
00205 SimpleRegex::
00206 operator=(string const &r)
00207 {
00208 match_count_ = 0;
00209 expression_ = r;
00210 options_ = "";
00211
00212 error_ = compile_helper(expression_.c_str(), &compiled_expression, "");
00213
00214 return *this;
00215
00216 }
00217
00218 SimpleRegex::
00219 ~SimpleRegex()
00220 {
00221 regfree(&compiled_expression);
00222 }
00223
00224 int
00225 SimpleRegex::
00226 operator() (char const *s, int sLength) const
00227
00228
00229
00230
00231 {
00232 match_count_ = 0;
00233
00234 if(!ok_)
00235 return 0;
00236
00237 regmatch_t matches[max_matches];
00238
00239 int rc = regexec1(&compiled_expression, s, sLength, max_matches, matches, 0);
00240
00241 if(rc != 0)
00242 {
00243 error_ = errors[rc];
00244
00245 return 0;
00246 }
00247
00248 int i;
00249
00250 for(i=0; i < max_matches; ++i)
00251 {
00252
00253 if( matches[i].rm_so >= 0 )
00254 {
00255 matches_[i].offset = matches[i].rm_so;
00256 matches_[i].length = matches[i].rm_eo - matches[i].rm_so;
00257 }
00258 else
00259 {
00260 break;
00261 }
00262 }
00263
00264 if(i == 0)
00265 error_ = errors[1];
00266
00267
00268 match_count_ = i;
00269
00270 return i;
00271
00272
00273 }
00274
00275 int
00276 SimpleRegex::
00277 operator() (string const &s, int offset, int length) const
00278 {
00279 match_count_ = 0;
00280
00281 if(!ok_)
00282 return 0;
00283
00284 int string_length = s.size();
00285
00286 if(offset >= string_length)
00287 {
00288
00289
00290
00291
00292
00293 if(
00294 (expression_ == "^$")
00295 || ( (expression_.size() == 4)
00296 && (expression_[0] == '^')
00297 && (expression_[2] == '*')
00298 && (expression_[3] == '$')
00299 )
00300 )
00301 {
00302
00303
00304 }
00305 else
00306 {
00307 error_ = errors[1];
00308 return 0;
00309 }
00310 }
00311
00312 if(offset + length > string_length)
00313 length = string_length - offset;
00314
00315 char const *p = s.c_str();
00316
00317 p += offset;
00318
00319 int count = SimpleRegex::operator() (p, length);
00320
00321 if(count)
00322 {
00323 int i;
00324
00325 for(i=0; i < count; ++i)
00326 {
00327 matches_[i].offset += offset;
00328 }
00329
00330 }
00331
00332 return count;
00333
00334 }
00335
00336
00337 string
00338 SimpleRegex::
00339 expand(string const &original, string const &in) const
00340 {
00341
00342
00343
00344 string rv;
00345
00346 size_t offset=0;
00347
00348 size_t in_size = in.size();
00349
00350 while(offset < in_size)
00351 {
00352 string::const_iterator start = in.begin() + offset;
00353 string::const_iterator end = in.end();
00354 string::const_iterator slash = find(start, end, '\\');
00355
00356 if(slash == end)
00357 {
00358
00359
00360 rv.append(start, end);
00361 return rv;
00362
00363 }
00364 else
00365 {
00366
00367
00368 rv.append(start, slash);
00369
00370 ++slash;
00371
00372 if(slash == end)
00373 return rv;
00374
00375 char c = *slash;
00376
00377 if(c >= '0' && c < '9')
00378 {
00379
00380
00381 ++slash;
00382
00383 c -= '0';
00384
00385
00386
00387 if( c < match_count_)
00388 {
00389
00390 rv.append(original.begin() + matches_[int(c)].offset,
00391 original.begin() + matches_[int(c)].offset +
00392 matches_[int(c)].length
00393 );
00394 }
00395
00396 }
00397 else
00398 {
00399 rv += *slash++;
00400
00401 }
00402
00403 offset = slash - in.begin();
00404
00405 }
00406 }
00407
00408 return rv;
00409
00410 }
00411
00412
00413 string
00414 SimpleRegex::
00415 substitute(string const &original, string const &replacement) const
00416 {
00417 if(match_count_ == 0)
00418 return original;
00419
00420
00421
00422 string real_replacement = expand(original, replacement);
00423
00424 string rv;
00425
00426 size_t start = matches_[0].offset;
00427 size_t end = start + matches_[0].length;
00428
00429 rv.append(original.begin(),
00430 original.begin() + start
00431 );
00432
00433 rv.append(real_replacement);
00434
00435 rv.append(original.begin() + end, original.end() );
00436
00437 return rv;
00438 }
00439
00440 bool
00441 SimpleRegex::
00442 anchored() const
00443 {
00444 if(expression_.size() && expression_[0] == '^')
00445 return true;
00446
00447 return false;
00448 }
00449
00450 bool
00451 SimpleRegex::
00452 replace(string &rv, string const &source, string const &replacement, int maxIterations) const
00453
00454
00455 {
00456 SimpleRegex const &expr = *this;
00457
00458 bool matchFound=false;
00459
00460 string const &pattern = expr;
00461
00462 if(pattern.size() && pattern[0] == '^')
00463 {
00464
00465
00466
00467 maxIterations = 1;
00468 }
00469
00470
00471
00472 {
00473 rv=source;
00474
00475 int offset = 0;
00476 int length = rv.size();
00477
00478 if(expr.empty())
00479 maxIterations = 0;
00480 else
00481 if(maxIterations && expr.anchored())
00482 maxIterations = 1;
00483
00484 while(maxIterations--)
00485 {
00486 int matchCount = expr(rv, offset, length);
00487
00488 if(matchCount)
00489 {
00490
00491
00492 string result = expr.substitute(rv, replacement);
00493
00494 int sizeDelta = int(result.size()) - int(rv.size());
00495
00496 SimpleRegex::match const *matches = expr.matches();
00497
00498 int endOfMatch = matches[0].offset + matches[0].length;
00499
00500 offset = endOfMatch;
00501
00502 offset += sizeDelta;
00503
00504 length = result.size() - offset;
00505
00506 matchFound = true;
00507
00508 rv = result;
00509
00510 }
00511 else
00512 {
00513 break;
00514 }
00515
00516 }
00517
00518 }
00519
00520 return matchFound;
00521 }
00522 }