00001
00002 #ifdef __BORLANDC__
00003 #define alloca alloca
00004
00005 extern void *alloca(unsigned);
00006
00007 #endif
00008
00009 #ifdef _MSC_VER
00010 #include <malloc.h>
00011 #endif
00012
00013 #if defined(__sparc) || defined __hpux
00014 #include <alloca.h>
00015 #endif
00016
00017 #define const
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041 #if defined (_AIX) && !defined (REGEX_MALLOC)
00042 #pragma alloca
00043 #endif
00044
00045 #define _GNU_SOURCE
00046
00047
00048 #include <sys/types.h>
00049
00050 #ifdef HAVE_CONFIG_H
00051 #include "config.h"
00052 #endif
00053
00054
00055
00056 #ifdef emacs
00057
00058 #include "lisp.h"
00059 #include "buffer.h"
00060 #include "syntax.h"
00061
00062
00063 #undef NULL
00064
00065 #else
00066
00067
00068
00069 #if HAVE_STRING_H || STDC_HEADERS
00070 #include <string.h>
00071 #ifndef bcmp
00072 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00073 #endif
00074 #ifndef bcopy
00075 #define bcopy(s, d, n) memcpy ((d), (s), (n))
00076 #endif
00077 #ifndef bzero
00078 #define bzero(s, n) memset ((s), 0, (n))
00079 #endif
00080 #else
00081 #include <strings.h>
00082 #endif
00083
00084 #ifdef STDC_HEADERS
00085 #include <stdlib.h>
00086 #else
00087 char *malloc ();
00088 char *realloc ();
00089 #endif
00090
00091
00092
00093
00094
00095
00096 #ifndef Sword
00097 #define Sword 1
00098 #endif
00099
00100 #ifdef SYNTAX_TABLE
00101
00102 extern char *re_syntax_table;
00103
00104 #else
00105
00106
00107 #define CHAR_SET_SIZE 256
00108
00109 static char re_syntax_table[CHAR_SET_SIZE];
00110
00111 static void
00112 init_syntax_once ()
00113 {
00114 register int c;
00115 static int done = 0;
00116
00117 if (done)
00118 return;
00119
00120 bzero (re_syntax_table, sizeof re_syntax_table);
00121
00122 for (c = 'a'; c <= 'z'; c++)
00123 re_syntax_table[c] = Sword;
00124
00125 for (c = 'A'; c <= 'Z'; c++)
00126 re_syntax_table[c] = Sword;
00127
00128 for (c = '0'; c <= '9'; c++)
00129 re_syntax_table[c] = Sword;
00130
00131 re_syntax_table['_'] = Sword;
00132
00133 done = 1;
00134 }
00135
00136 #endif
00137
00138 #define SYNTAX(c) re_syntax_table[c]
00139
00140 #endif
00141
00142
00143 #include "regex.h"
00144
00145
00146 #include <ctype.h>
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158 #if ! defined (isascii) || defined (STDC_HEADERS)
00159 #undef isascii
00160 #define isascii(c) 1
00161 #endif
00162
00163 #ifdef isblank
00164 #define ISBLANK(c) (isascii (c) && isblank (c))
00165 #else
00166 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00167 #endif
00168 #ifdef isgraph
00169 #define ISGRAPH(c) (isascii (c) && isgraph (c))
00170 #else
00171 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
00172 #endif
00173
00174 #define ISPRINT(c) (isascii (c) && isprint (c))
00175 #define ISDIGIT(c) (isascii (c) && isdigit (c))
00176 #define ISALNUM(c) (isascii (c) && isalnum (c))
00177 #define ISALPHA(c) (isascii (c) && isalpha (c))
00178 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
00179 #define ISLOWER(c) (isascii (c) && islower (c))
00180 #define ISPUNCT(c) (isascii (c) && ispunct (c))
00181 #define ISSPACE(c) (isascii (c) && isspace (c))
00182 #define ISUPPER(c) (isascii (c) && isupper (c))
00183 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
00184
00185 #ifndef NULL
00186 #define NULL 0
00187 #endif
00188
00189
00190
00191
00192
00193 #undef SIGN_EXTEND_CHAR
00194 #if __STDC__
00195 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00196 #else
00197
00198 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00199 #endif
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211 #ifdef REGEX_MALLOC
00212
00213 #define REGEX_ALLOCATE malloc
00214 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00215
00216 #else
00217
00218
00219 #ifndef alloca
00220
00221
00222 #ifdef __GNUC__
00223 #define alloca __builtin_alloca
00224 #else
00225 #if HAVE_ALLOCA_H
00226 #include <alloca.h>
00227 #else
00228 #ifndef _AIX
00229 char *alloca ();
00230 #endif
00231 #endif
00232 #endif
00233
00234 #endif
00235
00236 #define REGEX_ALLOCATE alloca
00237
00238
00239 #define REGEX_REALLOCATE(source, osize, nsize) \
00240 (destination = (char *) alloca (nsize), \
00241 bcopy (source, destination, osize), \
00242 destination)
00243
00244 #endif
00245
00246
00247
00248
00249
00250 #define FIRST_STRING_P(ptr) \
00251 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00252
00253
00254 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00255 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00256 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00257
00258 #define BYTEWIDTH 8
00259
00260 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00261
00262 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00263 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00264
00265 typedef char boolean;
00266 #define false 0
00267 #define true 1
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278 typedef enum
00279 {
00280 no_op = 0,
00281
00282
00283 exactn = 1,
00284
00285
00286 anychar,
00287
00288
00289
00290
00291
00292
00293
00294 charset,
00295
00296
00297
00298 charset_not,
00299
00300
00301
00302
00303
00304
00305
00306
00307 start_memory,
00308
00309
00310
00311
00312
00313
00314
00315
00316 stop_memory,
00317
00318
00319
00320 duplicate,
00321
00322
00323 begline,
00324
00325
00326 endline,
00327
00328
00329
00330 begbuf,
00331
00332
00333 endbuf,
00334
00335
00336 jump,
00337
00338
00339 jump_past_alt,
00340
00341
00342
00343 on_failure_jump,
00344
00345
00346
00347 on_failure_keep_string_jump,
00348
00349
00350
00351 pop_failure_jump,
00352
00353
00354
00355
00356
00357
00358
00359
00360 maybe_pop_jump,
00361
00362
00363
00364
00365
00366
00367 dummy_failure_jump,
00368
00369
00370
00371 push_dummy_failure,
00372
00373
00374
00375 succeed_n,
00376
00377
00378
00379 jump_n,
00380
00381
00382
00383
00384 set_number_at,
00385
00386 wordchar,
00387 notwordchar,
00388
00389 wordbeg,
00390 wordend,
00391
00392 wordbound,
00393 notwordbound
00394
00395 #ifdef emacs
00396 ,before_dot,
00397 at_dot,
00398 after_dot,
00399
00400
00401
00402 syntaxspec,
00403
00404
00405 notsyntaxspec
00406 #endif
00407 } re_opcode_t;
00408
00409
00410
00411
00412
00413 #define STORE_NUMBER(destination, number) \
00414 do { \
00415 (destination)[0] = (number) & 0377; \
00416 (destination)[1] = (number) >> 8; \
00417 } while (0)
00418
00419
00420
00421
00422
00423 #define STORE_NUMBER_AND_INCR(destination, number) \
00424 do { \
00425 STORE_NUMBER (destination, number); \
00426 (destination) += 2; \
00427 } while (0)
00428
00429
00430
00431
00432 #define EXTRACT_NUMBER(destination, source) \
00433 do { \
00434 (destination) = *(source) & 0377; \
00435 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
00436 } while (0)
00437
00438 #ifdef DEBUG
00439 static void
00440 extract_number (dest, source)
00441 int *dest;
00442 unsigned char *source;
00443 {
00444 int temp = SIGN_EXTEND_CHAR (*(source + 1));
00445 *dest = *source & 0377;
00446 *dest += temp << 8;
00447 }
00448
00449 #ifndef EXTRACT_MACROS
00450 #undef EXTRACT_NUMBER
00451 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00452 #endif
00453
00454 #endif
00455
00456
00457
00458
00459 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
00460 do { \
00461 EXTRACT_NUMBER (destination, source); \
00462 (source) += 2; \
00463 } while (0)
00464
00465 #ifdef DEBUG
00466 static void
00467 extract_number_and_incr (destination, source)
00468 int *destination;
00469 unsigned char **source;
00470 {
00471 extract_number (destination, *source);
00472 *source += 2;
00473 }
00474
00475 #ifndef EXTRACT_MACROS
00476 #undef EXTRACT_NUMBER_AND_INCR
00477 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00478 extract_number_and_incr (&dest, &src)
00479 #endif
00480
00481 #endif
00482
00483
00484
00485
00486
00487
00488
00489 #ifdef DEBUG
00490
00491
00492 #include <portable_io.h>
00493
00494
00495 #include <assert.h>
00496
00497 static int debug = 0;
00498
00499 #define DEBUG_STATEMENT(e) e
00500 #define DEBUG_PRINT1(x) if (debug) printf (x)
00501 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00502 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00503 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00504 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
00505 if (debug) print_partial_compiled_pattern (s, e)
00506 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
00507 if (debug) print_double_string (w, s1, sz1, s2, sz2)
00508
00509
00510 extern void printchar ();
00511
00512
00513
00514 void
00515 print_fastmap (fastmap)
00516 char *fastmap;
00517 {
00518 unsigned was_a_range = 0;
00519 unsigned i = 0;
00520
00521 while (i < (1 << BYTEWIDTH))
00522 {
00523 if (fastmap[i++])
00524 {
00525 was_a_range = 0;
00526 printchar (i - 1);
00527 while (i < (1 << BYTEWIDTH) && fastmap[i])
00528 {
00529 was_a_range = 1;
00530 i++;
00531 }
00532 if (was_a_range)
00533 {
00534 printf ("-");
00535 printchar (i - 1);
00536 }
00537 }
00538 }
00539 putchar ('\n');
00540 }
00541
00542
00543
00544
00545
00546 void
00547 print_partial_compiled_pattern (start, end)
00548 unsigned char *start;
00549 unsigned char *end;
00550 {
00551 int mcnt, mcnt2;
00552 unsigned char *p = start;
00553 unsigned char *pend = end;
00554
00555 if (start == NULL)
00556 {
00557 printf ("(null)\n");
00558 return;
00559 }
00560
00561
00562 while (p < pend)
00563 {
00564 printf ("%d:\t", p - start);
00565
00566 switch ((re_opcode_t) *p++)
00567 {
00568 case no_op:
00569 printf ("/no_op");
00570 break;
00571
00572 case exactn:
00573 mcnt = *p++;
00574 printf ("/exactn/%d", mcnt);
00575 do
00576 {
00577 putchar ('/');
00578 printchar (*p++);
00579 }
00580 while (--mcnt);
00581 break;
00582
00583 case start_memory:
00584 mcnt = *p++;
00585 printf ("/start_memory/%d/%d", mcnt, *p++);
00586 break;
00587
00588 case stop_memory:
00589 mcnt = *p++;
00590 printf ("/stop_memory/%d/%d", mcnt, *p++);
00591 break;
00592
00593 case duplicate:
00594 printf ("/duplicate/%d", *p++);
00595 break;
00596
00597 case anychar:
00598 printf ("/anychar");
00599 break;
00600
00601 case charset:
00602 case charset_not:
00603 {
00604 register int c, last = -100;
00605 register int in_range = 0;
00606
00607 printf ("/charset [%s",
00608 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00609
00610 assert (p + *p < pend);
00611
00612 for (c = 0; c < 256; c++)
00613 if (c / 8 < *p
00614 && (p[1 + (c/8)] & (1 << (c % 8))))
00615 {
00616
00617 if (last + 1 == c && ! in_range)
00618 {
00619 putchar ('-');
00620 in_range = 1;
00621 }
00622
00623 else if (last + 1 != c && in_range)
00624 {
00625 printchar (last);
00626 in_range = 0;
00627 }
00628
00629 if (! in_range)
00630 printchar (c);
00631
00632 last = c;
00633 }
00634
00635 if (in_range)
00636 printchar (last);
00637
00638 putchar (']');
00639
00640 p += 1 + *p;
00641 }
00642 break;
00643
00644 case begline:
00645 printf ("/begline");
00646 break;
00647
00648 case endline:
00649 printf ("/endline");
00650 break;
00651
00652 case on_failure_jump:
00653 extract_number_and_incr (&mcnt, &p);
00654 printf ("/on_failure_jump to %d", p + mcnt - start);
00655 break;
00656
00657 case on_failure_keep_string_jump:
00658 extract_number_and_incr (&mcnt, &p);
00659 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00660 break;
00661
00662 case dummy_failure_jump:
00663 extract_number_and_incr (&mcnt, &p);
00664 printf ("/dummy_failure_jump to %d", p + mcnt - start);
00665 break;
00666
00667 case push_dummy_failure:
00668 printf ("/push_dummy_failure");
00669 break;
00670
00671 case maybe_pop_jump:
00672 extract_number_and_incr (&mcnt, &p);
00673 printf ("/maybe_pop_jump to %d", p + mcnt - start);
00674 break;
00675
00676 case pop_failure_jump:
00677 extract_number_and_incr (&mcnt, &p);
00678 printf ("/pop_failure_jump to %d", p + mcnt - start);
00679 break;
00680
00681 case jump_past_alt:
00682 extract_number_and_incr (&mcnt, &p);
00683 printf ("/jump_past_alt to %d", p + mcnt - start);
00684 break;
00685
00686 case jump:
00687 extract_number_and_incr (&mcnt, &p);
00688 printf ("/jump to %d", p + mcnt - start);
00689 break;
00690
00691 case succeed_n:
00692 extract_number_and_incr (&mcnt, &p);
00693 extract_number_and_incr (&mcnt2, &p);
00694 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
00695 break;
00696
00697 case jump_n:
00698 extract_number_and_incr (&mcnt, &p);
00699 extract_number_and_incr (&mcnt2, &p);
00700 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
00701 break;
00702
00703 case set_number_at:
00704 extract_number_and_incr (&mcnt, &p);
00705 extract_number_and_incr (&mcnt2, &p);
00706 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
00707 break;
00708
00709 case wordbound:
00710 printf ("/wordbound");
00711 break;
00712
00713 case notwordbound:
00714 printf ("/notwordbound");
00715 break;
00716
00717 case wordbeg:
00718 printf ("/wordbeg");
00719 break;
00720
00721 case wordend:
00722 printf ("/wordend");
00723
00724 #ifdef emacs
00725 case before_dot:
00726 printf ("/before_dot");
00727 break;
00728
00729 case at_dot:
00730 printf ("/at_dot");
00731 break;
00732
00733 case after_dot:
00734 printf ("/after_dot");
00735 break;
00736
00737 case syntaxspec:
00738 printf ("/syntaxspec");
00739 mcnt = *p++;
00740 printf ("/%d", mcnt);
00741 break;
00742
00743 case notsyntaxspec:
00744 printf ("/notsyntaxspec");
00745 mcnt = *p++;
00746 printf ("/%d", mcnt);
00747 break;
00748 #endif
00749
00750 case wordchar:
00751 printf ("/wordchar");
00752 break;
00753
00754 case notwordchar:
00755 printf ("/notwordchar");
00756 break;
00757
00758 case begbuf:
00759 printf ("/begbuf");
00760 break;
00761
00762 case endbuf:
00763 printf ("/endbuf");
00764 break;
00765
00766 default:
00767 printf ("?%d", *(p-1));
00768 }
00769
00770 putchar ('\n');
00771 }
00772
00773 printf ("%d:\tend of pattern.\n", p - start);
00774 }
00775
00776
00777 void
00778 print_compiled_pattern (bufp)
00779 struct re_pattern_buffer *bufp;
00780 {
00781 unsigned char *buffer = bufp->buffer;
00782
00783 print_partial_compiled_pattern (buffer, buffer + bufp->used);
00784 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00785
00786 if (bufp->fastmap_accurate && bufp->fastmap)
00787 {
00788 printf ("fastmap: ");
00789 print_fastmap (bufp->fastmap);
00790 }
00791
00792 printf ("re_nsub: %d\t", bufp->re_nsub);
00793 printf ("regs_alloc: %d\t", bufp->regs_allocated);
00794 printf ("can_be_null: %d\t", bufp->can_be_null);
00795 printf ("newline_anchor: %d\n", bufp->newline_anchor);
00796 printf ("no_sub: %d\t", bufp->no_sub);
00797 printf ("not_bol: %d\t", bufp->not_bol);
00798 printf ("not_eol: %d\t", bufp->not_eol);
00799 printf ("syntax: %d\n", bufp->syntax);
00800
00801 }
00802
00803
00804 void
00805 print_double_string (where, string1, size1, string2, size2)
00806 const char *where;
00807 const char *string1;
00808 const char *string2;
00809 int size1;
00810 int size2;
00811 {
00812 unsigned this_char;
00813
00814 if (where == NULL)
00815 printf ("(null)");
00816 else
00817 {
00818 if (FIRST_STRING_P (where))
00819 {
00820 for (this_char = where - string1; this_char < size1; this_char++)
00821 printchar (string1[this_char]);
00822
00823 where = string2;
00824 }
00825
00826 for (this_char = where - string2; this_char < size2; this_char++)
00827 printchar (string2[this_char]);
00828 }
00829 }
00830
00831 #else
00832
00833 #undef assert
00834 #define assert(e)
00835
00836 #define DEBUG_STATEMENT(e)
00837 #define DEBUG_PRINT1(x)
00838 #define DEBUG_PRINT2(x1, x2)
00839 #define DEBUG_PRINT3(x1, x2, x3)
00840 #define DEBUG_PRINT4(x1, x2, x3, x4)
00841 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00842 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00843
00844 #endif
00845
00846
00847
00848
00849 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859 reg_syntax_t
00860 re_set_syntax (syntax)
00861 reg_syntax_t syntax;
00862 {
00863 reg_syntax_t ret = re_syntax_options;
00864
00865 re_syntax_options = syntax;
00866 return ret;
00867 }
00868
00869
00870
00871
00872 static const char *re_error_msg[] =
00873 { NULL,
00874 "No match",
00875 "Invalid regular expression",
00876 "Invalid collation character",
00877 "Invalid character class name",
00878 "Trailing backslash",
00879 "Invalid back reference",
00880 "Unmatched [ or [^",
00881 "Unmatched ( or \\(",
00882 "Unmatched \\{",
00883 "Invalid content of \\{\\}",
00884 "Invalid range end",
00885 "Memory exhausted",
00886 "Invalid preceding regular expression",
00887 "Premature end of regular expression",
00888 "Regular expression too big",
00889 "Unmatched ) or \\)",
00890 };
00891
00892
00893
00894
00895
00896
00897 typedef int pattern_offset_t;
00898
00899
00900
00901 typedef unsigned regnum_t;
00902
00903 typedef struct
00904 {
00905 pattern_offset_t begalt_offset;
00906 pattern_offset_t fixup_alt_jump;
00907 pattern_offset_t inner_group_offset;
00908 pattern_offset_t laststart_offset;
00909 regnum_t regnum;
00910 } compile_stack_elt_t;
00911
00912
00913 typedef struct
00914 {
00915 compile_stack_elt_t *stack;
00916 unsigned size;
00917 unsigned avail;
00918 } compile_stack_type;
00919
00920 static void store_op1 (
00921 #ifdef __STDC__
00922 re_opcode_t op,
00923 unsigned char *loc,
00924 int arg
00925 #endif
00926 );
00927
00928 static void insert_op1
00929 (
00930 #ifdef __STDC__
00931 re_opcode_t op, unsigned char *loc, int arg, unsigned char *end
00932 #endif
00933 );
00934
00935 static void store_op2 (
00936 #ifdef __STDC__
00937 re_opcode_t op,
00938 unsigned char *loc,
00939 int arg1,
00940 int arg2
00941 #endif
00942 );
00943
00944 static void insert_op2
00945 (
00946 #ifdef __STDC__
00947 re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end
00948 #endif
00949
00950 );
00951
00952 static boolean at_begline_loc_p
00953 (
00954 #ifdef __STDC__
00955 char const *,char const *, reg_syntax_t
00956 #endif
00957
00958 );
00959
00960 static boolean at_endline_loc_p (
00961 #ifdef __STDC__
00962 const char *p,
00963 const char *pend,
00964 int syntax
00965 #endif
00966 );
00967
00968 static boolean group_in_compile_stack
00969 (
00970 #ifdef __STDC__
00971 compile_stack_type compile_stack,
00972 regnum_t regnum
00973 #endif
00974 );
00975 static reg_errcode_t compile_range(
00976 #ifdef __STDC__
00977 const char **p_ptr,
00978 const char *pend,
00979 char *translate,
00980 reg_syntax_t syntax,
00981 unsigned char *b
00982 #endif
00983
00984 );
00985
00986
00987
00988
00989 #define PATFETCH(c) \
00990 do {if (p == pend) return REG_EEND; \
00991 c = (unsigned char) *p++; \
00992 if (translate) c = translate[c]; \
00993 } while (0)
00994
00995
00996
00997 #define PATFETCH_RAW(c) \
00998 do {if (p == pend) return REG_EEND; \
00999 c = (unsigned char) *p++; \
01000 } while (0)
01001
01002
01003 #define PATUNFETCH p--
01004
01005
01006
01007
01008
01009
01010 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
01011
01012
01013
01014
01015
01016 #define INIT_BUF_SIZE 32
01017
01018
01019 #define GET_BUFFER_SPACE(n) \
01020 while (b - bufp->buffer + (n) > bufp->allocated) \
01021 EXTEND_BUFFER ()
01022
01023
01024 #define BUF_PUSH(c) \
01025 do { \
01026 GET_BUFFER_SPACE (1); \
01027 *b++ = (unsigned char) (c); \
01028 } while (0)
01029
01030
01031
01032 #define BUF_PUSH_2(c1, c2) \
01033 do { \
01034 GET_BUFFER_SPACE (2); \
01035 *b++ = (unsigned char) (c1); \
01036 *b++ = (unsigned char) (c2); \
01037 } while (0)
01038
01039
01040
01041 #define BUF_PUSH_3(c1, c2, c3) \
01042 do { \
01043 GET_BUFFER_SPACE (3); \
01044 *b++ = (unsigned char) (c1); \
01045 *b++ = (unsigned char) (c2); \
01046 *b++ = (unsigned char) (c3); \
01047 } while (0)
01048
01049
01050
01051
01052 #define STORE_JUMP(op, loc, to) \
01053 store_op1 (op, loc, (to) - (loc) - 3)
01054
01055
01056 #define STORE_JUMP2(op, loc, to, arg) \
01057 store_op2 (op, loc, (to) - (loc) - 3, arg)
01058
01059
01060 #define INSERT_JUMP(op, loc, to) \
01061 insert_op1 (op, loc, (to) - (loc) - 3, b)
01062
01063
01064 #define INSERT_JUMP2(op, loc, to, arg) \
01065 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
01066
01067
01068
01069
01070
01071 #define MAX_BUF_SIZE (1L << 16)
01072
01073
01074
01075
01076
01077
01078 #define EXTEND_BUFFER() \
01079 do { \
01080 unsigned char *old_buffer = bufp->buffer; \
01081 if (bufp->allocated == MAX_BUF_SIZE) \
01082 return REG_ESIZE; \
01083 bufp->allocated <<= 1; \
01084 if (bufp->allocated > MAX_BUF_SIZE) \
01085 bufp->allocated = MAX_BUF_SIZE; \
01086 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
01087 if (bufp->buffer == NULL) \
01088 return REG_ESPACE; \
01089 \
01090 if (old_buffer != bufp->buffer) \
01091 { \
01092 b = (b - old_buffer) + bufp->buffer; \
01093 begalt = (begalt - old_buffer) + bufp->buffer; \
01094 if (fixup_alt_jump) \
01095 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01096 if (laststart) \
01097 laststart = (laststart - old_buffer) + bufp->buffer; \
01098 if (pending_exact) \
01099 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
01100 } \
01101 } while (0)
01102
01103
01104
01105
01106
01107 #define MAX_REGNUM 255
01108
01109
01110
01111
01112 #define INIT_COMPILE_STACK_SIZE 32
01113
01114 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
01115 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
01116
01117
01118 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01119
01120
01121
01122 #define SET_LIST_BIT(c) \
01123 (b[((unsigned char) (c)) / BYTEWIDTH] \
01124 |= 1 << (((unsigned char) c) % BYTEWIDTH))
01125
01126
01127
01128 #define GET_UNSIGNED_NUMBER(num) \
01129 { if (p != pend) \
01130 { \
01131 PATFETCH (c); \
01132 while (ISDIGIT (c)) \
01133 { \
01134 if (num < 0) \
01135 num = 0; \
01136 num = num * 10 + c - '0'; \
01137 if (p == pend) \
01138 break; \
01139 PATFETCH (c); \
01140 } \
01141 } \
01142 }
01143
01144 #define CHAR_CLASS_MAX_LENGTH 6
01145
01146 #define IS_CHAR_CLASS(string) \
01147 (STREQ (string, "alpha") || STREQ (string, "upper") \
01148 || STREQ (string, "lower") || STREQ (string, "digit") \
01149 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
01150 || STREQ (string, "space") || STREQ (string, "print") \
01151 || STREQ (string, "punct") || STREQ (string, "graph") \
01152 || STREQ (string, "cntrl") || STREQ (string, "blank"))
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172 #ifdef __STDC__
01173 static reg_errcode_t
01174 regex_compile (char const*pattern, int size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
01175 #else
01176 static reg_errcode_t
01177 regex_compile (pattern, size, syntax, bufp)
01178 char const*pattern;
01179 int size;
01180 reg_syntax_t syntax;
01181 struct re_pattern_buffer *bufp;
01182
01183 #endif
01184 {
01185
01186
01187
01188 register unsigned char c, c1;
01189
01190
01191 const char *p1;
01192
01193
01194 register unsigned char *b;
01195
01196
01197 compile_stack_type compile_stack;
01198
01199
01200 const char *p = pattern;
01201 const char *pend = pattern + size;
01202
01203
01204 char *translate = bufp->translate;
01205
01206
01207
01208
01209
01210 unsigned char *pending_exact = 0;
01211
01212
01213
01214
01215 unsigned char *laststart = 0;
01216
01217
01218 unsigned char *begalt;
01219
01220
01221
01222 const char *beg_interval;
01223
01224
01225
01226
01227 unsigned char *fixup_alt_jump = 0;
01228
01229
01230
01231
01232 regnum_t regnum = 0;
01233
01234 #ifdef DEBUG
01235 DEBUG_PRINT1 ("\nCompiling pattern: ");
01236 if (debug)
01237 {
01238 unsigned debug_count;
01239
01240 for (debug_count = 0; debug_count < size; debug_count++)
01241 printchar (pattern[debug_count]);
01242 putchar ('\n');
01243 }
01244 #endif
01245
01246
01247 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01248 if (compile_stack.stack == NULL)
01249 return REG_ESPACE;
01250
01251 compile_stack.size = INIT_COMPILE_STACK_SIZE;
01252 compile_stack.avail = 0;
01253
01254
01255 bufp->syntax = syntax;
01256 bufp->fastmap_accurate = 0;
01257 bufp->not_bol = bufp->not_eol = 0;
01258
01259
01260
01261
01262 bufp->used = 0;
01263
01264
01265 bufp->re_nsub = 0;
01266
01267 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01268
01269 init_syntax_once ();
01270 #endif
01271
01272 if (bufp->allocated == 0)
01273 {
01274 if (bufp->buffer)
01275 {
01276
01277
01278 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01279 }
01280 else
01281 {
01282 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01283 }
01284 if (!bufp->buffer) return REG_ESPACE;
01285
01286 bufp->allocated = INIT_BUF_SIZE;
01287 }
01288
01289 begalt = b = bufp->buffer;
01290
01291
01292 while (p != pend)
01293 {
01294 PATFETCH (c);
01295
01296 switch (c)
01297 {
01298 case '^':
01299 {
01300 if (
01301 p == pattern + 1
01302
01303 || syntax & RE_CONTEXT_INDEP_ANCHORS
01304
01305 || at_begline_loc_p (pattern, p, syntax))
01306 BUF_PUSH (begline);
01307 else
01308 goto normal_char;
01309 }
01310 break;
01311
01312
01313 case '$':
01314 {
01315 if (
01316 p == pend
01317
01318 || syntax & RE_CONTEXT_INDEP_ANCHORS
01319
01320 || at_endline_loc_p (p, pend, syntax))
01321 BUF_PUSH (endline);
01322 else
01323 goto normal_char;
01324 }
01325 break;
01326
01327
01328 case '+':
01329 case '?':
01330 if ((syntax & RE_BK_PLUS_QM)
01331 || (syntax & RE_LIMITED_OPS))
01332 goto normal_char;
01333 handle_plus:
01334 case '*':
01335
01336 if (!laststart)
01337 {
01338 if (syntax & RE_CONTEXT_INVALID_OPS)
01339 return REG_BADRPT;
01340 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01341 goto normal_char;
01342 }
01343
01344 {
01345
01346 boolean keep_string_p = false;
01347
01348
01349 char zero_times_ok = 0, many_times_ok = 0;
01350
01351
01352
01353
01354
01355
01356 for (;;)
01357 {
01358 zero_times_ok |= c != '+';
01359 many_times_ok |= c != '?';
01360
01361 if (p == pend)
01362 break;
01363
01364 PATFETCH (c);
01365
01366 if (c == '*'
01367 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01368 ;
01369
01370 else if (syntax & RE_BK_PLUS_QM && c == '\\')
01371 {
01372 if (p == pend) return REG_EESCAPE;
01373
01374 PATFETCH (c1);
01375 if (!(c1 == '+' || c1 == '?'))
01376 {
01377 PATUNFETCH;
01378 PATUNFETCH;
01379 break;
01380 }
01381
01382 c = c1;
01383 }
01384 else
01385 {
01386 PATUNFETCH;
01387 break;
01388 }
01389
01390
01391 }
01392
01393
01394
01395 if (!laststart)
01396 break;
01397
01398
01399
01400 if (many_times_ok)
01401 {
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411 assert (p - 1 > pattern);
01412
01413
01414 GET_BUFFER_SPACE (3);
01415
01416
01417
01418
01419
01420
01421 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01422 && zero_times_ok
01423 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01424 && !(syntax & RE_DOT_NEWLINE))
01425 {
01426 STORE_JUMP (jump, b, laststart);
01427 keep_string_p = true;
01428 }
01429 else
01430
01431 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01432
01433
01434 b += 3;
01435 }
01436
01437
01438
01439 GET_BUFFER_SPACE (3);
01440 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
01441 : on_failure_jump,
01442 laststart, b + 3);
01443 pending_exact = 0;
01444 b += 3;
01445
01446 if (!zero_times_ok)
01447 {
01448
01449
01450
01451
01452
01453 GET_BUFFER_SPACE (3);
01454 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
01455 b += 3;
01456 }
01457 }
01458 break;
01459
01460
01461 case '.':
01462 laststart = b;
01463 BUF_PUSH (anychar);
01464 break;
01465
01466
01467 case '[':
01468 {
01469 boolean had_char_class = false;
01470
01471 if (p == pend) return REG_EBRACK;
01472
01473
01474
01475 GET_BUFFER_SPACE (34);
01476
01477 laststart = b;
01478
01479
01480
01481 BUF_PUSH (*p == '^' ? charset_not : charset);
01482 if (*p == '^')
01483 p++;
01484
01485
01486 p1 = p;
01487
01488
01489 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
01490
01491
01492 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
01493
01494
01495 if ((re_opcode_t) b[-2] == charset_not
01496 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
01497 SET_LIST_BIT ('\n');
01498
01499
01500 for (;;)
01501 {
01502 if (p == pend) return REG_EBRACK;
01503
01504 PATFETCH (c);
01505
01506
01507 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
01508 {
01509 if (p == pend) return REG_EESCAPE;
01510
01511 PATFETCH (c1);
01512 SET_LIST_BIT (c1);
01513 continue;
01514 }
01515
01516
01517
01518
01519 if (c == ']' && p != p1 + 1)
01520 break;
01521
01522
01523
01524 if (had_char_class && c == '-' && *p != ']')
01525 return REG_ERANGE;
01526
01527
01528
01529
01530
01531 if (c == '-'
01532 && !(p - 2 >= pattern && p[-2] == '[')
01533 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
01534 && *p != ']')
01535 {
01536 reg_errcode_t ret
01537 = compile_range (&p, pend, translate, syntax, b);
01538 if (ret != REG_NOERROR) return ret;
01539 }
01540
01541 else if (p[0] == '-' && p[1] != ']')
01542 {
01543 reg_errcode_t ret;
01544
01545
01546 PATFETCH (c1);
01547
01548 ret = compile_range (&p, pend, translate, syntax, b);
01549 if (ret != REG_NOERROR) return ret;
01550 }
01551
01552
01553
01554
01555 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
01556 {
01557 char str[CHAR_CLASS_MAX_LENGTH + 1];
01558
01559 PATFETCH (c);
01560 c1 = 0;
01561
01562
01563 if (p == pend) return REG_EBRACK;
01564
01565 for (;;)
01566 {
01567 PATFETCH (c);
01568 if (c == ':' || c == ']' || p == pend
01569 || c1 == CHAR_CLASS_MAX_LENGTH)
01570 break;
01571 str[c1++] = c;
01572 }
01573 str[c1] = '\0';
01574
01575
01576
01577
01578 if (c == ':' && *p == ']')
01579 {
01580 int ch;
01581 boolean is_alnum = STREQ (str, "alnum");
01582 boolean is_alpha = STREQ (str, "alpha");
01583 boolean is_blank = STREQ (str, "blank");
01584 boolean is_cntrl = STREQ (str, "cntrl");
01585 boolean is_digit = STREQ (str, "digit");
01586 boolean is_graph = STREQ (str, "graph");
01587 boolean is_lower = STREQ (str, "lower");
01588 boolean is_print = STREQ (str, "print");
01589 boolean is_punct = STREQ (str, "punct");
01590 boolean is_space = STREQ (str, "space");
01591 boolean is_upper = STREQ (str, "upper");
01592 boolean is_xdigit = STREQ (str, "xdigit");
01593
01594 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
01595
01596
01597
01598 PATFETCH (c);
01599
01600 if (p == pend) return REG_EBRACK;
01601
01602 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
01603 {
01604 if ( (is_alnum && ISALNUM (ch))
01605 || (is_alpha && ISALPHA (ch))
01606 || (is_blank && ISBLANK (ch))
01607 || (is_cntrl && ISCNTRL (ch))
01608 || (is_digit && ISDIGIT (ch))
01609 || (is_graph && ISGRAPH (ch))
01610 || (is_lower && ISLOWER (ch))
01611 || (is_print && ISPRINT (ch))
01612 || (is_punct && ISPUNCT (ch))
01613 || (is_space && ISSPACE (ch))
01614 || (is_upper && ISUPPER (ch))
01615 || (is_xdigit && ISXDIGIT (ch)))
01616 SET_LIST_BIT (ch);
01617 }
01618 had_char_class = true;
01619 }
01620 else
01621 {
01622 c1++;
01623 while (c1--)
01624 PATUNFETCH;
01625 SET_LIST_BIT ('[');
01626 SET_LIST_BIT (':');
01627 had_char_class = false;
01628 }
01629 }
01630 else
01631 {
01632 had_char_class = false;
01633 SET_LIST_BIT (c);
01634 }
01635 }
01636
01637
01638
01639 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
01640 b[-1]--;
01641 b += b[-1];
01642 }
01643 break;
01644
01645
01646 case '(':
01647 if (syntax & RE_NO_BK_PARENS)
01648 goto handle_open;
01649 else
01650 goto normal_char;
01651
01652
01653 case ')':
01654 if (syntax & RE_NO_BK_PARENS)
01655 goto handle_close;
01656 else
01657 goto normal_char;
01658
01659
01660 case '\n':
01661 if (syntax & RE_NEWLINE_ALT)
01662 goto handle_alt;
01663 else
01664 goto normal_char;
01665
01666
01667 case '|':
01668 if (syntax & RE_NO_BK_VBAR)
01669 goto handle_alt;
01670 else
01671 goto normal_char;
01672
01673
01674 case '{':
01675 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
01676 goto handle_interval;
01677 else
01678 goto normal_char;
01679
01680
01681 case '\\':
01682 if (p == pend) return REG_EESCAPE;
01683
01684
01685
01686
01687 PATFETCH_RAW (c);
01688
01689 switch (c)
01690 {
01691 case '(':
01692 if (syntax & RE_NO_BK_PARENS)
01693 goto normal_backslash;
01694
01695 handle_open:
01696 bufp->re_nsub++;
01697 regnum++;
01698
01699 if (COMPILE_STACK_FULL)
01700 {
01701 RETALLOC (compile_stack.stack, compile_stack.size << 1,
01702 compile_stack_elt_t);
01703 if (compile_stack.stack == NULL) return REG_ESPACE;
01704
01705 compile_stack.size <<= 1;
01706 }
01707
01708
01709
01710
01711
01712 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
01713 COMPILE_STACK_TOP.fixup_alt_jump
01714 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
01715 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
01716 COMPILE_STACK_TOP.regnum = regnum;
01717
01718
01719
01720
01721
01722 if (regnum <= MAX_REGNUM)
01723 {
01724 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
01725 BUF_PUSH_3 (start_memory, regnum, 0);
01726 }
01727
01728 compile_stack.avail++;
01729
01730 fixup_alt_jump = 0;
01731 laststart = 0;
01732 begalt = b;
01733
01734
01735
01736 pending_exact = 0;
01737 break;
01738
01739
01740 case ')':
01741 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
01742
01743 if (COMPILE_STACK_EMPTY)
01744 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
01745 goto normal_backslash;
01746 else
01747 return REG_ERPAREN;
01748
01749 handle_close:
01750 if (fixup_alt_jump)
01751 {
01752
01753
01754
01755 BUF_PUSH (push_dummy_failure);
01756
01757
01758
01759 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
01760 }
01761
01762
01763 if (COMPILE_STACK_EMPTY)
01764 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
01765 goto normal_char;
01766 else
01767 return REG_ERPAREN;
01768
01769
01770
01771 assert (compile_stack.avail != 0);
01772 {
01773
01774
01775
01776 regnum_t this_group_regnum;
01777
01778 compile_stack.avail--;
01779 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
01780 fixup_alt_jump
01781 = COMPILE_STACK_TOP.fixup_alt_jump
01782 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
01783 : 0;
01784 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
01785 this_group_regnum = COMPILE_STACK_TOP.regnum;
01786
01787
01788
01789 pending_exact = 0;
01790
01791
01792
01793 if (this_group_regnum <= MAX_REGNUM)
01794 {
01795 unsigned char *inner_group_loc
01796 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
01797
01798 *inner_group_loc = regnum - this_group_regnum;
01799 BUF_PUSH_3 (stop_memory, this_group_regnum,
01800 regnum - this_group_regnum);
01801 }
01802 }
01803 break;
01804
01805
01806 case '|':
01807 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
01808 goto normal_backslash;
01809 handle_alt:
01810 if (syntax & RE_LIMITED_OPS)
01811 goto normal_char;
01812
01813
01814
01815 GET_BUFFER_SPACE (3);
01816 INSERT_JUMP (on_failure_jump, begalt, b + 6);
01817 pending_exact = 0;
01818 b += 3;
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836 if (fixup_alt_jump)
01837 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
01838
01839
01840
01841
01842 fixup_alt_jump = b;
01843 GET_BUFFER_SPACE (3);
01844 b += 3;
01845
01846 laststart = 0;
01847 begalt = b;
01848 break;
01849
01850
01851 case '{':
01852
01853 if (!(syntax & RE_INTERVALS)
01854
01855
01856 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
01857 || (p - 2 == pattern && p == pend))
01858 goto normal_backslash;
01859
01860 handle_interval:
01861 {
01862
01863
01864
01865 int lower_bound = -1, upper_bound = -1;
01866
01867 beg_interval = p - 1;
01868
01869 if (p == pend)
01870 {
01871 if (syntax & RE_NO_BK_BRACES)
01872 goto unfetch_interval;
01873 else
01874 return REG_EBRACE;
01875 }
01876
01877 GET_UNSIGNED_NUMBER (lower_bound);
01878
01879 if (c == ',')
01880 {
01881 GET_UNSIGNED_NUMBER (upper_bound);
01882 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
01883 }
01884 else
01885
01886 upper_bound = lower_bound;
01887
01888 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
01889 || lower_bound > upper_bound)
01890 {
01891 if (syntax & RE_NO_BK_BRACES)
01892 goto unfetch_interval;
01893 else
01894 return REG_BADBR;
01895 }
01896
01897 if (!(syntax & RE_NO_BK_BRACES))
01898 {
01899 if (c != '\\') return REG_EBRACE;
01900
01901 PATFETCH (c);
01902 }
01903
01904 if (c != '}')
01905 {
01906 if (syntax & RE_NO_BK_BRACES)
01907 goto unfetch_interval;
01908 else
01909 return REG_BADBR;
01910 }
01911
01912
01913
01914
01915 if (!laststart)
01916 {
01917 if (syntax & RE_CONTEXT_INVALID_OPS)
01918 return REG_BADRPT;
01919 else if (syntax & RE_CONTEXT_INDEP_OPS)
01920 laststart = b;
01921 else
01922 goto unfetch_interval;
01923 }
01924
01925
01926
01927
01928 if (upper_bound == 0)
01929 {
01930 GET_BUFFER_SPACE (3);
01931 INSERT_JUMP (jump, laststart, b + 3);
01932 b += 3;
01933 }
01934
01935
01936
01937
01938
01939
01940
01941
01942
01943
01944 else
01945 {
01946
01947 unsigned nbytes = 10 + (upper_bound > 1) * 10;
01948
01949 GET_BUFFER_SPACE (nbytes);
01950
01951
01952
01953
01954
01955
01956 INSERT_JUMP2 (succeed_n, laststart,
01957 b + 5 + (upper_bound > 1) * 5,
01958 lower_bound);
01959 b += 5;
01960
01961
01962
01963
01964
01965 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
01966 b += 5;
01967
01968 if (upper_bound > 1)
01969 {
01970
01971
01972
01973
01974
01975
01976 STORE_JUMP2 (jump_n, b, laststart + 5,
01977 upper_bound - 1);
01978 b += 5;
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992
01993
01994 insert_op2 (set_number_at, laststart, b - laststart,
01995 upper_bound - 1, b);
01996 b += 5;
01997 }
01998 }
01999 pending_exact = 0;
02000 beg_interval = NULL;
02001 }
02002 break;
02003
02004 unfetch_interval:
02005
02006 assert (beg_interval);
02007 p = beg_interval;
02008 beg_interval = NULL;
02009
02010
02011 PATFETCH (c);
02012
02013 if (!(syntax & RE_NO_BK_BRACES))
02014 {
02015 if (p > pattern && p[-1] == '\\')
02016 goto normal_backslash;
02017 }
02018 goto normal_char;
02019
02020 #ifdef emacs
02021
02022
02023 case '=':
02024 BUF_PUSH (at_dot);
02025 break;
02026
02027 case 's':
02028 laststart = b;
02029 PATFETCH (c);
02030 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02031 break;
02032
02033 case 'S':
02034 laststart = b;
02035 PATFETCH (c);
02036 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02037 break;
02038 #endif
02039
02040
02041 case 'w':
02042 laststart = b;
02043 BUF_PUSH (wordchar);
02044 break;
02045
02046
02047 case 'W':
02048 laststart = b;
02049 BUF_PUSH (notwordchar);
02050 break;
02051
02052
02053 case '<':
02054 BUF_PUSH (wordbeg);
02055 break;
02056
02057 case '>':
02058 BUF_PUSH (wordend);
02059 break;
02060
02061 case 'b':
02062 BUF_PUSH (wordbound);
02063 break;
02064
02065 case 'B':
02066 BUF_PUSH (notwordbound);
02067 break;
02068
02069 case '`':
02070 BUF_PUSH (begbuf);
02071 break;
02072
02073 case '\'':
02074 BUF_PUSH (endbuf);
02075 break;
02076
02077 case '1': case '2': case '3': case '4': case '5':
02078 case '6': case '7': case '8': case '9':
02079 if (syntax & RE_NO_BK_REFS)
02080 goto normal_char;
02081
02082 c1 = c - '0';
02083
02084 if (c1 > regnum)
02085 return REG_ESUBREG;
02086
02087
02088 if (group_in_compile_stack (compile_stack, c1))
02089 goto normal_char;
02090
02091 laststart = b;
02092 BUF_PUSH_2 (duplicate, c1);
02093 break;
02094
02095
02096 case '+':
02097 case '?':
02098 if (syntax & RE_BK_PLUS_QM)
02099 goto handle_plus;
02100 else
02101 goto normal_backslash;
02102
02103 default:
02104 normal_backslash:
02105
02106
02107
02108 c = TRANSLATE (c);
02109 goto normal_char;
02110 }
02111 break;
02112
02113
02114 default:
02115
02116 normal_char:
02117
02118 if (!pending_exact
02119
02120
02121 || pending_exact + *pending_exact + 1 != b
02122
02123
02124 || *pending_exact == (1 << BYTEWIDTH) - 1
02125
02126
02127 || *p == '*' || *p == '^'
02128 || ((syntax & RE_BK_PLUS_QM)
02129 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02130 : (*p == '+' || *p == '?'))
02131 || ((syntax & RE_INTERVALS)
02132 && ((syntax & RE_NO_BK_BRACES)
02133 ? *p == '{'
02134 : (p[0] == '\\' && p[1] == '{'))))
02135 {
02136
02137
02138 laststart = b;
02139
02140 BUF_PUSH_2 (exactn, 0);
02141 pending_exact = b - 1;
02142 }
02143
02144 BUF_PUSH (c);
02145 (*pending_exact)++;
02146 break;
02147 }
02148 }
02149
02150
02151
02152
02153 if (fixup_alt_jump)
02154 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02155
02156 if (!COMPILE_STACK_EMPTY)
02157 return REG_EPAREN;
02158
02159 free (compile_stack.stack);
02160
02161
02162 bufp->used = b - bufp->buffer;
02163
02164 #ifdef DEBUG
02165 if (debug)
02166 {
02167 DEBUG_PRINT1 ("\nCompiled pattern: \n");
02168 print_compiled_pattern (bufp);
02169 }
02170 #endif
02171
02172 return REG_NOERROR;
02173 }
02174
02175
02176
02177
02178
02179 static void
02180 store_op1 (op, loc, arg)
02181 re_opcode_t op;
02182 unsigned char *loc;
02183 int arg;
02184 {
02185 *loc = (unsigned char) op;
02186 STORE_NUMBER (loc + 1, arg);
02187 }
02188
02189
02190
02191
02192 static void
02193 store_op2 (op, loc, arg1, arg2)
02194 re_opcode_t op;
02195 unsigned char *loc;
02196 int arg1, arg2;
02197 {
02198 *loc = (unsigned char) op;
02199 STORE_NUMBER (loc + 1, arg1);
02200 STORE_NUMBER (loc + 3, arg2);
02201 }
02202
02203
02204
02205
02206
02207 static void
02208 insert_op1 (op, loc, arg, end)
02209 re_opcode_t op;
02210 unsigned char *loc;
02211 int arg;
02212 unsigned char *end;
02213 {
02214 register unsigned char *pfrom = end;
02215 register unsigned char *pto = end + 3;
02216
02217 while (pfrom != loc)
02218 *--pto = *--pfrom;
02219
02220 store_op1 (op, loc, arg);
02221 }
02222
02223
02224
02225
02226 static void
02227 insert_op2 (op, loc, arg1, arg2, end)
02228 re_opcode_t op;
02229 unsigned char *loc;
02230 int arg1, arg2;
02231 unsigned char *end;
02232 {
02233 register unsigned char *pfrom = end;
02234 register unsigned char *pto = end + 5;
02235
02236 while (pfrom != loc)
02237 *--pto = *--pfrom;
02238
02239 store_op2 (op, loc, arg1, arg2);
02240 }
02241
02242
02243
02244
02245
02246
02247 static boolean
02248 at_begline_loc_p (pattern, p, syntax)
02249 const char *pattern, *p;
02250 reg_syntax_t syntax;
02251 {
02252 const char *prev = p - 2;
02253 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02254
02255 return
02256
02257 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02258
02259 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02260 }
02261
02262
02263
02264
02265
02266 static boolean
02267 at_endline_loc_p (p, pend, syntax)
02268 const char *p, *pend;
02269 int syntax;
02270 {
02271 const char *next = p;
02272 boolean next_backslash = *next == '\\';
02273 const char *next_next = p + 1 < pend ? p + 1 : NULL;
02274
02275 return
02276
02277 (syntax & RE_NO_BK_PARENS ? *next == ')'
02278 : next_backslash && next_next && *next_next == ')')
02279
02280 || (syntax & RE_NO_BK_VBAR ? *next == '|'
02281 : next_backslash && next_next && *next_next == '|');
02282 }
02283
02284
02285
02286
02287
02288 static boolean
02289 group_in_compile_stack (compile_stack, regnum)
02290 compile_stack_type compile_stack;
02291 regnum_t regnum;
02292 {
02293 int this_element;
02294
02295 for (this_element = compile_stack.avail - 1;
02296 this_element >= 0;
02297 this_element--)
02298 if (compile_stack.stack[this_element].regnum == regnum)
02299 return true;
02300
02301 return false;
02302 }
02303
02304
02305
02306
02307
02308
02309
02310
02311
02312
02313
02314
02315
02316 static reg_errcode_t
02317 compile_range (p_ptr, pend, translate, syntax, b)
02318 const char **p_ptr, *pend;
02319 char *translate;
02320 reg_syntax_t syntax;
02321 unsigned char *b;
02322 {
02323 unsigned this_char;
02324
02325 const char *p = *p_ptr;
02326 int range_start, range_end;
02327
02328 if (p == pend)
02329 return REG_ERANGE;
02330
02331
02332
02333
02334
02335
02336
02337
02338 range_start = ((unsigned char *) p)[-2];
02339 range_end = ((unsigned char *) p)[0];
02340
02341
02342
02343 (*p_ptr)++;
02344
02345
02346 if (range_start > range_end)
02347 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
02348
02349
02350
02351
02352
02353 for (this_char = range_start; this_char <= range_end; this_char++)
02354 {
02355 SET_LIST_BIT (TRANSLATE (this_char));
02356 }
02357
02358 return REG_NOERROR;
02359 }
02360
02361
02362
02363
02364
02365
02366
02367
02368
02369 #ifndef INIT_FAILURE_ALLOC
02370 #define INIT_FAILURE_ALLOC 5
02371 #endif
02372
02373
02374
02375
02376
02377 int re_max_failures = 2000;
02378
02379 typedef const unsigned char *fail_stack_elt_t;
02380
02381 typedef struct
02382 {
02383 fail_stack_elt_t *stack;
02384 unsigned size;
02385 unsigned avail;
02386 } fail_stack_type;
02387
02388 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
02389 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
02390 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
02391 #define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
02392
02393
02394
02395
02396 #define INIT_FAIL_STACK() \
02397 do { \
02398 fail_stack.stack = (fail_stack_elt_t *) \
02399 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
02400 \
02401 if (fail_stack.stack == NULL) \
02402 return -2; \
02403 \
02404 fail_stack.size = INIT_FAILURE_ALLOC; \
02405 fail_stack.avail = 0; \
02406 } while (0)
02407
02408
02409
02410
02411
02412
02413
02414
02415
02416 #define DOUBLE_FAIL_STACK(fail_stack) \
02417 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
02418 ? 0 \
02419 : ((fail_stack).stack = (fail_stack_elt_t *) \
02420 REGEX_REALLOCATE ((fail_stack).stack, \
02421 (fail_stack).size * sizeof (fail_stack_elt_t), \
02422 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
02423 \
02424 (fail_stack).stack == NULL \
02425 ? 0 \
02426 : ((fail_stack).size <<= 1, \
02427 1)))
02428
02429
02430
02431
02432
02433
02434 #define PUSH_PATTERN_OP(pattern_op, fail_stack) \
02435 ((FAIL_STACK_FULL () \
02436 && !DOUBLE_FAIL_STACK (fail_stack)) \
02437 ? 0 \
02438 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
02439 1))
02440
02441
02442
02443
02444 #define PUSH_FAILURE_ITEM(item) \
02445 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
02446
02447
02448 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
02449
02450
02451 #ifdef DEBUG
02452 #define DEBUG_PUSH PUSH_FAILURE_ITEM
02453 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
02454 #else
02455 #define DEBUG_PUSH(item)
02456 #define DEBUG_POP(item_addr)
02457 #endif
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
02470 do { \
02471 char *destination; \
02472
02473 \
02474 int this_reg; \
02475 \
02476 DEBUG_STATEMENT (failure_id++); \
02477 DEBUG_STATEMENT (nfailure_points_pushed++); \
02478 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
02479 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
02480 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
02481 \
02482 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
02483 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
02484 \
02485 \
02486 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
02487 { \
02488 if (!DOUBLE_FAIL_STACK (fail_stack)) \
02489 return failure_code; \
02490 \
02491 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
02492 (fail_stack).size); \
02493 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
02494 } \
02495 \
02496 \
02497 DEBUG_PRINT1 ("\n"); \
02498 \
02499 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
02500 this_reg++) \
02501 { \
02502 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
02503 DEBUG_STATEMENT (num_regs_pushed++); \
02504 \
02505 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
02506 PUSH_FAILURE_ITEM (regstart[this_reg]); \
02507 \
02508 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
02509 PUSH_FAILURE_ITEM (regend[this_reg]); \
02510 \
02511 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
02512 DEBUG_PRINT2 (" match_null=%d", \
02513 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
02514 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
02515 DEBUG_PRINT2 (" matched_something=%d", \
02516 MATCHED_SOMETHING (reg_info[this_reg])); \
02517 DEBUG_PRINT2 (" ever_matched=%d", \
02518 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
02519 DEBUG_PRINT1 ("\n"); \
02520 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
02521 } \
02522 \
02523 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
02524 PUSH_FAILURE_ITEM (lowest_active_reg); \
02525 \
02526 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
02527 PUSH_FAILURE_ITEM (highest_active_reg); \
02528 \
02529 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
02530 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
02531 PUSH_FAILURE_ITEM (pattern_place); \
02532 \
02533 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
02534 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
02535 size2); \
02536 DEBUG_PRINT1 ("'\n"); \
02537 PUSH_FAILURE_ITEM (string_place); \
02538 \
02539 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
02540 DEBUG_PUSH (failure_id); \
02541 } while (0)
02542
02543
02544
02545 #define NUM_REG_ITEMS 3
02546
02547
02548 #ifdef DEBUG
02549 #define NUM_NONREG_ITEMS 5
02550 #else
02551 #define NUM_NONREG_ITEMS 4
02552 #endif
02553
02554
02555 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
02556
02557
02558 #define NUM_FAILURE_ITEMS \
02559 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
02560 + NUM_NONREG_ITEMS)
02561
02562
02563 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
02564
02565
02566
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577
02578 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
02579 { \
02580 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
02581 int this_reg; \
02582 const unsigned char *string_temp; \
02583 \
02584 assert (!FAIL_STACK_EMPTY ()); \
02585 \
02586 \
02587 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
02588 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
02589 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
02590 \
02591 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
02592 \
02593 DEBUG_POP (&failure_id); \
02594 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
02595 \
02596
02597
02598 \
02599 string_temp = POP_FAILURE_ITEM (); \
02600 if (string_temp != NULL) \
02601 str = (const char *) string_temp; \
02602 \
02603 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
02604 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
02605 DEBUG_PRINT1 ("'\n"); \
02606 \
02607 pat = (unsigned char *) POP_FAILURE_ITEM (); \
02608 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
02609 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
02610 \
02611 \
02612 high_reg = (unsigned) POP_FAILURE_ITEM (); \
02613 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
02614 \
02615 low_reg = (unsigned) POP_FAILURE_ITEM (); \
02616 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
02617 \
02618 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
02619 { \
02620 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
02621 \
02622 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
02623 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
02624 \
02625 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
02626 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
02627 \
02628 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
02629 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
02630 } \
02631 \
02632 DEBUG_STATEMENT (nfailure_points_popped++); \
02633 }
02634
02635
02636
02637
02638
02639
02640
02641
02642
02643
02644
02645
02646
02647
02648 int
02649 re_compile_fastmap (bufp)
02650 struct re_pattern_buffer *bufp;
02651 {
02652 int j, k;
02653 fail_stack_type fail_stack;
02654 #ifndef REGEX_MALLOC
02655 char *destination;
02656 #endif
02657
02658 unsigned num_regs = 0;
02659
02660 register char *fastmap = bufp->fastmap;
02661 unsigned char *pattern = bufp->buffer;
02662 unsigned long size = bufp->used;
02663 const unsigned char *p = pattern;
02664 register unsigned char *pend = pattern + size;
02665
02666
02667
02668
02669
02670 boolean path_can_be_null = true;
02671
02672
02673 boolean succeed_n_p = false;
02674
02675 assert (fastmap != NULL && p != NULL);
02676
02677 INIT_FAIL_STACK ();
02678 bzero (fastmap, 1 << BYTEWIDTH);
02679 bufp->fastmap_accurate = 1;
02680 bufp->can_be_null = 0;
02681
02682 while (p != pend || !FAIL_STACK_EMPTY ())
02683 {
02684 if (p == pend)
02685 {
02686 bufp->can_be_null |= path_can_be_null;
02687
02688
02689 path_can_be_null = true;
02690
02691 p = fail_stack.stack[--fail_stack.avail];
02692 }
02693
02694
02695 assert (p < pend);
02696
02697 #ifdef SWITCH_ENUM_BUG
02698 switch ((int) ((re_opcode_t) *p++))
02699 #else
02700 switch ((re_opcode_t) *p++)
02701 #endif
02702 {
02703
02704
02705
02706
02707
02708
02709 case duplicate:
02710 bufp->can_be_null = 1;
02711 return 0;
02712
02713
02714
02715
02716
02717 case exactn:
02718 fastmap[p[1]] = 1;
02719 break;
02720
02721
02722 case charset:
02723 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02724 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
02725 fastmap[j] = 1;
02726 break;
02727
02728
02729 case charset_not:
02730
02731 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
02732 fastmap[j] = 1;
02733
02734 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02735 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
02736 fastmap[j] = 1;
02737 break;
02738
02739
02740 case wordchar:
02741 for (j = 0; j < (1 << BYTEWIDTH); j++)
02742 if (SYNTAX (j) == Sword)
02743 fastmap[j] = 1;
02744 break;
02745
02746
02747 case notwordchar:
02748 for (j = 0; j < (1 << BYTEWIDTH); j++)
02749 if (SYNTAX (j) != Sword)
02750 fastmap[j] = 1;
02751 break;
02752
02753
02754 case anychar:
02755
02756 for (j = 0; j < (1 << BYTEWIDTH); j++)
02757 fastmap[j] = 1;
02758
02759
02760 if (!(bufp->syntax & RE_DOT_NEWLINE))
02761 fastmap['\n'] = 0;
02762
02763
02764
02765 else if (bufp->can_be_null)
02766 return 0;
02767
02768
02769 break;
02770
02771
02772 #ifdef emacs
02773 case syntaxspec:
02774 k = *p++;
02775 for (j = 0; j < (1 << BYTEWIDTH); j++)
02776 if (SYNTAX (j) == (enum syntaxcode) k)
02777 fastmap[j] = 1;
02778 break;
02779
02780
02781 case notsyntaxspec:
02782 k = *p++;
02783 for (j = 0; j < (1 << BYTEWIDTH); j++)
02784 if (SYNTAX (j) != (enum syntaxcode) k)
02785 fastmap[j] = 1;
02786 break;
02787
02788
02789
02790
02791
02792
02793 case before_dot:
02794 case at_dot:
02795 case after_dot:
02796 continue;
02797 #endif
02798
02799
02800 case no_op:
02801 case begline:
02802 case endline:
02803 case begbuf:
02804 case endbuf:
02805 case wordbound:
02806 case notwordbound:
02807 case wordbeg:
02808 case wordend:
02809 case push_dummy_failure:
02810 continue;
02811
02812
02813 case jump_n:
02814 case pop_failure_jump:
02815 case maybe_pop_jump:
02816 case jump:
02817 case jump_past_alt:
02818 case dummy_failure_jump:
02819 EXTRACT_NUMBER_AND_INCR (j, p);
02820 p += j;
02821 if (j > 0)
02822 continue;
02823
02824
02825
02826
02827
02828
02829 if ((re_opcode_t) *p != on_failure_jump
02830 && (re_opcode_t) *p != succeed_n)
02831 continue;
02832
02833 p++;
02834 EXTRACT_NUMBER_AND_INCR (j, p);
02835 p += j;
02836
02837
02838 if (!FAIL_STACK_EMPTY ()
02839 && fail_stack.stack[fail_stack.avail - 1] == p)
02840 fail_stack.avail--;
02841
02842 continue;
02843
02844
02845 case on_failure_jump:
02846 case on_failure_keep_string_jump:
02847 handle_on_failure_jump:
02848 EXTRACT_NUMBER_AND_INCR (j, p);
02849
02850
02851
02852
02853
02854
02855
02856
02857 if (p + j < pend)
02858 {
02859 if (!PUSH_PATTERN_OP (p + j, fail_stack))
02860 return -2;
02861 }
02862 else
02863 bufp->can_be_null = 1;
02864
02865 if (succeed_n_p)
02866 {
02867 EXTRACT_NUMBER_AND_INCR (k, p);
02868 succeed_n_p = false;
02869 }
02870
02871 continue;
02872
02873
02874 case succeed_n:
02875
02876 p += 2;
02877
02878
02879 EXTRACT_NUMBER_AND_INCR (k, p);
02880 if (k == 0)
02881 {
02882 p -= 4;
02883 succeed_n_p = true;
02884 goto handle_on_failure_jump;
02885 }
02886 continue;
02887
02888
02889 case set_number_at:
02890 p += 4;
02891 continue;
02892
02893
02894 case start_memory:
02895 case stop_memory:
02896 p += 2;
02897 continue;
02898
02899
02900 default:
02901 abort ();
02902 }
02903
02904
02905
02906
02907
02908
02909
02910 path_can_be_null = false;
02911 p = pend;
02912 }
02913
02914
02915
02916 bufp->can_be_null |= path_can_be_null;
02917 return 0;
02918 }
02919
02920
02921
02922
02923
02924
02925
02926
02927
02928
02929
02930
02931
02932
02933 void
02934 re_set_registers (bufp, regs, num_regs, starts, ends)
02935 struct re_pattern_buffer *bufp;
02936 struct re_registers *regs;
02937 unsigned num_regs;
02938 regoff_t *starts, *ends;
02939 {
02940 if (num_regs)
02941 {
02942 bufp->regs_allocated = REGS_REALLOCATE;
02943 regs->num_regs = num_regs;
02944 regs->start = starts;
02945 regs->end = ends;
02946 }
02947 else
02948 {
02949 bufp->regs_allocated = REGS_UNALLOCATED;
02950 regs->num_regs = 0;
02951 regs->start = regs->end = (regoff_t) 0;
02952 }
02953 }
02954
02955
02956
02957
02958
02959
02960 int
02961 re_search (bufp, string, size, startpos, range, regs)
02962 struct re_pattern_buffer *bufp;
02963 const char *string;
02964 int size, startpos, range;
02965 struct re_registers *regs;
02966 {
02967 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
02968 regs, size);
02969 }
02970
02971
02972
02973
02974
02975
02976
02977
02978
02979
02980
02981
02982
02983
02984
02985
02986
02987
02988
02989
02990
02991
02992
02993 int
02994 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
02995 struct re_pattern_buffer *bufp;
02996 const char *string1, *string2;
02997 int size1, size2;
02998 int startpos;
02999 int range;
03000 struct re_registers *regs;
03001 int stop;
03002 {
03003 int val;
03004 register char *fastmap = bufp->fastmap;
03005 register char *translate = bufp->translate;
03006 int total_size = size1 + size2;
03007 int endpos = startpos + range;
03008
03009
03010 if (startpos < 0 || startpos > total_size)
03011 return -1;
03012
03013
03014
03015 if (endpos < -1)
03016 range = -1 - startpos;
03017 else if (endpos > total_size)
03018 range = total_size - startpos;
03019
03020
03021
03022 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03023 {
03024 if (startpos > 0)
03025 return -1;
03026 else
03027 range = 1;
03028 }
03029
03030
03031 if (fastmap && !bufp->fastmap_accurate)
03032 if (re_compile_fastmap (bufp) == -2)
03033 return -2;
03034
03035
03036 for (;;)
03037 {
03038
03039
03040
03041
03042 if (fastmap && startpos < total_size && !bufp->can_be_null)
03043 {
03044 if (range > 0)
03045 {
03046 register const char *d;
03047 register int lim = 0;
03048 int irange = range;
03049
03050 if (startpos < size1 && startpos + range >= size1)
03051 lim = range - (size1 - startpos);
03052
03053 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03054
03055
03056
03057 if (translate)
03058 while (range > lim
03059 && !fastmap[(unsigned char)
03060 translate[(unsigned char) *d++]])
03061 range--;
03062 else
03063 while (range > lim && !fastmap[(unsigned char) *d++])
03064 range--;
03065
03066 startpos += irange - range;
03067 }
03068 else
03069 {
03070 register char c = (size1 == 0 || startpos >= size1
03071 ? string2[startpos - size1]
03072 : string1[startpos]);
03073
03074 if (!fastmap[(unsigned char) TRANSLATE (c)])
03075 goto advance;
03076 }
03077 }
03078
03079
03080 if (range >= 0 && startpos == total_size && fastmap
03081 && !bufp->can_be_null)
03082 return -1;
03083
03084 val = re_match_2 (bufp, string1, size1, string2, size2,
03085 startpos, regs, stop);
03086 if (val >= 0)
03087 return startpos;
03088
03089 if (val == -2)
03090 return -2;
03091
03092 advance:
03093 if (!range)
03094 break;
03095 else if (range > 0)
03096 {
03097 range--;
03098 startpos++;
03099 }
03100 else
03101 {
03102 range++;
03103 startpos--;
03104 }
03105 }
03106 return -1;
03107 }
03108
03109
03110
03111 static int
03112 bcmp_translate (
03113 #ifdef __STDC__
03114 unsigned char *s1,
03115 unsigned char *s2,
03116 register int len,
03117 char *translate
03118 #endif
03119 );
03120
03121
03122
03123
03124
03125
03126
03127
03128
03129
03130
03131
03132 typedef union
03133 {
03134 fail_stack_elt_t word;
03135 struct
03136 {
03137
03138
03139 #define MATCH_NULL_UNSET_VALUE 3
03140 unsigned match_null_string_p : 2;
03141 unsigned is_active : 1;
03142 unsigned matched_something : 1;
03143 unsigned ever_matched_something : 1;
03144 } bits;
03145 } register_info_type;
03146 static boolean common_op_match_null_string_p (
03147 #ifdef __STDC__
03148 unsigned char **p,
03149 unsigned char *end,
03150 register_info_type *reg_info
03151 #endif
03152 );
03153 static boolean alt_match_null_string_p (
03154 #ifdef __STDC__
03155 unsigned char *p,
03156 unsigned char *end,
03157 register_info_type *reg_info
03158 #endif
03159 );
03160
03161 static boolean
03162 group_match_null_string_p (
03163 #ifdef __STDC__
03164 unsigned char **p,
03165 unsigned char *end,
03166 register_info_type *reg_info
03167 #endif
03168 );
03169
03170
03171
03172 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
03173 #define IS_ACTIVE(R) ((R).bits.is_active)
03174 #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
03175 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
03176
03177
03178
03179
03180
03181 #define SET_REGS_MATCHED() \
03182 do \
03183 { \
03184 unsigned r; \
03185 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
03186 { \
03187 MATCHED_SOMETHING (reg_info[r]) \
03188 = EVER_MATCHED_SOMETHING (reg_info[r]) \
03189 = 1; \
03190 } \
03191 } \
03192 while (0)
03193
03194
03195
03196
03197 #define POINTER_TO_OFFSET(ptr) \
03198 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
03199
03200
03201 #define REG_UNSET_VALUE ((char *) -1)
03202 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
03203
03204
03205
03206
03207 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
03208
03209
03210
03211 #define PREFETCH() \
03212 while (d == dend) \
03213 { \
03214 \
03215 if (dend == end_match_2) \
03216 goto fail; \
03217 \
03218 d = string2; \
03219 dend = end_match_2; \
03220 }
03221
03222
03223
03224
03225 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03226 #define AT_STRINGS_END(d) ((d) == end2)
03227
03228
03229
03230
03231
03232
03233 #define WORDCHAR_P(d) \
03234 (SYNTAX ((d) == end1 ? *string2 \
03235 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
03236 == Sword)
03237
03238
03239
03240 #define AT_WORD_BOUNDARY(d) \
03241 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
03242 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03243
03244
03245
03246 #ifdef REGEX_MALLOC
03247 #define FREE_VAR(var) if (var) free (var); var = NULL
03248 #define FREE_VARIABLES() \
03249 do { \
03250 FREE_VAR (fail_stack.stack); \
03251 FREE_VAR (regstart); \
03252 FREE_VAR (regend); \
03253 FREE_VAR (old_regstart); \
03254 FREE_VAR (old_regend); \
03255 FREE_VAR (best_regstart); \
03256 FREE_VAR (best_regend); \
03257 FREE_VAR (reg_info); \
03258 FREE_VAR (reg_dummy); \
03259 FREE_VAR (reg_info_dummy); \
03260 } while (0)
03261 #else
03262
03263 #define FREE_VARIABLES() alloca (0)
03264 #endif
03265
03266
03267
03268
03269
03270
03271
03272
03273
03274 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03275 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03276
03277
03278
03279 #ifndef emacs
03280
03281
03282 int
03283 re_match (bufp, string, size, pos, regs)
03284 struct re_pattern_buffer *bufp;
03285 const char *string;
03286 int size, pos;
03287 struct re_registers *regs;
03288 {
03289 return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
03290 }
03291 #endif
03292
03293
03294
03295
03296
03297
03298
03299
03300
03301
03302
03303
03304
03305
03306
03307 int
03308 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
03309 struct re_pattern_buffer *bufp;
03310 const char *string1, *string2;
03311 int size1, size2;
03312 int pos;
03313 struct re_registers *regs;
03314 int stop;
03315 {
03316
03317 int mcnt;
03318 unsigned char *p1;
03319
03320
03321 const char *end1, *end2;
03322
03323
03324
03325 const char *end_match_1, *end_match_2;
03326
03327
03328 const char *d, *dend;
03329
03330
03331 unsigned char *p = bufp->buffer;
03332 register unsigned char *pend = p + bufp->used;
03333
03334
03335 char *translate = bufp->translate;
03336
03337
03338
03339
03340
03341
03342
03343
03344
03345
03346 fail_stack_type fail_stack;
03347 #ifdef DEBUG
03348 static unsigned failure_id = 0;
03349 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03350 #endif
03351
03352
03353
03354
03355 unsigned num_regs = bufp->re_nsub + 1;
03356
03357
03358 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03359 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03360
03361
03362
03363
03364
03365
03366
03367
03368 const char **regstart, **regend;
03369
03370
03371
03372
03373
03374
03375 const char **old_regstart, **old_regend;
03376
03377
03378
03379
03380
03381
03382
03383 register_info_type *reg_info;
03384
03385
03386
03387
03388
03389 unsigned best_regs_set = false;
03390 const char **best_regstart, **best_regend;
03391
03392
03393
03394
03395
03396
03397
03398
03399
03400 const char *match_end = NULL;
03401
03402
03403 const char **reg_dummy;
03404 register_info_type *reg_info_dummy;
03405
03406 #ifdef DEBUG
03407
03408 unsigned num_regs_pushed = 0;
03409 #endif
03410
03411 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03412
03413 INIT_FAIL_STACK ();
03414
03415
03416
03417
03418
03419
03420 if (bufp->re_nsub)
03421 {
03422 regstart = REGEX_TALLOC (num_regs, const char *);
03423 regend = REGEX_TALLOC (num_regs, const char *);
03424 old_regstart = REGEX_TALLOC (num_regs, const char *);
03425 old_regend = REGEX_TALLOC (num_regs, const char *);
03426 best_regstart = REGEX_TALLOC (num_regs, const char *);
03427 best_regend = REGEX_TALLOC (num_regs, const char *);
03428 reg_info = REGEX_TALLOC (num_regs, register_info_type);
03429 reg_dummy = REGEX_TALLOC (num_regs, const char *);
03430 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03431
03432 if (!(regstart && regend && old_regstart && old_regend && reg_info
03433 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03434 {
03435 FREE_VARIABLES ();
03436 return -2;
03437 }
03438 }
03439 #ifdef REGEX_MALLOC
03440 else
03441 {
03442
03443
03444 regstart = regend = old_regstart = old_regend = best_regstart
03445 = best_regend = reg_dummy = NULL;
03446 reg_info = reg_info_dummy = (register_info_type *) NULL;
03447 }
03448 #endif
03449
03450
03451 if (pos < 0 || pos > size1 + size2)
03452 {
03453 FREE_VARIABLES ();
03454 return -1;
03455 }
03456
03457
03458
03459
03460 for (mcnt = 1; mcnt < num_regs; mcnt++)
03461 {
03462 regstart[mcnt] = regend[mcnt]
03463 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03464
03465 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03466 IS_ACTIVE (reg_info[mcnt]) = 0;
03467 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03468 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03469 }
03470
03471
03472
03473 if (size2 == 0 && string1 != NULL)
03474 {
03475 string2 = string1;
03476 size2 = size1;
03477 string1 = 0;
03478 size1 = 0;
03479 }
03480 end1 = string1 + size1;
03481 end2 = string2 + size2;
03482
03483
03484 if (stop <= size1)
03485 {
03486 end_match_1 = string1 + stop;
03487 end_match_2 = string2;
03488 }
03489 else
03490 {
03491 end_match_1 = end1;
03492 end_match_2 = string2 + stop - size1;
03493 }
03494
03495
03496
03497
03498
03499
03500
03501 if (size1 > 0 && pos <= size1)
03502 {
03503 d = string1 + pos;
03504 dend = end_match_1;
03505 }
03506 else
03507 {
03508 d = string2 + pos - size1;
03509 dend = end_match_2;
03510 }
03511
03512 DEBUG_PRINT1 ("The compiled pattern is: ");
03513 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03514 DEBUG_PRINT1 ("The string to match is: `");
03515 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03516 DEBUG_PRINT1 ("'\n");
03517
03518
03519
03520
03521 for (;;)
03522 {
03523 DEBUG_PRINT2 ("\n0x%x: ", p);
03524
03525 if (p == pend)
03526 {
03527 DEBUG_PRINT1 ("end of pattern ... ");
03528
03529
03530
03531 if (d != end_match_2)
03532 {
03533 DEBUG_PRINT1 ("backtracking.\n");
03534
03535 if (!FAIL_STACK_EMPTY ())
03536 {
03537 boolean same_str_p = (FIRST_STRING_P (match_end)
03538 == MATCHING_IN_FIRST_STRING);
03539
03540
03541 if (!best_regs_set
03542 || (same_str_p && d > match_end)
03543 || (!same_str_p && !MATCHING_IN_FIRST_STRING))
03544 {
03545 best_regs_set = true;
03546 match_end = d;
03547
03548 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
03549
03550 for (mcnt = 1; mcnt < num_regs; mcnt++)
03551 {
03552 best_regstart[mcnt] = regstart[mcnt];
03553 best_regend[mcnt] = regend[mcnt];
03554 }
03555 }
03556 goto fail;
03557 }
03558
03559
03560 else if (best_regs_set)
03561 {
03562 restore_best_regs:
03563
03564
03565
03566
03567
03568 DEBUG_PRINT1 ("Restoring best registers.\n");
03569
03570 d = match_end;
03571 dend = ((d >= string1 && d <= end1)
03572 ? end_match_1 : end_match_2);
03573
03574 for (mcnt = 1; mcnt < num_regs; mcnt++)
03575 {
03576 regstart[mcnt] = best_regstart[mcnt];
03577 regend[mcnt] = best_regend[mcnt];
03578 }
03579 }
03580 }
03581
03582 DEBUG_PRINT1 ("Accepting match.\n");
03583
03584
03585 if (regs && !bufp->no_sub)
03586 {
03587
03588 if (bufp->regs_allocated == REGS_UNALLOCATED)
03589 {
03590
03591
03592 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
03593 regs->start = TALLOC (regs->num_regs, regoff_t);
03594 regs->end = TALLOC (regs->num_regs, regoff_t);
03595 if (regs->start == NULL || regs->end == NULL)
03596 return -2;
03597 bufp->regs_allocated = REGS_REALLOCATE;
03598 }
03599 else if (bufp->regs_allocated == REGS_REALLOCATE)
03600 {
03601
03602
03603 if (regs->num_regs < num_regs + 1)
03604 {
03605 regs->num_regs = num_regs + 1;
03606 RETALLOC (regs->start, regs->num_regs, regoff_t);
03607 RETALLOC (regs->end, regs->num_regs, regoff_t);
03608 if (regs->start == NULL || regs->end == NULL)
03609 return -2;
03610 }
03611 }
03612 else
03613 {
03614
03615
03616 assert (bufp->regs_allocated == REGS_FIXED);
03617 }
03618
03619
03620
03621
03622 if (regs->num_regs > 0)
03623 {
03624 regs->start[0] = pos;
03625 regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
03626 : d - string2 + size1);
03627 }
03628
03629
03630
03631 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
03632 {
03633 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
03634 regs->start[mcnt] = regs->end[mcnt] = -1;
03635 else
03636 {
03637 regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
03638 regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
03639 }
03640 }
03641
03642
03643
03644
03645
03646
03647 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
03648 regs->start[mcnt] = regs->end[mcnt] = -1;
03649 }
03650
03651 FREE_VARIABLES ();
03652 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
03653 nfailure_points_pushed, nfailure_points_popped,
03654 nfailure_points_pushed - nfailure_points_popped);
03655 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
03656
03657 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
03658 ? string1
03659 : string2 - size1);
03660
03661 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
03662
03663 return mcnt;
03664 }
03665
03666
03667 #ifdef SWITCH_ENUM_BUG
03668 switch ((int) ((re_opcode_t) *p++))
03669 #else
03670 switch ((re_opcode_t) *p++)
03671 #endif
03672 {
03673
03674
03675 case no_op:
03676 DEBUG_PRINT1 ("EXECUTING no_op.\n");
03677 break;
03678
03679
03680
03681
03682
03683 case exactn:
03684 mcnt = *p++;
03685 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
03686
03687
03688
03689 if (translate)
03690 {
03691 do
03692 {
03693 PREFETCH ();
03694 if (translate[(unsigned char) *d++] != (char) *p++)
03695 goto fail;
03696 }
03697 while (--mcnt);
03698 }
03699 else
03700 {
03701 do
03702 {
03703 PREFETCH ();
03704 if (*d++ != (char) *p++) goto fail;
03705 }
03706 while (--mcnt);
03707 }
03708 SET_REGS_MATCHED ();
03709 break;
03710
03711
03712
03713 case anychar:
03714 DEBUG_PRINT1 ("EXECUTING anychar.\n");
03715
03716 PREFETCH ();
03717
03718 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
03719 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
03720 goto fail;
03721
03722 SET_REGS_MATCHED ();
03723 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
03724 d++;
03725 break;
03726
03727
03728 case charset:
03729 case charset_not:
03730 {
03731 register unsigned char c;
03732 boolean not = (re_opcode_t) *(p - 1) == charset_not;
03733
03734 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
03735
03736 PREFETCH ();
03737 c = TRANSLATE (*d);
03738
03739
03740
03741 if (c < (unsigned) (*p * BYTEWIDTH)
03742 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
03743 not = !not;
03744
03745 p += 1 + *p;
03746
03747 if (!not) goto fail;
03748
03749 SET_REGS_MATCHED ();
03750 d++;
03751 break;
03752 }
03753
03754
03755
03756
03757
03758
03759
03760 case start_memory:
03761 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
03762
03763
03764 p1 = p;
03765
03766 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
03767 REG_MATCH_NULL_STRING_P (reg_info[*p])
03768 = group_match_null_string_p (&p1, pend, reg_info);
03769
03770
03771
03772
03773
03774
03775 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
03776 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
03777 : regstart[*p];
03778 DEBUG_PRINT2 (" old_regstart: %d\n",
03779 POINTER_TO_OFFSET (old_regstart[*p]));
03780
03781 regstart[*p] = d;
03782 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
03783
03784 IS_ACTIVE (reg_info[*p]) = 1;
03785 MATCHED_SOMETHING (reg_info[*p]) = 0;
03786
03787
03788 highest_active_reg = *p;
03789
03790
03791
03792 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
03793 lowest_active_reg = *p;
03794
03795
03796 p += 2;
03797 break;
03798
03799
03800
03801
03802
03803 case stop_memory:
03804 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
03805
03806
03807
03808
03809
03810
03811 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
03812 ? REG_UNSET (regend[*p]) ? d : regend[*p]
03813 : regend[*p];
03814 DEBUG_PRINT2 (" old_regend: %d\n",
03815 POINTER_TO_OFFSET (old_regend[*p]));
03816
03817 regend[*p] = d;
03818 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
03819
03820
03821 IS_ACTIVE (reg_info[*p]) = 0;
03822
03823
03824
03825 if (lowest_active_reg == highest_active_reg)
03826 {
03827 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03828 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03829 }
03830 else
03831 {
03832
03833
03834
03835 unsigned char r = *p - 1;
03836 while (r > 0 && !IS_ACTIVE (reg_info[r]))
03837 r--;
03838
03839
03840
03841
03842
03843
03844
03845
03846 if (r == 0)
03847 {
03848 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03849 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03850 }
03851 else
03852 highest_active_reg = r;
03853 }
03854
03855
03856
03857
03858
03859
03860 if ((!MATCHED_SOMETHING (reg_info[*p])
03861 || (re_opcode_t) p[-3] == start_memory)
03862 && (p + 2) < pend)
03863 {
03864 boolean is_a_jump_n = false;
03865
03866 p1 = p + 2;
03867 mcnt = 0;
03868 switch ((re_opcode_t) *p1++)
03869 {
03870 case jump_n:
03871 is_a_jump_n = true;
03872 case pop_failure_jump:
03873 case maybe_pop_jump:
03874 case jump:
03875 case dummy_failure_jump:
03876 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
03877 if (is_a_jump_n)
03878 p1 += 2;
03879 break;
03880
03881 default:
03882 ;
03883 }
03884 p1 += mcnt;
03885
03886
03887
03888
03889
03890
03891 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
03892 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
03893 {
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
03905 {
03906 unsigned r;
03907
03908 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
03909
03910
03911 for (r = *p; r < *p + *(p + 1); r++)
03912 {
03913 regstart[r] = old_regstart[r];
03914
03915
03916 if ((int) old_regend[r] >= (int) regstart[r])
03917 regend[r] = old_regend[r];
03918 }
03919 }
03920 p1++;
03921 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
03922 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
03923
03924 goto fail;
03925 }
03926 }
03927
03928
03929 p += 2;
03930 break;
03931
03932
03933
03934
03935 case duplicate:
03936 {
03937 register const char *d2, *dend2;
03938 int regno = *p++;
03939 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
03940
03941
03942 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
03943 goto fail;
03944
03945
03946 d2 = regstart[regno];
03947
03948
03949
03950
03951
03952
03953 dend2 = ((FIRST_STRING_P (regstart[regno])
03954 == FIRST_STRING_P (regend[regno]))
03955 ? regend[regno] : end_match_1);
03956 for (;;)
03957 {
03958
03959
03960 while (d2 == dend2)
03961 {
03962 if (dend2 == end_match_2) break;
03963 if (dend2 == regend[regno]) break;
03964
03965
03966 d2 = string2;
03967 dend2 = regend[regno];
03968 }
03969
03970 if (d2 == dend2) break;
03971
03972
03973 PREFETCH ();
03974
03975
03976 mcnt = dend - d;
03977
03978
03979
03980 if (mcnt > dend2 - d2)
03981 mcnt = dend2 - d2;
03982
03983
03984
03985 if (translate
03986 ? bcmp_translate ((unsigned char*)(d), (unsigned char *)(d2), mcnt, translate)
03987 : bcmp (d, d2, mcnt))
03988 goto fail;
03989 d += mcnt, d2 += mcnt;
03990 }
03991 }
03992 break;
03993
03994
03995
03996
03997
03998 case begline:
03999 DEBUG_PRINT1 ("EXECUTING begline.\n");
04000
04001 if (AT_STRINGS_BEG (d))
04002 {
04003 if (!bufp->not_bol) break;
04004 }
04005 else if (d[-1] == '\n' && bufp->newline_anchor)
04006 {
04007 break;
04008 }
04009
04010 goto fail;
04011
04012
04013
04014 case endline:
04015 DEBUG_PRINT1 ("EXECUTING endline.\n");
04016
04017 if (AT_STRINGS_END (d))
04018 {
04019 if (!bufp->not_eol) break;
04020 }
04021
04022
04023 else if ((d == end1 ? *string2 : *d) == '\n'
04024 && bufp->newline_anchor)
04025 {
04026 break;
04027 }
04028 goto fail;
04029
04030
04031
04032 case begbuf:
04033 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04034 if (AT_STRINGS_BEG (d))
04035 break;
04036 goto fail;
04037
04038
04039
04040 case endbuf:
04041 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04042 if (AT_STRINGS_END (d))
04043 break;
04044 goto fail;
04045
04046
04047
04048
04049
04050
04051
04052
04053
04054
04055
04056
04057
04058
04059
04060
04061
04062
04063 case on_failure_keep_string_jump:
04064 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04065
04066 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04067 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04068
04069 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04070 break;
04071
04072
04073
04074
04075
04076
04077
04078
04079
04080
04081
04082
04083
04084
04085 case on_failure_jump:
04086 on_failure:
04087 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04088
04089 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04090 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04091
04092
04093
04094
04095
04096
04097
04098
04099
04100
04101 p1 = p;
04102
04103
04104
04105
04106
04107 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04108 p1++;
04109
04110 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04111 {
04112
04113
04114
04115
04116 highest_active_reg = *(p1 + 1) + *(p1 + 2);
04117 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04118 lowest_active_reg = *(p1 + 1);
04119 }
04120
04121 DEBUG_PRINT1 (":\n");
04122 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04123 break;
04124
04125
04126
04127
04128 case maybe_pop_jump:
04129 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04130 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04131 {
04132 register unsigned char *p2 = p;
04133
04134
04135
04136
04137
04138
04139
04140
04141
04142
04143
04144
04145
04146
04147
04148 while (p2 + 2 < pend
04149 && ((re_opcode_t) *p2 == stop_memory
04150 || (re_opcode_t) *p2 == start_memory))
04151 p2 += 3;
04152
04153
04154 if (p2 == pend)
04155 {
04156
04157
04158
04159 p[-3] = (unsigned char) pop_failure_jump;
04160 DEBUG_PRINT1
04161 (" End of pattern: change to `pop_failure_jump'.\n");
04162 }
04163
04164 else if ((re_opcode_t) *p2 == exactn
04165 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04166 {
04167 register unsigned char c
04168 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04169 p1 = p + mcnt;
04170
04171
04172
04173
04174 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04175 {
04176 p[-3] = (unsigned char) pop_failure_jump;
04177 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04178 c, p1[5]);
04179 }
04180
04181 else if ((re_opcode_t) p1[3] == charset
04182 || (re_opcode_t) p1[3] == charset_not)
04183 {
04184 int not = (re_opcode_t) p1[3] == charset_not;
04185
04186 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04187 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04188 not = !not;
04189
04190
04191
04192 if (!not)
04193 {
04194 p[-3] = (unsigned char) pop_failure_jump;
04195 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04196 }
04197 }
04198 }
04199 }
04200 p -= 2;
04201 if ((re_opcode_t) p[-1] != pop_failure_jump)
04202 {
04203 p[-1] = (unsigned char) jump;
04204 DEBUG_PRINT1 (" Match => jump.\n");
04205 goto unconditional_jump;
04206 }
04207
04208
04209
04210
04211
04212
04213
04214
04215
04216 case pop_failure_jump:
04217 {
04218
04219
04220
04221
04222
04223 unsigned dummy_low_reg, dummy_high_reg;
04224 unsigned char *pdummy;
04225 const char *sdummy;
04226
04227 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04228 POP_FAILURE_POINT (sdummy, pdummy,
04229 dummy_low_reg, dummy_high_reg,
04230 reg_dummy, reg_dummy, reg_info_dummy);
04231 }
04232
04233
04234
04235
04236 case jump:
04237 unconditional_jump:
04238 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04239 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04240 p += mcnt;
04241 DEBUG_PRINT2 ("(to 0x%x).\n", p);
04242 break;
04243
04244
04245
04246
04247 case jump_past_alt:
04248 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04249 goto unconditional_jump;
04250
04251
04252
04253
04254
04255
04256
04257 case dummy_failure_jump:
04258 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04259
04260
04261 PUSH_FAILURE_POINT (0, 0, -2);
04262 goto unconditional_jump;
04263
04264
04265
04266
04267
04268
04269
04270 case push_dummy_failure:
04271 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04272
04273
04274 PUSH_FAILURE_POINT (0, 0, -2);
04275 break;
04276
04277
04278
04279 case succeed_n:
04280 EXTRACT_NUMBER (mcnt, p + 2);
04281 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04282
04283 assert (mcnt >= 0);
04284
04285 if (mcnt > 0)
04286 {
04287 mcnt--;
04288 p += 2;
04289 STORE_NUMBER_AND_INCR (p, mcnt);
04290 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
04291 }
04292 else if (mcnt == 0)
04293 {
04294 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
04295 p[2] = (unsigned char) no_op;
04296 p[3] = (unsigned char) no_op;
04297 goto on_failure;
04298 }
04299 break;
04300
04301 case jump_n:
04302 EXTRACT_NUMBER (mcnt, p + 2);
04303 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04304
04305
04306 if (mcnt)
04307 {
04308 mcnt--;
04309 STORE_NUMBER (p + 2, mcnt);
04310 goto unconditional_jump;
04311 }
04312
04313 else
04314 p += 4;
04315 break;
04316
04317 case set_number_at:
04318 {
04319 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04320
04321 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04322 p1 = p + mcnt;
04323 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04324 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
04325 STORE_NUMBER (p1, mcnt);
04326 break;
04327 }
04328
04329 case wordbound:
04330 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04331 if (AT_WORD_BOUNDARY (d))
04332 break;
04333 goto fail;
04334
04335 case notwordbound:
04336 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04337 if (AT_WORD_BOUNDARY (d))
04338 goto fail;
04339 break;
04340
04341 case wordbeg:
04342 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04343 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04344 break;
04345 goto fail;
04346
04347 case wordend:
04348 DEBUG_PRINT1 ("EXECUTING wordend.\n");
04349 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04350 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04351 break;
04352 goto fail;
04353
04354 #ifdef emacs
04355 #ifdef emacs19
04356 case before_dot:
04357 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04358 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04359 goto fail;
04360 break;
04361
04362 case at_dot:
04363 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04364 if (PTR_CHAR_POS ((unsigned char *) d) != point)
04365 goto fail;
04366 break;
04367
04368 case after_dot:
04369 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
04370 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
04371 goto fail;
04372 break;
04373 #else
04374 case at_dot:
04375 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04376 if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
04377 goto fail;
04378 break;
04379 #endif
04380
04381 case syntaxspec:
04382 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
04383 mcnt = *p++;
04384 goto matchsyntax;
04385
04386 case wordchar:
04387 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
04388 mcnt = (int) Sword;
04389 matchsyntax:
04390 PREFETCH ();
04391 if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
04392 goto fail;
04393 SET_REGS_MATCHED ();
04394 break;
04395
04396 case notsyntaxspec:
04397 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
04398 mcnt = *p++;
04399 goto matchnotsyntax;
04400
04401 case notwordchar:
04402 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
04403 mcnt = (int) Sword;
04404 matchnotsyntax:
04405 PREFETCH ();
04406 if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
04407 goto fail;
04408 SET_REGS_MATCHED ();
04409 break;
04410
04411 #else
04412 case wordchar:
04413 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
04414 PREFETCH ();
04415 if (!WORDCHAR_P (d))
04416 goto fail;
04417 SET_REGS_MATCHED ();
04418 d++;
04419 break;
04420
04421 case notwordchar:
04422 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
04423 PREFETCH ();
04424 if (WORDCHAR_P (d))
04425 goto fail;
04426 SET_REGS_MATCHED ();
04427 d++;
04428 break;
04429 #endif
04430
04431 default:
04432 abort ();
04433 }
04434 continue;
04435
04436
04437
04438 fail:
04439 if (!FAIL_STACK_EMPTY ())
04440 {
04441 DEBUG_PRINT1 ("\nFAIL:\n");
04442 POP_FAILURE_POINT (d, p,
04443 lowest_active_reg, highest_active_reg,
04444 regstart, regend, reg_info);
04445
04446
04447 if (!p)
04448 goto fail;
04449
04450
04451 assert (p <= pend);
04452 if (p < pend)
04453 {
04454 boolean is_a_jump_n = false;
04455
04456
04457
04458 switch ((re_opcode_t) *p)
04459 {
04460 case jump_n:
04461 is_a_jump_n = true;
04462 case maybe_pop_jump:
04463 case pop_failure_jump:
04464 case jump:
04465 p1 = p + 1;
04466 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04467 p1 += mcnt;
04468
04469 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
04470 || (!is_a_jump_n
04471 && (re_opcode_t) *p1 == on_failure_jump))
04472 goto fail;
04473 break;
04474 default:
04475 ;
04476 }
04477 }
04478
04479 if (d >= string1 && d <= end1)
04480 dend = end_match_1;
04481 }
04482 else
04483 break;
04484 }
04485
04486 if (best_regs_set)
04487 goto restore_best_regs;
04488
04489 FREE_VARIABLES ();
04490
04491 return -1;
04492 }
04493
04494
04495
04496
04497
04498
04499
04500
04501
04502
04503
04504
04505
04506
04507 static boolean
04508 group_match_null_string_p (p, end, reg_info)
04509 unsigned char **p, *end;
04510 register_info_type *reg_info;
04511 {
04512 int mcnt;
04513
04514 unsigned char *p1 = *p + 2;
04515
04516 while (p1 < end)
04517 {
04518
04519
04520
04521
04522 switch ((re_opcode_t) *p1)
04523 {
04524
04525 case on_failure_jump:
04526 p1++;
04527 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04528
04529
04530
04531
04532 if (mcnt >= 0)
04533 {
04534
04535
04536
04537
04538
04539
04540
04541
04542
04543
04544
04545
04546
04547
04548
04549
04550
04551
04552 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
04553 {
04554
04555
04556
04557
04558 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
04559 reg_info))
04560 return false;
04561
04562
04563
04564 p1 += mcnt;
04565
04566
04567
04568 if ((re_opcode_t) *p1 != on_failure_jump)
04569 break;
04570
04571
04572
04573 p1++;
04574 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04575 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
04576 {
04577
04578 p1 -= 3;
04579 break;
04580 }
04581 }
04582
04583
04584
04585
04586 EXTRACT_NUMBER (mcnt, p1 - 2);
04587
04588 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
04589 return false;
04590
04591 p1 += mcnt;
04592 }
04593 break;
04594
04595
04596 case stop_memory:
04597 assert (p1[1] == **p);
04598 *p = p1 + 2;
04599 return true;
04600
04601
04602 default:
04603 if (!common_op_match_null_string_p (&p1, end, reg_info))
04604 return false;
04605 }
04606 }
04607
04608 return false;
04609 }
04610
04611
04612
04613
04614
04615
04616 static boolean
04617 alt_match_null_string_p (p, end, reg_info)
04618 unsigned char *p, *end;
04619 register_info_type *reg_info;
04620 {
04621 int mcnt;
04622 unsigned char *p1 = p;
04623
04624 while (p1 < end)
04625 {
04626
04627
04628
04629 switch ((re_opcode_t) *p1)
04630 {
04631
04632 case on_failure_jump:
04633 p1++;
04634 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04635 p1 += mcnt;
04636 break;
04637
04638 default:
04639 if (!common_op_match_null_string_p (&p1, end, reg_info))
04640 return false;
04641 }
04642 }
04643
04644 return true;
04645 }
04646
04647
04648
04649
04650
04651
04652
04653 static boolean
04654 common_op_match_null_string_p (p, end, reg_info)
04655 unsigned char **p, *end;
04656 register_info_type *reg_info;
04657 {
04658 int mcnt;
04659 boolean ret;
04660 int reg_no;
04661 unsigned char *p1 = *p;
04662
04663 switch ((re_opcode_t) *p1++)
04664 {
04665 case no_op:
04666 case begline:
04667 case endline:
04668 case begbuf:
04669 case endbuf:
04670 case wordbeg:
04671 case wordend:
04672 case wordbound:
04673 case notwordbound:
04674 #ifdef emacs
04675 case before_dot:
04676 case at_dot:
04677 case after_dot:
04678 #endif
04679 break;
04680
04681 case start_memory:
04682 reg_no = *p1;
04683 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
04684 ret = group_match_null_string_p (&p1, end, reg_info);
04685
04686
04687
04688
04689 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
04690 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
04691
04692 if (!ret)
04693 return false;
04694 break;
04695
04696
04697 case jump:
04698 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04699 if (mcnt >= 0)
04700 p1 += mcnt;
04701 else
04702 return false;
04703 break;
04704
04705 case succeed_n:
04706
04707 p1 += 2;
04708 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04709
04710 if (mcnt == 0)
04711 {
04712 p1 -= 4;
04713 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04714 p1 += mcnt;
04715 }
04716 else
04717 return false;
04718 break;
04719
04720 case duplicate:
04721 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
04722 return false;
04723 break;
04724
04725 case set_number_at:
04726 p1 += 4;
04727
04728 default:
04729
04730 return false;
04731 }
04732
04733 *p = p1;
04734 return true;
04735 }
04736
04737
04738
04739
04740
04741 static int
04742 bcmp_translate (s1, s2, len, translate)
04743 unsigned char *s1, *s2;
04744 register int len;
04745 char *translate;
04746 {
04747 register unsigned char *p1 = s1, *p2 = s2;
04748 while (len)
04749 {
04750 if (translate[*p1++] != translate[*p2++]) return 1;
04751 len--;
04752 }
04753 return 0;
04754 }
04755
04756
04757
04758
04759
04760
04761
04762
04763
04764
04765
04766
04767 const char *
04768 re_compile_pattern (pattern, length, bufp)
04769 const char *pattern;
04770 int length;
04771 struct re_pattern_buffer *bufp;
04772 {
04773 reg_errcode_t ret;
04774
04775
04776
04777 bufp->regs_allocated = REGS_UNALLOCATED;
04778
04779
04780
04781
04782 bufp->no_sub = 0;
04783
04784
04785 bufp->newline_anchor = 1;
04786
04787 ret = regex_compile (pattern, length, re_syntax_options, bufp);
04788
04789 return re_error_msg[(int) ret];
04790 }
04791
04792
04793
04794
04795 #if !defined (emacs) && !defined (_POSIX_SOURCE)
04796
04797
04798 static struct re_pattern_buffer re_comp_buf;
04799
04800 char *
04801 re_comp (s)
04802 const char *s;
04803 {
04804 reg_errcode_t ret;
04805
04806 if (!s)
04807 {
04808 if (!re_comp_buf.buffer)
04809 return "No previous regular expression";
04810 return 0;
04811 }
04812
04813 if (!re_comp_buf.buffer)
04814 {
04815 re_comp_buf.buffer = (unsigned char *) malloc (200);
04816 if (re_comp_buf.buffer == NULL)
04817 return "Memory exhausted";
04818 re_comp_buf.allocated = 200;
04819
04820 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
04821 if (re_comp_buf.fastmap == NULL)
04822 return "Memory exhausted";
04823 }
04824
04825
04826
04827
04828
04829 re_comp_buf.newline_anchor = 1;
04830
04831 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
04832
04833
04834 return (char *) re_error_msg[(int) ret];
04835 }
04836
04837
04838 int
04839 re_exec (s)
04840 const char *s;
04841 {
04842 const int len = strlen (s);
04843 return
04844 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
04845 }
04846 #endif
04847
04848
04849
04850 #ifndef emacs
04851
04852
04853
04854
04855
04856
04857
04858
04859
04860
04861
04862
04863
04864
04865
04866
04867
04868
04869
04870
04871
04872
04873
04874
04875
04876
04877
04878
04879
04880
04881
04882
04883
04884
04885
04886 int
04887 regcomp (preg, pattern, cflags)
04888 regex_t *preg;
04889 const char *pattern;
04890 int cflags;
04891 {
04892 reg_errcode_t ret;
04893 unsigned syntax
04894 = (cflags & REG_EXTENDED) ?
04895 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
04896
04897
04898 preg->buffer = 0;
04899 preg->allocated = 0;
04900
04901
04902
04903
04904
04905 preg->fastmap = 0;
04906
04907 if (cflags & REG_ICASE)
04908 {
04909 unsigned i;
04910
04911 preg->translate = (char *) malloc (CHAR_SET_SIZE);
04912 if (preg->translate == NULL)
04913 return (int) REG_ESPACE;
04914
04915
04916 for (i = 0; i < CHAR_SET_SIZE; i++)
04917 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
04918 }
04919 else
04920 preg->translate = NULL;
04921
04922
04923 if (cflags & REG_NEWLINE)
04924 {
04925 syntax &= ~RE_DOT_NEWLINE;
04926 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
04927
04928 preg->newline_anchor = 1;
04929 }
04930 else
04931 preg->newline_anchor = 0;
04932
04933 preg->no_sub = !!(cflags & REG_NOSUB);
04934
04935
04936
04937 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
04938
04939
04940
04941 if (ret == REG_ERPAREN) ret = REG_EPAREN;
04942
04943 return (int) ret;
04944 }
04945
04946
04947
04948
04949
04950
04951
04952
04953
04954
04955
04956
04957
04958
04959
04960
04961 int
04962 regexec (preg, string, nmatch, pmatch, eflags)
04963 const regex_t *preg;
04964 const char *string;
04965 size_t nmatch;
04966 regmatch_t pmatch[];
04967 int eflags;
04968 {
04969 int ret;
04970 struct re_registers regs;
04971 regex_t private_preg;
04972 int len = strlen (string);
04973 boolean want_reg_info = !preg->no_sub && nmatch > 0;
04974
04975 private_preg = *preg;
04976
04977 private_preg.not_bol = !!(eflags & REG_NOTBOL);
04978 private_preg.not_eol = !!(eflags & REG_NOTEOL);
04979
04980
04981
04982
04983 private_preg.regs_allocated = REGS_FIXED;
04984
04985 if (want_reg_info)
04986 {
04987 regs.num_regs = nmatch;
04988 regs.start = TALLOC (nmatch, regoff_t);
04989 regs.end = TALLOC (nmatch, regoff_t);
04990 if (regs.start == NULL || regs.end == NULL)
04991 return (int) REG_NOMATCH;
04992 }
04993
04994
04995 ret = re_search (&private_preg, string, len,
04996 0, len,
04997 want_reg_info ? ®s : (struct re_registers *) 0);
04998
04999
05000 if (want_reg_info)
05001 {
05002 if (ret >= 0)
05003 {
05004 unsigned r;
05005
05006 for (r = 0; r < nmatch; r++)
05007 {
05008 pmatch[r].rm_so = regs.start[r];
05009 pmatch[r].rm_eo = regs.end[r];
05010 }
05011 }
05012
05013
05014 free (regs.start);
05015 free (regs.end);
05016 }
05017
05018
05019 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05020 }
05021
05022 int
05023 regexec1 (preg, string, stringLength, nmatch, pmatch, eflags)
05024 const regex_t *preg;
05025 const char *string;
05026 int stringLength;
05027 size_t nmatch;
05028 regmatch_t pmatch[];
05029 int eflags;
05030 {
05031 int ret;
05032 struct re_registers regs;
05033 regex_t private_preg;
05034 int len = stringLength;
05035 boolean want_reg_info = !preg->no_sub && nmatch > 0;
05036
05037 private_preg = *preg;
05038
05039 private_preg.not_bol = !!(eflags & REG_NOTBOL);
05040 private_preg.not_eol = !!(eflags & REG_NOTEOL);
05041
05042
05043
05044
05045 private_preg.regs_allocated = REGS_FIXED;
05046
05047 if (want_reg_info)
05048 {
05049 regs.num_regs = nmatch;
05050 regs.start = TALLOC (nmatch, regoff_t);
05051 regs.end = TALLOC (nmatch, regoff_t);
05052 if (regs.start == NULL || regs.end == NULL)
05053 return (int) REG_NOMATCH;
05054 }
05055
05056
05057 ret = re_search (&private_preg, string, len,
05058 0, len,
05059 want_reg_info ? ®s : (struct re_registers *) 0);
05060
05061
05062 if (want_reg_info)
05063 {
05064 if (ret >= 0)
05065 {
05066 unsigned r;
05067
05068 for (r = 0; r < nmatch; r++)
05069 {
05070 pmatch[r].rm_so = regs.start[r];
05071 pmatch[r].rm_eo = regs.end[r];
05072 }
05073 }
05074
05075
05076 free (regs.start);
05077 free (regs.end);
05078 }
05079
05080
05081 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05082 }
05083
05084
05085
05086
05087
05088 size_t
05089 regerror (errcode, preg, errbuf, errbuf_size)
05090 int errcode;
05091 const regex_t *preg;
05092 char *errbuf;
05093 size_t errbuf_size;
05094 {
05095 const char *msg;
05096 size_t msg_size;
05097
05098 if (errcode < 0
05099 || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
05100
05101
05102
05103
05104 abort ();
05105
05106 msg = re_error_msg[errcode];
05107
05108
05109
05110 if (! msg)
05111 msg = "Success";
05112
05113 msg_size = strlen (msg) + 1;
05114
05115 if (errbuf_size != 0)
05116 {
05117 if (msg_size > errbuf_size)
05118 {
05119 strncpy (errbuf, msg, errbuf_size - 1);
05120 errbuf[errbuf_size - 1] = 0;
05121 }
05122 else
05123 strcpy (errbuf, msg);
05124 }
05125
05126 return msg_size;
05127 }
05128
05129
05130
05131
05132 void
05133 regfree (preg)
05134 regex_t *preg;
05135 {
05136 if (preg->buffer != NULL)
05137 free (preg->buffer);
05138 preg->buffer = NULL;
05139
05140 preg->allocated = 0;
05141 preg->used = 0;
05142
05143 if (preg->fastmap != NULL)
05144 free (preg->fastmap);
05145 preg->fastmap = NULL;
05146 preg->fastmap_accurate = 0;
05147
05148 if (preg->translate != NULL)
05149 free (preg->translate);
05150 preg->translate = NULL;
05151 }
05152
05153 #endif
05154
05155
05156
05157
05158
05159
05160
05161