regex.c

Go to the documentation of this file.
00001 
00002 #ifdef __BORLANDC__
00003 #define alloca alloca
00004 
00005 extern void *alloca(unsigned);
00006 
00007 #endif
00008 
00009 #ifdef _MSC_VER
00010 #include <malloc.h>
00011 #endif
00012 
00013 #if defined(__sparc) || defined __hpux
00014 #include <alloca.h>
00015 #endif
00016 
00017 #define const
00018 
00019 /* Extended regular expression matching and search library,
00020    version 0.12.
00021    (Implements POSIX draft P10003.2/D11.2, except for
00022    internationalization features.)
00023 
00024    Copyright (C) 1993 Free Software Foundation, Inc.
00025 
00026    This program is free software; you can redistribute it and/or modify
00027    it under the terms of the GNU General Public License as published by
00028    the Free Software Foundation; either version 2, or (at your option)
00029    any later version.
00030 
00031    This program is distributed in the hope that it will be useful,
00032    but WITHOUT ANY WARRANTY; without even the implied warranty of
00033    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00034    GNU General Public License for more details.
00035 
00036    You should have received a copy of the GNU General Public License
00037    along with this program; if not, write to the Free Software
00038    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
00039 
00040 /* AIX requires this to be the first thing in the file. */
00041 #if defined (_AIX) && !defined (REGEX_MALLOC)
00042   #pragma alloca
00043 #endif
00044 
00045 #define _GNU_SOURCE
00046 
00047 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00048 #include <sys/types.h>
00049 
00050 #ifdef HAVE_CONFIG_H
00051 #include "config.h"
00052 #endif
00053 
00054 /* The `emacs' switch turns on certain matching commands
00055    that make sense only in Emacs. */
00056 #ifdef emacs
00057 
00058 #include "lisp.h"
00059 #include "buffer.h"
00060 #include "syntax.h"
00061 
00062 /* Emacs uses `NULL' as a predicate.  */
00063 #undef NULL
00064 
00065 #else  /* not emacs */
00066 
00067 /* We used to test for `BSTRING' here, but only GCC and Emacs define
00068    `BSTRING', as far as I know, and neither of them use this code.  */
00069 #if HAVE_STRING_H || STDC_HEADERS
00070 #include <string.h>
00071 #ifndef bcmp
00072 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00073 #endif
00074 #ifndef bcopy
00075 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
00076 #endif
00077 #ifndef bzero
00078 #define bzero(s, n)     memset ((s), 0, (n))
00079 #endif
00080 #else
00081 #include <strings.h>
00082 #endif
00083 
00084 #ifdef STDC_HEADERS
00085 #include <stdlib.h>
00086 #else
00087 char *malloc ();
00088 char *realloc ();
00089 #endif
00090 
00091 
00092 /* Define the syntax stuff for <, >, etc.  */
00093 
00094 /* This must be nonzero for the wordchar and notwordchar pattern
00095    commands in re_match_2.  */
00096 #ifndef Sword
00097 #define Sword 1
00098 #endif
00099 
00100 #ifdef SYNTAX_TABLE
00101 
00102 extern char *re_syntax_table;
00103 
00104 #else /* not SYNTAX_TABLE */
00105 
00106 /* How many characters in the character set.  */
00107 #define CHAR_SET_SIZE 256
00108 
00109 static char re_syntax_table[CHAR_SET_SIZE];
00110 
00111 static void
00112 init_syntax_once ()
00113 {
00114    register int c;
00115    static int done = 0;
00116 
00117    if (done)
00118      return;
00119 
00120    bzero (re_syntax_table, sizeof re_syntax_table);
00121 
00122    for (c = 'a'; c <= 'z'; c++)
00123      re_syntax_table[c] = Sword;
00124 
00125    for (c = 'A'; c <= 'Z'; c++)
00126      re_syntax_table[c] = Sword;
00127 
00128    for (c = '0'; c <= '9'; c++)
00129      re_syntax_table[c] = Sword;
00130 
00131    re_syntax_table['_'] = Sword;
00132 
00133    done = 1;
00134 }
00135 
00136 #endif /* not SYNTAX_TABLE */
00137 
00138 #define SYNTAX(c) re_syntax_table[c]
00139 
00140 #endif /* not emacs */
00141 
00142 /* Get the interface, including the syntax bits.  */
00143 #include "regex.h"
00144 
00145 /* isalpha etc. are used for the character classes.  */
00146 #include <ctype.h>
00147 
00148 /* Jim Meyering writes:
00149 
00150    "... Some ctype macros are valid only for character codes that
00151    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
00152    using /bin/cc or gcc but without giving an ansi option).  So, all
00153    ctype uses should be through macros like ISPRINT...  If
00154    STDC_HEADERS is defined, then autoconf has verified that the ctype
00155    macros don't need to be guarded with references to isascii. ...
00156    Defining isascii to 1 should let any compiler worth its salt
00157    eliminate the && through constant folding."  */
00158 #if ! defined (isascii) || defined (STDC_HEADERS)
00159 #undef isascii
00160 #define isascii(c) 1
00161 #endif
00162 
00163 #ifdef isblank
00164 #define ISBLANK(c) (isascii (c) && isblank (c))
00165 #else
00166 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00167 #endif
00168 #ifdef isgraph
00169 #define ISGRAPH(c) (isascii (c) && isgraph (c))
00170 #else
00171 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
00172 #endif
00173 
00174 #define ISPRINT(c) (isascii (c) && isprint (c))
00175 #define ISDIGIT(c) (isascii (c) && isdigit (c))
00176 #define ISALNUM(c) (isascii (c) && isalnum (c))
00177 #define ISALPHA(c) (isascii (c) && isalpha (c))
00178 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
00179 #define ISLOWER(c) (isascii (c) && islower (c))
00180 #define ISPUNCT(c) (isascii (c) && ispunct (c))
00181 #define ISSPACE(c) (isascii (c) && isspace (c))
00182 #define ISUPPER(c) (isascii (c) && isupper (c))
00183 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
00184 
00185 #ifndef NULL
00186 #define NULL 0
00187 #endif
00188 
00189 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00190    since ours (we hope) works properly with all combinations of
00191    machines, compilers, `char' and `unsigned char' argument types.
00192    (Per Bothner suggested the basic approach.)  */
00193 #undef SIGN_EXTEND_CHAR
00194 #if __STDC__
00195 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00196 #else  /* not __STDC__ */
00197 /* As in Harbison and Steele.  */
00198 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00199 #endif
00200 
00201 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00202    use `alloca' instead of `malloc'.  This is because using malloc in
00203    re_search* or re_match* could cause memory leaks when C-g is used in
00204    Emacs; also, malloc is slower and causes storage fragmentation.  On
00205    the other hand, malloc is more portable, and easier to debug.
00206 
00207    Because we sometimes use alloca, some routines have to be macros,
00208    not functions -- `alloca'-allocated space disappears at the end of the
00209    function it is called in.  */
00210 
00211 #ifdef REGEX_MALLOC
00212 
00213 #define REGEX_ALLOCATE malloc
00214 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00215 
00216 #else /* not REGEX_MALLOC  */
00217 
00218 /* Emacs already defines alloca, sometimes.  */
00219 #ifndef alloca
00220 
00221 /* Make alloca work the best possible way.  */
00222 #ifdef __GNUC__
00223 #define alloca __builtin_alloca
00224 #else /* not __GNUC__ */
00225 #if HAVE_ALLOCA_H
00226 #include <alloca.h>
00227 #else /* not __GNUC__ or HAVE_ALLOCA_H */
00228 #ifndef _AIX /* Already did AIX, up at the top.  */
00229 char *alloca ();
00230 #endif /* not _AIX */
00231 #endif /* not HAVE_ALLOCA_H */
00232 #endif /* not __GNUC__ */
00233 
00234 #endif /* not alloca */
00235 
00236 #define REGEX_ALLOCATE alloca
00237 
00238 /* Assumes a `char *destination' variable.  */
00239 #define REGEX_REALLOCATE(source, osize, nsize)                          \
00240   (destination = (char *) alloca (nsize),                               \
00241    bcopy (source, destination, osize),                                  \
00242    destination)
00243 
00244 #endif /* not REGEX_MALLOC */
00245 
00246 
00247 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
00248    `string1' or just past its end.  This works if PTR is NULL, which is
00249    a good thing.  */
00250 #define FIRST_STRING_P(ptr)                                     \
00251   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00252 
00253 /* (Re)Allocate N items of type T using malloc, or fail.  */
00254 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00255 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00256 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00257 
00258 #define BYTEWIDTH 8 /* In bits.  */
00259 
00260 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00261 
00262 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00263 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00264 
00265 typedef char boolean;
00266 #define false 0
00267 #define true 1
00268 
00269 /* These are the command codes that appear in compiled regular
00270    expressions.  Some opcodes are followed by argument bytes.  A
00271    command code can specify any interpretation whatsoever for its
00272    arguments.  Zero bytes may appear in the compiled regular expression.
00273 
00274    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
00275    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
00276    `exactn' we use here must also be 1.  */
00277 
00278 typedef enum
00279 {
00280   no_op = 0,
00281 
00282         /* Followed by one byte giving n, then by n literal bytes.  */
00283   exactn = 1,
00284 
00285         /* Matches any (more or less) character.  */
00286   anychar,
00287 
00288         /* Matches any one char belonging to specified set.  First
00289            following byte is number of bitmap bytes.  Then come bytes
00290            for a bitmap saying which chars are in.  Bits in each byte
00291            are ordered low-bit-first.  A character is in the set if its
00292            bit is 1.  A character too large to have a bit in the map is
00293            automatically not in the set.  */
00294   charset,
00295 
00296         /* Same parameters as charset, but match any character that is
00297            not one of those specified.  */
00298   charset_not,
00299 
00300         /* Start remembering the text that is matched, for storing in a
00301            register.  Followed by one byte with the register number, in
00302            the range 0 to one less than the pattern buffer's re_nsub
00303            field.  Then followed by one byte with the number of groups
00304            inner to this one.  (This last has to be part of the
00305            start_memory only because we need it in the on_failure_jump
00306            of re_match_2.)  */
00307   start_memory,
00308 
00309         /* Stop remembering the text that is matched and store it in a
00310            memory register.  Followed by one byte with the register
00311            number, in the range 0 to one less than `re_nsub' in the
00312            pattern buffer, and one byte with the number of inner groups,
00313            just like `start_memory'.  (We need the number of inner
00314            groups here because we don't have any easy way of finding the
00315            corresponding start_memory when we're at a stop_memory.)  */
00316   stop_memory,
00317 
00318         /* Match a duplicate of something remembered. Followed by one
00319            byte containing the register number.  */
00320   duplicate,
00321 
00322         /* Fail unless at beginning of line.  */
00323   begline,
00324 
00325         /* Fail unless at end of line.  */
00326   endline,
00327 
00328         /* Succeeds if at beginning of buffer (if emacs) or at beginning
00329            of string to be matched (if not).  */
00330   begbuf,
00331 
00332         /* Analogously, for end of buffer/string.  */
00333   endbuf,
00334 
00335         /* Followed by two byte relative address to which to jump.  */
00336   jump,
00337 
00338         /* Same as jump, but marks the end of an alternative.  */
00339   jump_past_alt,
00340 
00341         /* Followed by two-byte relative address of place to resume at
00342            in case of failure.  */
00343   on_failure_jump,
00344 
00345         /* Like on_failure_jump, but pushes a placeholder instead of the
00346            current string position when executed.  */
00347   on_failure_keep_string_jump,
00348 
00349         /* Throw away latest failure point and then jump to following
00350            two-byte relative address.  */
00351   pop_failure_jump,
00352 
00353         /* Change to pop_failure_jump if know won't have to backtrack to
00354            match; otherwise change to jump.  This is used to jump
00355            back to the beginning of a repeat.  If what follows this jump
00356            clearly won't match what the repeat does, such that we can be
00357            sure that there is no use backtracking out of repetitions
00358            already matched, then we change it to a pop_failure_jump.
00359            Followed by two-byte address.  */
00360   maybe_pop_jump,
00361 
00362         /* Jump to following two-byte address, and push a dummy failure
00363            point. This failure point will be thrown away if an attempt
00364            is made to use it for a failure.  A `+' construct makes this
00365            before the first repeat.  Also used as an intermediary kind
00366            of jump when compiling an alternative.  */
00367   dummy_failure_jump,
00368 
00369         /* Push a dummy failure point and continue.  Used at the end of
00370            alternatives.  */
00371   push_dummy_failure,
00372 
00373         /* Followed by two-byte relative address and two-byte number n.
00374            After matching N times, jump to the address upon failure.  */
00375   succeed_n,
00376 
00377         /* Followed by two-byte relative address, and two-byte number n.
00378            Jump to the address N times, then fail.  */
00379   jump_n,
00380 
00381         /* Set the following two-byte relative address to the
00382            subsequent two-byte number.  The address *includes* the two
00383            bytes of number.  */
00384   set_number_at,
00385 
00386   wordchar,     /* Matches any word-constituent character.  */
00387   notwordchar,  /* Matches any char that is not a word-constituent.  */
00388 
00389   wordbeg,      /* Succeeds if at word beginning.  */
00390   wordend,      /* Succeeds if at word end.  */
00391 
00392   wordbound,    /* Succeeds if at a word boundary.  */
00393   notwordbound  /* Succeeds if not at a word boundary.  */
00394 
00395 #ifdef emacs
00396   ,before_dot,  /* Succeeds if before point.  */
00397   at_dot,       /* Succeeds if at point.  */
00398   after_dot,    /* Succeeds if after point.  */
00399 
00400         /* Matches any character whose syntax is specified.  Followed by
00401            a byte which contains a syntax code, e.g., Sword.  */
00402   syntaxspec,
00403 
00404         /* Matches any character whose syntax is not that specified.  */
00405   notsyntaxspec
00406 #endif /* emacs */
00407 } re_opcode_t;
00408 
00409 /* Common operations on the compiled pattern.  */
00410 
00411 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
00412 
00413 #define STORE_NUMBER(destination, number)                               \
00414   do {                                                                  \
00415     (destination)[0] = (number) & 0377;                                 \
00416     (destination)[1] = (number) >> 8;                                   \
00417   } while (0)
00418 
00419 /* Same as STORE_NUMBER, except increment DESTINATION to
00420    the byte after where the number is stored.  Therefore, DESTINATION
00421    must be an lvalue.  */
00422 
00423 #define STORE_NUMBER_AND_INCR(destination, number)                      \
00424   do {                                                                  \
00425     STORE_NUMBER (destination, number);                                 \
00426     (destination) += 2;                                                 \
00427   } while (0)
00428 
00429 /* Put into DESTINATION a number stored in two contiguous bytes starting
00430    at SOURCE.  */
00431 
00432 #define EXTRACT_NUMBER(destination, source)                             \
00433   do {                                                                  \
00434     (destination) = *(source) & 0377;                                   \
00435     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
00436   } while (0)
00437 
00438 #ifdef DEBUG
00439 static void
00440 extract_number (dest, source)
00441     int *dest;
00442     unsigned char *source;
00443 {
00444   int temp = SIGN_EXTEND_CHAR (*(source + 1));
00445   *dest = *source & 0377;
00446   *dest += temp << 8;
00447 }
00448 
00449 #ifndef EXTRACT_MACROS /* To debug the macros.  */
00450 #undef EXTRACT_NUMBER
00451 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00452 #endif /* not EXTRACT_MACROS */
00453 
00454 #endif /* DEBUG */
00455 
00456 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
00457    SOURCE must be an lvalue.  */
00458 
00459 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
00460   do {                                                                  \
00461     EXTRACT_NUMBER (destination, source);                               \
00462     (source) += 2;                                                      \
00463   } while (0)
00464 
00465 #ifdef DEBUG
00466 static void
00467 extract_number_and_incr (destination, source)
00468     int *destination;
00469     unsigned char **source;
00470 {
00471   extract_number (destination, *source);
00472   *source += 2;
00473 }
00474 
00475 #ifndef EXTRACT_MACROS
00476 #undef EXTRACT_NUMBER_AND_INCR
00477 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00478   extract_number_and_incr (&dest, &src)
00479 #endif /* not EXTRACT_MACROS */
00480 
00481 #endif /* DEBUG */
00482 
00483 /* If DEBUG is defined, Regex prints many voluminous messages about what
00484    it is doing (if the variable `debug' is nonzero).  If linked with the
00485    main program in `iregex.c', you can enter patterns and strings
00486    interactively.  And if linked with the main program in `main.c' and
00487    the other test files, you can run the already-written tests.  */
00488 
00489 #ifdef DEBUG
00490 
00491 /* We use standard I/O for debugging.  */
00492 #include <portable_io.h>
00493 
00494 /* It is useful to test things that ``must'' be true when debugging.  */
00495 #include <assert.h>
00496 
00497 static int debug = 0;
00498 
00499 #define DEBUG_STATEMENT(e) e
00500 #define DEBUG_PRINT1(x) if (debug) printf (x)
00501 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00502 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00503 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00504 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
00505   if (debug) print_partial_compiled_pattern (s, e)
00506 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
00507   if (debug) print_double_string (w, s1, sz1, s2, sz2)
00508 
00509 
00510 extern void printchar ();
00511 
00512 /* Print the fastmap in human-readable form.  */
00513 
00514 void
00515 print_fastmap (fastmap)
00516     char *fastmap;
00517 {
00518   unsigned was_a_range = 0;
00519   unsigned i = 0;
00520 
00521   while (i < (1 << BYTEWIDTH))
00522     {
00523       if (fastmap[i++])
00524         {
00525           was_a_range = 0;
00526           printchar (i - 1);
00527           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00528             {
00529               was_a_range = 1;
00530               i++;
00531             }
00532           if (was_a_range)
00533             {
00534               printf ("-");
00535               printchar (i - 1);
00536             }
00537         }
00538     }
00539   putchar ('\n');
00540 }
00541 
00542 
00543 /* Print a compiled pattern string in human-readable form, starting at
00544    the START pointer into it and ending just before the pointer END.  */
00545 
00546 void
00547 print_partial_compiled_pattern (start, end)
00548     unsigned char *start;
00549     unsigned char *end;
00550 {
00551   int mcnt, mcnt2;
00552   unsigned char *p = start;
00553   unsigned char *pend = end;
00554 
00555   if (start == NULL)
00556     {
00557       printf ("(null)\n");
00558       return;
00559     }
00560 
00561   /* Loop over pattern commands.  */
00562   while (p < pend)
00563     {
00564       printf ("%d:\t", p - start);
00565 
00566       switch ((re_opcode_t) *p++)
00567         {
00568         case no_op:
00569           printf ("/no_op");
00570           break;
00571 
00572         case exactn:
00573           mcnt = *p++;
00574           printf ("/exactn/%d", mcnt);
00575           do
00576             {
00577               putchar ('/');
00578               printchar (*p++);
00579             }
00580           while (--mcnt);
00581           break;
00582 
00583         case start_memory:
00584           mcnt = *p++;
00585           printf ("/start_memory/%d/%d", mcnt, *p++);
00586           break;
00587 
00588         case stop_memory:
00589           mcnt = *p++;
00590           printf ("/stop_memory/%d/%d", mcnt, *p++);
00591           break;
00592 
00593         case duplicate:
00594           printf ("/duplicate/%d", *p++);
00595           break;
00596 
00597         case anychar:
00598           printf ("/anychar");
00599           break;
00600 
00601         case charset:
00602         case charset_not:
00603           {
00604             register int c, last = -100;
00605             register int in_range = 0;
00606 
00607             printf ("/charset [%s",
00608                     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00609 
00610             assert (p + *p < pend);
00611 
00612             for (c = 0; c < 256; c++)
00613               if (c / 8 < *p
00614                   && (p[1 + (c/8)] & (1 << (c % 8))))
00615                 {
00616                   /* Are we starting a range?  */
00617                   if (last + 1 == c && ! in_range)
00618                     {
00619                       putchar ('-');
00620                       in_range = 1;
00621                     }
00622                   /* Have we broken a range?  */
00623                   else if (last + 1 != c && in_range)
00624               {
00625                       printchar (last);
00626                       in_range = 0;
00627                     }
00628 
00629                   if (! in_range)
00630                     printchar (c);
00631 
00632                   last = c;
00633               }
00634 
00635             if (in_range)
00636               printchar (last);
00637 
00638             putchar (']');
00639 
00640             p += 1 + *p;
00641           }
00642           break;
00643 
00644         case begline:
00645           printf ("/begline");
00646           break;
00647 
00648         case endline:
00649           printf ("/endline");
00650           break;
00651 
00652         case on_failure_jump:
00653           extract_number_and_incr (&mcnt, &p);
00654           printf ("/on_failure_jump to %d", p + mcnt - start);
00655           break;
00656 
00657         case on_failure_keep_string_jump:
00658           extract_number_and_incr (&mcnt, &p);
00659           printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00660           break;
00661 
00662         case dummy_failure_jump:
00663           extract_number_and_incr (&mcnt, &p);
00664           printf ("/dummy_failure_jump to %d", p + mcnt - start);
00665           break;
00666 
00667         case push_dummy_failure:
00668           printf ("/push_dummy_failure");
00669           break;
00670 
00671         case maybe_pop_jump:
00672           extract_number_and_incr (&mcnt, &p);
00673           printf ("/maybe_pop_jump to %d", p + mcnt - start);
00674           break;
00675 
00676         case pop_failure_jump:
00677           extract_number_and_incr (&mcnt, &p);
00678           printf ("/pop_failure_jump to %d", p + mcnt - start);
00679           break;
00680 
00681         case jump_past_alt:
00682           extract_number_and_incr (&mcnt, &p);
00683           printf ("/jump_past_alt to %d", p + mcnt - start);
00684           break;
00685 
00686         case jump:
00687           extract_number_and_incr (&mcnt, &p);
00688           printf ("/jump to %d", p + mcnt - start);
00689           break;
00690 
00691         case succeed_n:
00692           extract_number_and_incr (&mcnt, &p);
00693           extract_number_and_incr (&mcnt2, &p);
00694           printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
00695           break;
00696 
00697         case jump_n:
00698           extract_number_and_incr (&mcnt, &p);
00699           extract_number_and_incr (&mcnt2, &p);
00700           printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
00701           break;
00702 
00703         case set_number_at:
00704           extract_number_and_incr (&mcnt, &p);
00705           extract_number_and_incr (&mcnt2, &p);
00706           printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
00707           break;
00708 
00709         case wordbound:
00710           printf ("/wordbound");
00711           break;
00712 
00713         case notwordbound:
00714           printf ("/notwordbound");
00715           break;
00716 
00717         case wordbeg:
00718           printf ("/wordbeg");
00719           break;
00720 
00721         case wordend:
00722           printf ("/wordend");
00723 
00724 #ifdef emacs
00725         case before_dot:
00726           printf ("/before_dot");
00727           break;
00728 
00729         case at_dot:
00730           printf ("/at_dot");
00731           break;
00732 
00733         case after_dot:
00734           printf ("/after_dot");
00735           break;
00736 
00737         case syntaxspec:
00738           printf ("/syntaxspec");
00739           mcnt = *p++;
00740           printf ("/%d", mcnt);
00741           break;
00742 
00743         case notsyntaxspec:
00744           printf ("/notsyntaxspec");
00745           mcnt = *p++;
00746           printf ("/%d", mcnt);
00747           break;
00748 #endif /* emacs */
00749 
00750         case wordchar:
00751           printf ("/wordchar");
00752           break;
00753 
00754         case notwordchar:
00755           printf ("/notwordchar");
00756           break;
00757 
00758         case begbuf:
00759           printf ("/begbuf");
00760           break;
00761 
00762         case endbuf:
00763           printf ("/endbuf");
00764           break;
00765 
00766         default:
00767           printf ("?%d", *(p-1));
00768         }
00769 
00770       putchar ('\n');
00771     }
00772 
00773   printf ("%d:\tend of pattern.\n", p - start);
00774 }
00775 
00776 
00777 void
00778 print_compiled_pattern (bufp)
00779     struct re_pattern_buffer *bufp;
00780 {
00781   unsigned char *buffer = bufp->buffer;
00782 
00783   print_partial_compiled_pattern (buffer, buffer + bufp->used);
00784   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00785 
00786   if (bufp->fastmap_accurate && bufp->fastmap)
00787     {
00788       printf ("fastmap: ");
00789       print_fastmap (bufp->fastmap);
00790     }
00791 
00792   printf ("re_nsub: %d\t", bufp->re_nsub);
00793   printf ("regs_alloc: %d\t", bufp->regs_allocated);
00794   printf ("can_be_null: %d\t", bufp->can_be_null);
00795   printf ("newline_anchor: %d\n", bufp->newline_anchor);
00796   printf ("no_sub: %d\t", bufp->no_sub);
00797   printf ("not_bol: %d\t", bufp->not_bol);
00798   printf ("not_eol: %d\t", bufp->not_eol);
00799   printf ("syntax: %d\n", bufp->syntax);
00800   /* Perhaps we should print the translate table?  */
00801 }
00802 
00803 
00804 void
00805 print_double_string (where, string1, size1, string2, size2)
00806     const char *where;
00807     const char *string1;
00808     const char *string2;
00809     int size1;
00810     int size2;
00811 {
00812   unsigned this_char;
00813 
00814   if (where == NULL)
00815     printf ("(null)");
00816   else
00817     {
00818       if (FIRST_STRING_P (where))
00819         {
00820           for (this_char = where - string1; this_char < size1; this_char++)
00821             printchar (string1[this_char]);
00822 
00823           where = string2;
00824         }
00825 
00826       for (this_char = where - string2; this_char < size2; this_char++)
00827         printchar (string2[this_char]);
00828     }
00829 }
00830 
00831 #else /* not DEBUG */
00832 
00833 #undef assert
00834 #define assert(e)
00835 
00836 #define DEBUG_STATEMENT(e)
00837 #define DEBUG_PRINT1(x)
00838 #define DEBUG_PRINT2(x1, x2)
00839 #define DEBUG_PRINT3(x1, x2, x3)
00840 #define DEBUG_PRINT4(x1, x2, x3, x4)
00841 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00842 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00843 
00844 #endif /* not DEBUG */
00845 
00846 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
00847    also be assigned to arbitrarily: each pattern buffer stores its own
00848    syntax, so it can be changed between regex compilations.  */
00849 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
00850 
00851 
00852 /* Specify the precise syntax of regexps for compilation.  This provides
00853    for compatibility for various utilities which historically have
00854    different, incompatible syntaxes.
00855 
00856    The argument SYNTAX is a bit mask comprised of the various bits
00857    defined in regex.h.  We return the old syntax.  */
00858 
00859 reg_syntax_t
00860 re_set_syntax (syntax)
00861   reg_syntax_t syntax;
00862 {
00863   reg_syntax_t ret = re_syntax_options;
00864 
00865   re_syntax_options = syntax;
00866   return ret;
00867 }
00868 
00869 /* This table gives an error message for each of the error codes listed
00870    in regex.h.  Obviously the order here has to be same as there.  */
00871 
00872 static const char *re_error_msg[] =
00873   { NULL,                                       /* REG_NOERROR */
00874     "No match",                                 /* REG_NOMATCH */
00875     "Invalid regular expression",               /* REG_BADPAT */
00876     "Invalid collation character",              /* REG_ECOLLATE */
00877     "Invalid character class name",             /* REG_ECTYPE */
00878     "Trailing backslash",                       /* REG_EESCAPE */
00879     "Invalid back reference",                   /* REG_ESUBREG */
00880     "Unmatched [ or [^",                        /* REG_EBRACK */
00881     "Unmatched ( or \\(",                       /* REG_EPAREN */
00882     "Unmatched \\{",                            /* REG_EBRACE */
00883     "Invalid content of \\{\\}",                /* REG_BADBR */
00884     "Invalid range end",                        /* REG_ERANGE */
00885     "Memory exhausted",                         /* REG_ESPACE */
00886     "Invalid preceding regular expression",     /* REG_BADRPT */
00887     "Premature end of regular expression",      /* REG_EEND */
00888     "Regular expression too big",               /* REG_ESIZE */
00889     "Unmatched ) or \\)",                       /* REG_ERPAREN */
00890   };
00891 
00892 /* Subroutine declarations and macros for regex_compile.  */
00893 /* Macros for the compile stack.  */
00894 
00895 /* Since offsets can go either forwards or backwards, this type needs to
00896    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
00897 typedef int pattern_offset_t;
00898 
00899 /* But patterns can have more than `MAX_REGNUM' registers.  We just
00900    ignore the excess.  */
00901 typedef unsigned regnum_t;
00902 
00903 typedef struct
00904 {
00905   pattern_offset_t begalt_offset;
00906   pattern_offset_t fixup_alt_jump;
00907   pattern_offset_t inner_group_offset;
00908   pattern_offset_t laststart_offset;
00909   regnum_t regnum;
00910 } compile_stack_elt_t;
00911 
00912 
00913 typedef struct
00914 {
00915   compile_stack_elt_t *stack;
00916   unsigned size;
00917   unsigned avail;                       /* Offset of next open position.  */
00918 } compile_stack_type;
00919 
00920 static void store_op1 (
00921 #ifdef __STDC__
00922     re_opcode_t op,
00923     unsigned char *loc,
00924     int arg
00925 #endif
00926     );
00927 
00928 static void insert_op1 
00929 ( 
00930 #ifdef __STDC__
00931   re_opcode_t op, unsigned char *loc, int arg, unsigned char *end
00932 #endif
00933 );
00934 
00935 static void store_op2 (
00936 #ifdef __STDC__
00937     re_opcode_t op,
00938     unsigned char *loc,
00939     int arg1,
00940     int arg2
00941 #endif
00942     );
00943 
00944 static void insert_op2 
00945 (
00946 #ifdef __STDC__
00947   re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end
00948 #endif
00949 
00950 );
00951 
00952 static boolean at_begline_loc_p 
00953 (
00954 #ifdef __STDC__
00955   char const *,char const *, reg_syntax_t
00956 #endif
00957 
00958 );
00959 
00960 static boolean at_endline_loc_p (
00961 #ifdef __STDC__
00962     const char *p,
00963     const char *pend,
00964     int syntax
00965 #endif
00966     );
00967 
00968 static boolean group_in_compile_stack
00969 (
00970 #ifdef __STDC__
00971   compile_stack_type compile_stack,
00972                                       regnum_t regnum
00973 #endif
00974                                      );
00975 static reg_errcode_t compile_range(
00976 #ifdef __STDC__
00977     const char **p_ptr,
00978     const char *pend,
00979     char *translate,
00980     reg_syntax_t syntax,
00981     unsigned char *b
00982 #endif
00983 
00984     );
00985 /* Fetch the next character in the uncompiled pattern---translating it
00986    if necessary.  Also cast from a signed character in the constant
00987    string passed to us by the user to an unsigned char that we can use
00988    as an array index (in, e.g., `translate').  */
00989 #define PATFETCH(c)                                                     \
00990   do {if (p == pend) return REG_EEND;                                   \
00991     c = (unsigned char) *p++;                                           \
00992     if (translate) c = translate[c];                                    \
00993   } while (0)
00994 
00995 /* Fetch the next character in the uncompiled pattern, with no
00996    translation.  */
00997 #define PATFETCH_RAW(c)                                                 \
00998   do {if (p == pend) return REG_EEND;                                   \
00999     c = (unsigned char) *p++;                                           \
01000   } while (0)
01001 
01002 /* Go backwards one character in the pattern.  */
01003 #define PATUNFETCH p--
01004 
01005 
01006 /* If `translate' is non-null, return translate[D], else just D.  We
01007    cast the subscript to translate because some data is declared as
01008    `char *', to avoid warnings when a string constant is passed.  But
01009    when we use a character as a subscript we must make it unsigned.  */
01010 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
01011 
01012 
01013 /* Macros for outputting the compiled pattern into `buffer'.  */
01014 
01015 /* If the buffer isn't allocated when it comes in, use this.  */
01016 #define INIT_BUF_SIZE  32
01017 
01018 /* Make sure we have at least N more bytes of space in buffer.  */
01019 #define GET_BUFFER_SPACE(n)                                             \
01020     while (b - bufp->buffer + (n) > bufp->allocated)                    \
01021       EXTEND_BUFFER ()
01022 
01023 /* Make sure we have one more byte of buffer space and then add C to it.  */
01024 #define BUF_PUSH(c)                                                     \
01025   do {                                                                  \
01026     GET_BUFFER_SPACE (1);                                               \
01027     *b++ = (unsigned char) (c);                                         \
01028   } while (0)
01029 
01030 
01031 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
01032 #define BUF_PUSH_2(c1, c2)                                              \
01033   do {                                                                  \
01034     GET_BUFFER_SPACE (2);                                               \
01035     *b++ = (unsigned char) (c1);                                        \
01036     *b++ = (unsigned char) (c2);                                        \
01037   } while (0)
01038 
01039 
01040 /* As with BUF_PUSH_2, except for three bytes.  */
01041 #define BUF_PUSH_3(c1, c2, c3)                                          \
01042   do {                                                                  \
01043     GET_BUFFER_SPACE (3);                                               \
01044     *b++ = (unsigned char) (c1);                                        \
01045     *b++ = (unsigned char) (c2);                                        \
01046     *b++ = (unsigned char) (c3);                                        \
01047   } while (0)
01048 
01049 
01050 /* Store a jump with opcode OP at LOC to location TO.  We store a
01051    relative address offset by the three bytes the jump itself occupies.  */
01052 #define STORE_JUMP(op, loc, to) \
01053   store_op1 (op, loc, (to) - (loc) - 3)
01054 
01055 /* Likewise, for a two-argument jump.  */
01056 #define STORE_JUMP2(op, loc, to, arg) \
01057   store_op2 (op, loc, (to) - (loc) - 3, arg)
01058 
01059 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
01060 #define INSERT_JUMP(op, loc, to) \
01061   insert_op1 (op, loc, (to) - (loc) - 3, b)
01062 
01063 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
01064 #define INSERT_JUMP2(op, loc, to, arg) \
01065   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
01066 
01067 
01068 /* This is not an arbitrary limit: the arguments which represent offsets
01069    into the pattern are two bytes long.  So if 2^16 bytes turns out to
01070    be too small, many things would have to change.  */
01071 #define MAX_BUF_SIZE (1L << 16)
01072 
01073 
01074 /* Extend the buffer by twice its current size via realloc and
01075    reset the pointers that pointed into the old block to point to the
01076    correct places in the new one.  If extending the buffer results in it
01077    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
01078 #define EXTEND_BUFFER()                                                 \
01079   do {                                                                  \
01080     unsigned char *old_buffer = bufp->buffer;                           \
01081     if (bufp->allocated == MAX_BUF_SIZE)                                \
01082       return REG_ESIZE;                                                 \
01083     bufp->allocated <<= 1;                                              \
01084     if (bufp->allocated > MAX_BUF_SIZE)                                 \
01085       bufp->allocated = MAX_BUF_SIZE;                                   \
01086     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
01087     if (bufp->buffer == NULL)                                           \
01088       return REG_ESPACE;                                                \
01089     /* If the buffer moved, move all the pointers into it.  */          \
01090     if (old_buffer != bufp->buffer)                                     \
01091       {                                                                 \
01092         b = (b - old_buffer) + bufp->buffer;                            \
01093         begalt = (begalt - old_buffer) + bufp->buffer;                  \
01094         if (fixup_alt_jump)                                             \
01095           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01096         if (laststart)                                                  \
01097           laststart = (laststart - old_buffer) + bufp->buffer;          \
01098         if (pending_exact)                                              \
01099           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
01100       }                                                                 \
01101   } while (0)
01102 
01103 
01104 /* Since we have one byte reserved for the register number argument to
01105    {start,stop}_memory, the maximum number of groups we can report
01106    things about is what fits in that byte.  */
01107 #define MAX_REGNUM 255
01108 
01109 
01110 
01111 
01112 #define INIT_COMPILE_STACK_SIZE 32
01113 
01114 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
01115 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
01116 
01117 /* The next available element.  */
01118 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01119 
01120 
01121 /* Set the bit for character C in a list.  */
01122 #define SET_LIST_BIT(c)                               \
01123   (b[((unsigned char) (c)) / BYTEWIDTH]               \
01124    |= 1 << (((unsigned char) c) % BYTEWIDTH))
01125 
01126 
01127 /* Get the next unsigned number in the uncompiled pattern.  */
01128 #define GET_UNSIGNED_NUMBER(num)                                        \
01129   { if (p != pend)                                                      \
01130      {                                                                  \
01131        PATFETCH (c);                                                    \
01132        while (ISDIGIT (c))                                              \
01133          {                                                              \
01134            if (num < 0)                                                 \
01135               num = 0;                                                  \
01136            num = num * 10 + c - '0';                                    \
01137            if (p == pend)                                               \
01138               break;                                                    \
01139            PATFETCH (c);                                                \
01140          }                                                              \
01141        }                                                                \
01142     }
01143 
01144 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
01145 
01146 #define IS_CHAR_CLASS(string)                                           \
01147    (STREQ (string, "alpha") || STREQ (string, "upper")                  \
01148     || STREQ (string, "lower") || STREQ (string, "digit")               \
01149     || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
01150     || STREQ (string, "space") || STREQ (string, "print")               \
01151     || STREQ (string, "punct") || STREQ (string, "graph")               \
01152     || STREQ (string, "cntrl") || STREQ (string, "blank"))
01153 
01154 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
01155    Returns one of error codes defined in `regex.h', or zero for success.
01156 
01157    Assumes the `allocated' (and perhaps `buffer') and `translate'
01158    fields are set in BUFP on entry.
01159 
01160    If it succeeds, results are put in BUFP (if it returns an error, the
01161    contents of BUFP are undefined):
01162      `buffer' is the compiled pattern;
01163      `syntax' is set to SYNTAX;
01164      `used' is set to the length of the compiled pattern;
01165      `fastmap_accurate' is zero;
01166      `re_nsub' is the number of subexpressions in PATTERN;
01167      `not_bol' and `not_eol' are zero;
01168 
01169    The `fastmap' and `newline_anchor' fields are neither
01170    examined nor set.  */
01171 
01172 #ifdef __STDC__
01173 static reg_errcode_t
01174 regex_compile (char const*pattern, int size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
01175 #else
01176 static reg_errcode_t
01177 regex_compile (pattern, size, syntax, bufp)
01178   char const*pattern; 
01179   int size; 
01180   reg_syntax_t syntax; 
01181   struct re_pattern_buffer *bufp;
01182 
01183 #endif
01184 {
01185   /* We fetch characters from PATTERN here.  Even though PATTERN is
01186      `char *' (i.e., signed), we declare these variables as unsigned, so
01187      they can be reliably used as array indices.  */
01188   register unsigned char c, c1;
01189 
01190   /* A random tempory spot in PATTERN.  */
01191   const char *p1;
01192 
01193   /* Points to the end of the buffer, where we should append.  */
01194   register unsigned char *b;
01195 
01196   /* Keeps track of unclosed groups.  */
01197   compile_stack_type compile_stack;
01198 
01199   /* Points to the current (ending) position in the pattern.  */
01200   const char *p = pattern;
01201   const char *pend = pattern + size;
01202 
01203   /* How to translate the characters in the pattern.  */
01204   char *translate = bufp->translate;
01205 
01206   /* Address of the count-byte of the most recently inserted `exactn'
01207      command.  This makes it possible to tell if a new exact-match
01208      character can be added to that command or if the character requires
01209      a new `exactn' command.  */
01210   unsigned char *pending_exact = 0;
01211 
01212   /* Address of start of the most recently finished expression.
01213      This tells, e.g., postfix * where to find the start of its
01214      operand.  Reset at the beginning of groups and alternatives.  */
01215   unsigned char *laststart = 0;
01216 
01217   /* Address of beginning of regexp, or inside of last group.  */
01218   unsigned char *begalt;
01219 
01220   /* Place in the uncompiled pattern (i.e., the {) to
01221      which to go back if the interval is invalid.  */
01222   const char *beg_interval;
01223 
01224   /* Address of the place where a forward jump should go to the end of
01225      the containing expression.  Each alternative of an `or' -- except the
01226      last -- ends with a forward jump of this sort.  */
01227   unsigned char *fixup_alt_jump = 0;
01228 
01229   /* Counts open-groups as they are encountered.  Remembered for the
01230      matching close-group on the compile stack, so the same register
01231      number is put in the stop_memory as the start_memory.  */
01232   regnum_t regnum = 0;
01233 
01234 #ifdef DEBUG
01235   DEBUG_PRINT1 ("\nCompiling pattern: ");
01236   if (debug)
01237     {
01238       unsigned debug_count;
01239 
01240       for (debug_count = 0; debug_count < size; debug_count++)
01241         printchar (pattern[debug_count]);
01242       putchar ('\n');
01243     }
01244 #endif /* DEBUG */
01245 
01246   /* Initialize the compile stack.  */
01247   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01248   if (compile_stack.stack == NULL)
01249     return REG_ESPACE;
01250 
01251   compile_stack.size = INIT_COMPILE_STACK_SIZE;
01252   compile_stack.avail = 0;
01253 
01254   /* Initialize the pattern buffer.  */
01255   bufp->syntax = syntax;
01256   bufp->fastmap_accurate = 0;
01257   bufp->not_bol = bufp->not_eol = 0;
01258 
01259   /* Set `used' to zero, so that if we return an error, the pattern
01260      printer (for debugging) will think there's no pattern.  We reset it
01261      at the end.  */
01262   bufp->used = 0;
01263 
01264   /* Always count groups, whether or not bufp->no_sub is set.  */
01265   bufp->re_nsub = 0;
01266 
01267 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01268   /* Initialize the syntax table.  */
01269    init_syntax_once ();
01270 #endif
01271 
01272   if (bufp->allocated == 0)
01273     {
01274       if (bufp->buffer)
01275         { /* If zero allocated, but buffer is non-null, try to realloc
01276              enough space.  This loses if buffer's address is bogus, but
01277              that is the user's responsibility.  */
01278           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01279         }
01280       else
01281         { /* Caller did not allocate a buffer.  Do it for them.  */
01282           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01283         }
01284       if (!bufp->buffer) return REG_ESPACE;
01285 
01286       bufp->allocated = INIT_BUF_SIZE;
01287     }
01288 
01289   begalt = b = bufp->buffer;
01290 
01291   /* Loop through the uncompiled pattern until we're at the end.  */
01292   while (p != pend)
01293     {
01294       PATFETCH (c);
01295 
01296       switch (c)
01297         {
01298         case '^':
01299           {
01300             if (   /* If at start of pattern, it's an operator.  */
01301                    p == pattern + 1
01302                    /* If context independent, it's an operator.  */
01303                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01304                    /* Otherwise, depends on what's come before.  */
01305                 || at_begline_loc_p (pattern, p, syntax))
01306               BUF_PUSH (begline);
01307             else
01308               goto normal_char;
01309           }
01310           break;
01311 
01312 
01313         case '$':
01314           {
01315             if (   /* If at end of pattern, it's an operator.  */
01316                    p == pend
01317                    /* If context independent, it's an operator.  */
01318                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01319                    /* Otherwise, depends on what's next.  */
01320                 || at_endline_loc_p (p, pend, syntax))
01321                BUF_PUSH (endline);
01322              else
01323                goto normal_char;
01324            }
01325            break;
01326 
01327 
01328         case '+':
01329         case '?':
01330           if ((syntax & RE_BK_PLUS_QM)
01331               || (syntax & RE_LIMITED_OPS))
01332             goto normal_char;
01333         handle_plus:
01334         case '*':
01335           /* If there is no previous pattern... */
01336           if (!laststart)
01337             {
01338               if (syntax & RE_CONTEXT_INVALID_OPS)
01339                 return REG_BADRPT;
01340               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01341                 goto normal_char;
01342             }
01343 
01344           {
01345             /* Are we optimizing this jump?  */
01346             boolean keep_string_p = false;
01347 
01348             /* 1 means zero (many) matches is allowed.  */
01349             char zero_times_ok = 0, many_times_ok = 0;
01350 
01351             /* If there is a sequence of repetition chars, collapse it
01352                down to just one (the right one).  We can't combine
01353                interval operators with these because of, e.g., `a{2}*',
01354                which should only match an even number of `a's.  */
01355 
01356             for (;;)
01357               {
01358                 zero_times_ok |= c != '+';
01359                 many_times_ok |= c != '?';
01360 
01361                 if (p == pend)
01362                   break;
01363 
01364                 PATFETCH (c);
01365 
01366                 if (c == '*'
01367                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01368                   ;
01369 
01370                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
01371                   {
01372                     if (p == pend) return REG_EESCAPE;
01373 
01374                     PATFETCH (c1);
01375                     if (!(c1 == '+' || c1 == '?'))
01376                       {
01377                         PATUNFETCH;
01378                         PATUNFETCH;
01379                         break;
01380                       }
01381 
01382                     c = c1;
01383                   }
01384                 else
01385                   {
01386                     PATUNFETCH;
01387                     break;
01388                   }
01389 
01390                 /* If we get here, we found another repeat character.  */
01391                }
01392 
01393             /* Star, etc. applied to an empty pattern is equivalent
01394                to an empty pattern.  */
01395             if (!laststart)
01396               break;
01397 
01398             /* Now we know whether or not zero matches is allowed
01399                and also whether or not two or more matches is allowed.  */
01400             if (many_times_ok)
01401               { /* More than one repetition is allowed, so put in at the
01402                    end a backward relative jump from `b' to before the next
01403                    jump we're going to put in below (which jumps from
01404                    laststart to after this jump).
01405 
01406                    But if we are at the `*' in the exact sequence `.*\n',
01407                    insert an unconditional jump backwards to the .,
01408                    instead of the beginning of the loop.  This way we only
01409                    push a failure point once, instead of every time
01410                    through the loop.  */
01411                 assert (p - 1 > pattern);
01412 
01413                 /* Allocate the space for the jump.  */
01414                 GET_BUFFER_SPACE (3);
01415 
01416                 /* We know we are not at the first character of the pattern,
01417                    because laststart was nonzero.  And we've already
01418                    incremented `p', by the way, to be the character after
01419                    the `*'.  Do we have to do something analogous here
01420                    for null bytes, because of RE_DOT_NOT_NULL?  */
01421                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01422                     && zero_times_ok
01423                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01424                     && !(syntax & RE_DOT_NEWLINE))
01425                   { /* We have .*\n.  */
01426                     STORE_JUMP (jump, b, laststart);
01427                     keep_string_p = true;
01428                   }
01429                 else
01430                   /* Anything else.  */
01431                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01432 
01433                 /* We've added more stuff to the buffer.  */
01434                 b += 3;
01435               }
01436 
01437             /* On failure, jump from laststart to b + 3, which will be the
01438                end of the buffer after this jump is inserted.  */
01439             GET_BUFFER_SPACE (3);
01440             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
01441                                        : on_failure_jump,
01442                          laststart, b + 3);
01443             pending_exact = 0;
01444             b += 3;
01445 
01446             if (!zero_times_ok)
01447               {
01448                 /* At least one repetition is required, so insert a
01449                    `dummy_failure_jump' before the initial
01450                    `on_failure_jump' instruction of the loop. This
01451                    effects a skip over that instruction the first time
01452                    we hit that loop.  */
01453                 GET_BUFFER_SPACE (3);
01454                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
01455                 b += 3;
01456               }
01457             }
01458           break;
01459 
01460 
01461         case '.':
01462           laststart = b;
01463           BUF_PUSH (anychar);
01464           break;
01465 
01466 
01467         case '[':
01468           {
01469             boolean had_char_class = false;
01470 
01471             if (p == pend) return REG_EBRACK;
01472 
01473             /* Ensure that we have enough space to push a charset: the
01474                opcode, the length count, and the bitset; 34 bytes in all.  */
01475             GET_BUFFER_SPACE (34);
01476 
01477             laststart = b;
01478 
01479             /* We test `*p == '^' twice, instead of using an if
01480                statement, so we only need one BUF_PUSH.  */
01481             BUF_PUSH (*p == '^' ? charset_not : charset);
01482             if (*p == '^')
01483               p++;
01484 
01485             /* Remember the first position in the bracket expression.  */
01486             p1 = p;
01487 
01488             /* Push the number of bytes in the bitmap.  */
01489             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
01490 
01491             /* Clear the whole map.  */
01492             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
01493 
01494             /* charset_not matches newline according to a syntax bit.  */
01495             if ((re_opcode_t) b[-2] == charset_not
01496                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
01497               SET_LIST_BIT ('\n');
01498 
01499             /* Read in characters and ranges, setting map bits.  */
01500             for (;;)
01501               {
01502                 if (p == pend) return REG_EBRACK;
01503 
01504                 PATFETCH (c);
01505 
01506                 /* \ might escape characters inside [...] and [^...].  */
01507                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
01508                   {
01509                     if (p == pend) return REG_EESCAPE;
01510 
01511                     PATFETCH (c1);
01512                     SET_LIST_BIT (c1);
01513                     continue;
01514                   }
01515 
01516                 /* Could be the end of the bracket expression.  If it's
01517                    not (i.e., when the bracket expression is `[]' so
01518                    far), the ']' character bit gets set way below.  */
01519                 if (c == ']' && p != p1 + 1)
01520                   break;
01521 
01522                 /* Look ahead to see if it's a range when the last thing
01523                    was a character class.  */
01524                 if (had_char_class && c == '-' && *p != ']')
01525                   return REG_ERANGE;
01526 
01527                 /* Look ahead to see if it's a range when the last thing
01528                    was a character: if this is a hyphen not at the
01529                    beginning or the end of a list, then it's the range
01530                    operator.  */
01531                 if (c == '-'
01532                     && !(p - 2 >= pattern && p[-2] == '[')
01533                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
01534                     && *p != ']')
01535                   {
01536                     reg_errcode_t ret
01537                       = compile_range (&p, pend, translate, syntax, b);
01538                     if (ret != REG_NOERROR) return ret;
01539                   }
01540 
01541                 else if (p[0] == '-' && p[1] != ']')
01542                   { /* This handles ranges made up of characters only.  */
01543                     reg_errcode_t ret;
01544 
01545                     /* Move past the `-'.  */
01546                     PATFETCH (c1);
01547 
01548                     ret = compile_range (&p, pend, translate, syntax, b);
01549                     if (ret != REG_NOERROR) return ret;
01550                   }
01551 
01552                 /* See if we're at the beginning of a possible character
01553                    class.  */
01554 
01555                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
01556                   { /* Leave room for the null.  */
01557                     char str[CHAR_CLASS_MAX_LENGTH + 1];
01558 
01559                     PATFETCH (c);
01560                     c1 = 0;
01561 
01562                     /* If pattern is `[[:'.  */
01563                     if (p == pend) return REG_EBRACK;
01564 
01565                     for (;;)
01566                       {
01567                         PATFETCH (c);
01568                         if (c == ':' || c == ']' || p == pend
01569                             || c1 == CHAR_CLASS_MAX_LENGTH)
01570                           break;
01571                         str[c1++] = c;
01572                       }
01573                     str[c1] = '\0';
01574 
01575                     /* If isn't a word bracketed by `[:' and:`]':
01576                        undo the ending character, the letters, and leave
01577                        the leading `:' and `[' (but set bits for them).  */
01578                     if (c == ':' && *p == ']')
01579                       {
01580                         int ch;
01581                         boolean is_alnum = STREQ (str, "alnum");
01582                         boolean is_alpha = STREQ (str, "alpha");
01583                         boolean is_blank = STREQ (str, "blank");
01584                         boolean is_cntrl = STREQ (str, "cntrl");
01585                         boolean is_digit = STREQ (str, "digit");
01586                         boolean is_graph = STREQ (str, "graph");
01587                         boolean is_lower = STREQ (str, "lower");
01588                         boolean is_print = STREQ (str, "print");
01589                         boolean is_punct = STREQ (str, "punct");
01590                         boolean is_space = STREQ (str, "space");
01591                         boolean is_upper = STREQ (str, "upper");
01592                         boolean is_xdigit = STREQ (str, "xdigit");
01593 
01594                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
01595 
01596                         /* Throw away the ] at the end of the character
01597                            class.  */
01598                         PATFETCH (c);
01599 
01600                         if (p == pend) return REG_EBRACK;
01601 
01602                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
01603                           {
01604                             if (   (is_alnum  && ISALNUM (ch))
01605                                 || (is_alpha  && ISALPHA (ch))
01606                                 || (is_blank  && ISBLANK (ch))
01607                                 || (is_cntrl  && ISCNTRL (ch))
01608                                 || (is_digit  && ISDIGIT (ch))
01609                                 || (is_graph  && ISGRAPH (ch))
01610                                 || (is_lower  && ISLOWER (ch))
01611                                 || (is_print  && ISPRINT (ch))
01612                                 || (is_punct  && ISPUNCT (ch))
01613                                 || (is_space  && ISSPACE (ch))
01614                                 || (is_upper  && ISUPPER (ch))
01615                                 || (is_xdigit && ISXDIGIT (ch)))
01616                             SET_LIST_BIT (ch);
01617                           }
01618                         had_char_class = true;
01619                       }
01620                     else
01621                       {
01622                         c1++;
01623                         while (c1--)
01624                           PATUNFETCH;
01625                         SET_LIST_BIT ('[');
01626                         SET_LIST_BIT (':');
01627                         had_char_class = false;
01628                       }
01629                   }
01630                 else
01631                   {
01632                     had_char_class = false;
01633                     SET_LIST_BIT (c);
01634                   }
01635               }
01636 
01637             /* Discard any (non)matching list bytes that are all 0 at the
01638                end of the map.  Decrease the map-length byte too.  */
01639             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
01640               b[-1]--;
01641             b += b[-1];
01642           }
01643           break;
01644 
01645 
01646         case '(':
01647           if (syntax & RE_NO_BK_PARENS)
01648             goto handle_open;
01649           else
01650             goto normal_char;
01651 
01652 
01653         case ')':
01654           if (syntax & RE_NO_BK_PARENS)
01655             goto handle_close;
01656           else
01657             goto normal_char;
01658 
01659 
01660         case '\n':
01661           if (syntax & RE_NEWLINE_ALT)
01662             goto handle_alt;
01663           else
01664             goto normal_char;
01665 
01666 
01667         case '|':
01668           if (syntax & RE_NO_BK_VBAR)
01669             goto handle_alt;
01670           else
01671             goto normal_char;
01672 
01673 
01674         case '{':
01675            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
01676              goto handle_interval;
01677            else
01678              goto normal_char;
01679 
01680 
01681         case '\\':
01682           if (p == pend) return REG_EESCAPE;
01683 
01684           /* Do not translate the character after the \, so that we can
01685              distinguish, e.g., \B from \b, even if we normally would
01686              translate, e.g., B to b.  */
01687           PATFETCH_RAW (c);
01688 
01689           switch (c)
01690             {
01691             case '(':
01692               if (syntax & RE_NO_BK_PARENS)
01693                 goto normal_backslash;
01694 
01695             handle_open:
01696               bufp->re_nsub++;
01697               regnum++;
01698 
01699               if (COMPILE_STACK_FULL)
01700                 {
01701                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
01702                             compile_stack_elt_t);
01703                   if (compile_stack.stack == NULL) return REG_ESPACE;
01704 
01705                   compile_stack.size <<= 1;
01706                 }
01707 
01708               /* These are the values to restore when we hit end of this
01709                  group.  They are all relative offsets, so that if the
01710                  whole pattern moves because of realloc, they will still
01711                  be valid.  */
01712               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
01713               COMPILE_STACK_TOP.fixup_alt_jump
01714                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
01715               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
01716               COMPILE_STACK_TOP.regnum = regnum;
01717 
01718               /* We will eventually replace the 0 with the number of
01719                  groups inner to this one.  But do not push a
01720                  start_memory for groups beyond the last one we can
01721                  represent in the compiled pattern.  */
01722               if (regnum <= MAX_REGNUM)
01723                 {
01724                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
01725                   BUF_PUSH_3 (start_memory, regnum, 0);
01726                 }
01727 
01728               compile_stack.avail++;
01729 
01730               fixup_alt_jump = 0;
01731               laststart = 0;
01732               begalt = b;
01733               /* If we've reached MAX_REGNUM groups, then this open
01734                  won't actually generate any code, so we'll have to
01735                  clear pending_exact explicitly.  */
01736               pending_exact = 0;
01737               break;
01738 
01739 
01740             case ')':
01741               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
01742 
01743               if (COMPILE_STACK_EMPTY)
01744                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
01745                   goto normal_backslash;
01746                 else
01747                   return REG_ERPAREN;
01748 
01749             handle_close:
01750               if (fixup_alt_jump)
01751                 { /* Push a dummy failure point at the end of the
01752                      alternative for a possible future
01753                      `pop_failure_jump' to pop.  See comments at
01754                      `push_dummy_failure' in `re_match_2'.  */
01755                   BUF_PUSH (push_dummy_failure);
01756 
01757                   /* We allocated space for this jump when we assigned
01758                      to `fixup_alt_jump', in the `handle_alt' case below.  */
01759                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
01760                 }
01761 
01762               /* See similar code for backslashed left paren above.  */
01763               if (COMPILE_STACK_EMPTY)
01764                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
01765                   goto normal_char;
01766                 else
01767                   return REG_ERPAREN;
01768 
01769               /* Since we just checked for an empty stack above, this
01770                  ``can't happen''.  */
01771               assert (compile_stack.avail != 0);
01772               {
01773                 /* We don't just want to restore into `regnum', because
01774                    later groups should continue to be numbered higher,
01775                    as in `(ab)c(de)' -- the second group is #2.  */
01776                 regnum_t this_group_regnum;
01777 
01778                 compile_stack.avail--;
01779                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
01780                 fixup_alt_jump
01781                   = COMPILE_STACK_TOP.fixup_alt_jump
01782                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
01783                     : 0;
01784                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
01785                 this_group_regnum = COMPILE_STACK_TOP.regnum;
01786                 /* If we've reached MAX_REGNUM groups, then this open
01787                    won't actually generate any code, so we'll have to
01788                    clear pending_exact explicitly.  */
01789                 pending_exact = 0;
01790 
01791                 /* We're at the end of the group, so now we know how many
01792                    groups were inside this one.  */
01793                 if (this_group_regnum <= MAX_REGNUM)
01794                   {
01795                     unsigned char *inner_group_loc
01796                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
01797 
01798                     *inner_group_loc = regnum - this_group_regnum;
01799                     BUF_PUSH_3 (stop_memory, this_group_regnum,
01800                                 regnum - this_group_regnum);
01801                   }
01802               }
01803               break;
01804 
01805 
01806             case '|':                                   /* `\|'.  */
01807               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
01808                 goto normal_backslash;
01809             handle_alt:
01810               if (syntax & RE_LIMITED_OPS)
01811                 goto normal_char;
01812 
01813               /* Insert before the previous alternative a jump which
01814                  jumps to this alternative if the former fails.  */
01815               GET_BUFFER_SPACE (3);
01816               INSERT_JUMP (on_failure_jump, begalt, b + 6);
01817               pending_exact = 0;
01818               b += 3;
01819 
01820               /* The alternative before this one has a jump after it
01821                  which gets executed if it gets matched.  Adjust that
01822                  jump so it will jump to this alternative's analogous
01823                  jump (put in below, which in turn will jump to the next
01824                  (if any) alternative's such jump, etc.).  The last such
01825                  jump jumps to the correct final destination.  A picture:
01826                           _____ _____
01827                           |   | |   |
01828                           |   v |   v
01829                          a | b   | c
01830 
01831                  If we are at `b', then fixup_alt_jump right now points to a
01832                  three-byte space after `a'.  We'll put in the jump, set
01833                  fixup_alt_jump to right after `b', and leave behind three
01834                  bytes which we'll fill in when we get to after `c'.  */
01835 
01836               if (fixup_alt_jump)
01837                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
01838 
01839               /* Mark and leave space for a jump after this alternative,
01840                  to be filled in later either by next alternative or
01841                  when know we're at the end of a series of alternatives.  */
01842               fixup_alt_jump = b;
01843               GET_BUFFER_SPACE (3);
01844               b += 3;
01845 
01846               laststart = 0;
01847               begalt = b;
01848               break;
01849 
01850 
01851             case '{':
01852               /* If \{ is a literal.  */
01853               if (!(syntax & RE_INTERVALS)
01854                      /* If we're at `\{' and it's not the open-interval
01855                         operator.  */
01856                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
01857                   || (p - 2 == pattern  &&  p == pend))
01858                 goto normal_backslash;
01859 
01860             handle_interval:
01861               {
01862                 /* If got here, then the syntax allows intervals.  */
01863 
01864                 /* At least (most) this many matches must be made.  */
01865                 int lower_bound = -1, upper_bound = -1;
01866 
01867                 beg_interval = p - 1;
01868 
01869                 if (p == pend)
01870                   {
01871                     if (syntax & RE_NO_BK_BRACES)
01872                       goto unfetch_interval;
01873                     else
01874                       return REG_EBRACE;
01875                   }
01876 
01877                 GET_UNSIGNED_NUMBER (lower_bound);
01878 
01879                 if (c == ',')
01880                   {
01881                     GET_UNSIGNED_NUMBER (upper_bound);
01882                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
01883                   }
01884                 else
01885                   /* Interval such as `{1}' => match exactly once. */
01886                   upper_bound = lower_bound;
01887 
01888                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
01889                     || lower_bound > upper_bound)
01890                   {
01891                     if (syntax & RE_NO_BK_BRACES)
01892                       goto unfetch_interval;
01893                     else
01894                       return REG_BADBR;
01895                   }
01896 
01897                 if (!(syntax & RE_NO_BK_BRACES))
01898                   {
01899                     if (c != '\\') return REG_EBRACE;
01900 
01901                     PATFETCH (c);
01902                   }
01903 
01904                 if (c != '}')
01905                   {
01906                     if (syntax & RE_NO_BK_BRACES)
01907                       goto unfetch_interval;
01908                     else
01909                       return REG_BADBR;
01910                   }
01911 
01912                 /* We just parsed a valid interval.  */
01913 
01914                 /* If it's invalid to have no preceding re.  */
01915                 if (!laststart)
01916                   {
01917                     if (syntax & RE_CONTEXT_INVALID_OPS)
01918                       return REG_BADRPT;
01919                     else if (syntax & RE_CONTEXT_INDEP_OPS)
01920                       laststart = b;
01921                     else
01922                       goto unfetch_interval;
01923                   }
01924 
01925                 /* If the upper bound is zero, don't want to succeed at
01926                    all; jump from `laststart' to `b + 3', which will be
01927                    the end of the buffer after we insert the jump.  */
01928                  if (upper_bound == 0)
01929                    {
01930                      GET_BUFFER_SPACE (3);
01931                      INSERT_JUMP (jump, laststart, b + 3);
01932                      b += 3;
01933                    }
01934 
01935                  /* Otherwise, we have a nontrivial interval.  When
01936                     we're all done, the pattern will look like:
01937                       set_number_at <jump count> <upper bound>
01938                       set_number_at <succeed_n count> <lower bound>
01939                       succeed_n <after jump addr> <succed_n count>
01940                       <body of loop>
01941                       jump_n <succeed_n addr> <jump count>
01942                     (The upper bound and `jump_n' are omitted if
01943                     `upper_bound' is 1, though.)  */
01944                  else
01945                    { /* If the upper bound is > 1, we need to insert
01946                         more at the end of the loop.  */
01947                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
01948 
01949                      GET_BUFFER_SPACE (nbytes);
01950 
01951                      /* Initialize lower bound of the `succeed_n', even
01952                         though it will be set during matching by its
01953                         attendant `set_number_at' (inserted next),
01954                         because `re_compile_fastmap' needs to know.
01955                         Jump to the `jump_n' we might insert below.  */
01956                      INSERT_JUMP2 (succeed_n, laststart,
01957                                    b + 5 + (upper_bound > 1) * 5,
01958                                    lower_bound);
01959                      b += 5;
01960 
01961                      /* Code to initialize the lower bound.  Insert
01962                         before the `succeed_n'.  The `5' is the last two
01963                         bytes of this `set_number_at', plus 3 bytes of
01964                         the following `succeed_n'.  */
01965                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
01966                      b += 5;
01967 
01968                      if (upper_bound > 1)
01969                        { /* More than one repetition is allowed, so
01970                             append a backward jump to the `succeed_n'
01971                             that starts this interval.
01972 
01973                             When we've reached this during matching,
01974                             we'll have matched the interval once, so
01975                             jump back only `upper_bound - 1' times.  */
01976                          STORE_JUMP2 (jump_n, b, laststart + 5,
01977                                       upper_bound - 1);
01978                          b += 5;
01979 
01980                          /* The location we want to set is the second
01981                             parameter of the `jump_n'; that is `b-2' as
01982                             an absolute address.  `laststart' will be
01983                             the `set_number_at' we're about to insert;
01984                             `laststart+3' the number to set, the source
01985                             for the relative address.  But we are
01986                             inserting into the middle of the pattern --
01987                             so everything is getting moved up by 5.
01988                             Conclusion: (b - 2) - (laststart + 3) + 5,
01989                             i.e., b - laststart.
01990 
01991                             We insert this at the beginning of the loop
01992                             so that if we fail during matching, we'll
01993                             reinitialize the bounds.  */
01994                          insert_op2 (set_number_at, laststart, b - laststart,
01995                                      upper_bound - 1, b);
01996                          b += 5;
01997                        }
01998                    }
01999                 pending_exact = 0;
02000                 beg_interval = NULL;
02001               }
02002               break;
02003 
02004             unfetch_interval:
02005               /* If an invalid interval, match the characters as literals.  */
02006                assert (beg_interval);
02007                p = beg_interval;
02008                beg_interval = NULL;
02009 
02010                /* normal_char and normal_backslash need `c'.  */
02011                PATFETCH (c);
02012 
02013                if (!(syntax & RE_NO_BK_BRACES))
02014                  {
02015                    if (p > pattern  &&  p[-1] == '\\')
02016                      goto normal_backslash;
02017                  }
02018                goto normal_char;
02019 
02020 #ifdef emacs
02021             /* There is no way to specify the before_dot and after_dot
02022                operators.  rms says this is ok.  --karl  */
02023             case '=':
02024               BUF_PUSH (at_dot);
02025               break;
02026 
02027             case 's':
02028               laststart = b;
02029               PATFETCH (c);
02030               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02031               break;
02032 
02033             case 'S':
02034               laststart = b;
02035               PATFETCH (c);
02036               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02037               break;
02038 #endif /* emacs */
02039 
02040 
02041             case 'w':
02042               laststart = b;
02043               BUF_PUSH (wordchar);
02044               break;
02045 
02046 
02047             case 'W':
02048               laststart = b;
02049               BUF_PUSH (notwordchar);
02050               break;
02051 
02052 
02053             case '<':
02054               BUF_PUSH (wordbeg);
02055               break;
02056 
02057             case '>':
02058               BUF_PUSH (wordend);
02059               break;
02060 
02061             case 'b':
02062               BUF_PUSH (wordbound);
02063               break;
02064 
02065             case 'B':
02066               BUF_PUSH (notwordbound);
02067               break;
02068 
02069             case '`':
02070               BUF_PUSH (begbuf);
02071               break;
02072 
02073             case '\'':
02074               BUF_PUSH (endbuf);
02075               break;
02076 
02077             case '1': case '2': case '3': case '4': case '5':
02078             case '6': case '7': case '8': case '9':
02079               if (syntax & RE_NO_BK_REFS)
02080                 goto normal_char;
02081 
02082               c1 = c - '0';
02083 
02084               if (c1 > regnum)
02085                 return REG_ESUBREG;
02086 
02087               /* Can't back reference to a subexpression if inside of it.  */
02088               if (group_in_compile_stack (compile_stack, c1))
02089                 goto normal_char;
02090 
02091               laststart = b;
02092               BUF_PUSH_2 (duplicate, c1);
02093               break;
02094 
02095 
02096             case '+':
02097             case '?':
02098               if (syntax & RE_BK_PLUS_QM)
02099                 goto handle_plus;
02100               else
02101                 goto normal_backslash;
02102 
02103             default:
02104             normal_backslash:
02105               /* You might think it would be useful for \ to mean
02106                  not to translate; but if we don't translate it
02107                  it will never match anything.  */
02108               c = TRANSLATE (c);
02109               goto normal_char;
02110             }
02111           break;
02112 
02113 
02114         default:
02115         /* Expects the character in `c'.  */
02116         normal_char:
02117               /* If no exactn currently being built.  */
02118           if (!pending_exact
02119 
02120               /* If last exactn not at current position.  */
02121               || pending_exact + *pending_exact + 1 != b
02122 
02123               /* We have only one byte following the exactn for the count.  */
02124               || *pending_exact == (1 << BYTEWIDTH) - 1
02125 
02126               /* If followed by a repetition operator.  */
02127               || *p == '*' || *p == '^'
02128               || ((syntax & RE_BK_PLUS_QM)
02129                   ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02130                   : (*p == '+' || *p == '?'))
02131               || ((syntax & RE_INTERVALS)
02132                   && ((syntax & RE_NO_BK_BRACES)
02133                       ? *p == '{'
02134                       : (p[0] == '\\' && p[1] == '{'))))
02135             {
02136               /* Start building a new exactn.  */
02137 
02138               laststart = b;
02139 
02140               BUF_PUSH_2 (exactn, 0);
02141               pending_exact = b - 1;
02142             }
02143 
02144           BUF_PUSH (c);
02145           (*pending_exact)++;
02146           break;
02147         } /* switch (c) */
02148     } /* while p != pend */
02149 
02150 
02151   /* Through the pattern now.  */
02152 
02153   if (fixup_alt_jump)
02154     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02155 
02156   if (!COMPILE_STACK_EMPTY)
02157     return REG_EPAREN;
02158 
02159   free (compile_stack.stack);
02160 
02161   /* We have succeeded; set the length of the buffer.  */
02162   bufp->used = b - bufp->buffer;
02163 
02164 #ifdef DEBUG
02165   if (debug)
02166     {
02167       DEBUG_PRINT1 ("\nCompiled pattern: \n");
02168       print_compiled_pattern (bufp);
02169     }
02170 #endif /* DEBUG */
02171 
02172   return REG_NOERROR;
02173 } /* regex_compile */
02174 
02175 /* Subroutines for `regex_compile'.  */
02176 
02177 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
02178 
02179 static void
02180 store_op1 (op, loc, arg)
02181     re_opcode_t op;
02182     unsigned char *loc;
02183     int arg;
02184 {
02185   *loc = (unsigned char) op;
02186   STORE_NUMBER (loc + 1, arg);
02187 }
02188 
02189 
02190 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
02191 
02192 static void
02193 store_op2 (op, loc, arg1, arg2)
02194     re_opcode_t op;
02195     unsigned char *loc;
02196     int arg1, arg2;
02197 {
02198   *loc = (unsigned char) op;
02199   STORE_NUMBER (loc + 1, arg1);
02200   STORE_NUMBER (loc + 3, arg2);
02201 }
02202 
02203 
02204 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
02205    for OP followed by two-byte integer parameter ARG.  */
02206 
02207 static void
02208 insert_op1 (op, loc, arg, end)
02209     re_opcode_t op;
02210     unsigned char *loc;
02211     int arg;
02212     unsigned char *end;
02213 {
02214   register unsigned char *pfrom = end;
02215   register unsigned char *pto = end + 3;
02216 
02217   while (pfrom != loc)
02218     *--pto = *--pfrom;
02219 
02220   store_op1 (op, loc, arg);
02221 }
02222 
02223 
02224 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
02225 
02226 static void
02227 insert_op2 (op, loc, arg1, arg2, end)
02228     re_opcode_t op;
02229     unsigned char *loc;
02230     int arg1, arg2;
02231     unsigned char *end;
02232 {
02233   register unsigned char *pfrom = end;
02234   register unsigned char *pto = end + 5;
02235 
02236   while (pfrom != loc)
02237     *--pto = *--pfrom;
02238 
02239   store_op2 (op, loc, arg1, arg2);
02240 }
02241 
02242 
02243 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
02244    after an alternative or a begin-subexpression.  We assume there is at
02245    least one character before the ^.  */
02246 
02247 static boolean
02248 at_begline_loc_p (pattern, p, syntax)
02249     const char *pattern, *p;
02250     reg_syntax_t syntax;
02251 {
02252   const char *prev = p - 2;
02253   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02254 
02255   return
02256        /* After a subexpression?  */
02257        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02258        /* After an alternative?  */
02259     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02260 }
02261 
02262 
02263 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
02264    at least one character after the $, i.e., `P < PEND'.  */
02265 
02266 static boolean
02267 at_endline_loc_p (p, pend, syntax)
02268     const char *p, *pend;
02269     int syntax;
02270 {
02271   const char *next = p;
02272   boolean next_backslash = *next == '\\';
02273   const char *next_next = p + 1 < pend ? p + 1 : NULL;
02274 
02275   return
02276        /* Before a subexpression?  */
02277        (syntax & RE_NO_BK_PARENS ? *next == ')'
02278         : next_backslash && next_next && *next_next == ')')
02279        /* Before an alternative?  */
02280     || (syntax & RE_NO_BK_VBAR ? *next == '|'
02281         : next_backslash && next_next && *next_next == '|');
02282 }
02283 
02284 
02285 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
02286    false if it's not.  */
02287 
02288 static boolean
02289 group_in_compile_stack (compile_stack, regnum)
02290     compile_stack_type compile_stack;
02291     regnum_t regnum;
02292 {
02293   int this_element;
02294 
02295   for (this_element = compile_stack.avail - 1;
02296        this_element >= 0;
02297        this_element--)
02298     if (compile_stack.stack[this_element].regnum == regnum)
02299       return true;
02300 
02301   return false;
02302 }
02303 
02304 
02305 /* Read the ending character of a range (in a bracket expression) from the
02306    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
02307    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
02308    Then we set the translation of all bits between the starting and
02309    ending characters (inclusive) in the compiled pattern B.
02310 
02311    Return an error code.
02312 
02313    We use these short variable names so we can use the same macros as
02314    `regex_compile' itself.  */
02315 
02316 static reg_errcode_t
02317 compile_range (p_ptr, pend, translate, syntax, b)
02318     const char **p_ptr, *pend;
02319     char *translate;
02320     reg_syntax_t syntax;
02321     unsigned char *b;
02322 {
02323   unsigned this_char;
02324 
02325   const char *p = *p_ptr;
02326   int range_start, range_end;
02327 
02328   if (p == pend)
02329     return REG_ERANGE;
02330 
02331   /* Even though the pattern is a signed `char *', we need to fetch
02332      with unsigned char *'s; if the high bit of the pattern character
02333      is set, the range endpoints will be negative if we fetch using a
02334      signed char *.
02335 
02336      We also want to fetch the endpoints without translating them; the
02337      appropriate translation is done in the bit-setting loop below.  */
02338   range_start = ((unsigned char *) p)[-2];
02339   range_end   = ((unsigned char *) p)[0];
02340 
02341   /* Have to increment the pointer into the pattern string, so the
02342      caller isn't still at the ending character.  */
02343   (*p_ptr)++;
02344 
02345   /* If the start is after the end, the range is empty.  */
02346   if (range_start > range_end)
02347     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
02348 
02349   /* Here we see why `this_char' has to be larger than an `unsigned
02350      char' -- the range is inclusive, so if `range_end' == 0xff
02351      (assuming 8-bit characters), we would otherwise go into an infinite
02352      loop, since all characters <= 0xff.  */
02353   for (this_char = range_start; this_char <= range_end; this_char++)
02354     {
02355       SET_LIST_BIT (TRANSLATE (this_char));
02356     }
02357 
02358   return REG_NOERROR;
02359 }
02360 
02361 /* Failure stack declarations and macros; both re_compile_fastmap and
02362    re_match_2 use a failure stack.  These have to be macros because of
02363    REGEX_ALLOCATE.  */
02364 
02365 
02366 /* Number of failure points for which to initially allocate space
02367    when matching.  If this number is exceeded, we allocate more
02368    space, so it is not a hard limit.  */
02369 #ifndef INIT_FAILURE_ALLOC
02370 #define INIT_FAILURE_ALLOC 5
02371 #endif
02372 
02373 /* Roughly the maximum number of failure points on the stack.  Would be
02374    exactly that if always used MAX_FAILURE_SPACE each time we failed.
02375    This is a variable only so users of regex can assign to it; we never
02376    change it ourselves.  */
02377 int re_max_failures = 2000;
02378 
02379 typedef const unsigned char *fail_stack_elt_t;
02380 
02381 typedef struct
02382 {
02383   fail_stack_elt_t *stack;
02384   unsigned size;
02385   unsigned avail;                       /* Offset of next open position.  */
02386 } fail_stack_type;
02387 
02388 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
02389 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
02390 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
02391 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
02392 
02393 
02394 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
02395 
02396 #define INIT_FAIL_STACK()                                               \
02397   do {                                                                  \
02398     fail_stack.stack = (fail_stack_elt_t *)                             \
02399       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
02400                                                                         \
02401     if (fail_stack.stack == NULL)                                       \
02402       return -2;                                                        \
02403                                                                         \
02404     fail_stack.size = INIT_FAILURE_ALLOC;                               \
02405     fail_stack.avail = 0;                                               \
02406   } while (0)
02407 
02408 
02409 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
02410 
02411    Return 1 if succeeds, and 0 if either ran out of memory
02412    allocating space for it or it was already too large.
02413 
02414    REGEX_REALLOCATE requires `destination' be declared.   */
02415 
02416 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
02417   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
02418    ? 0                                                                  \
02419    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
02420         REGEX_REALLOCATE ((fail_stack).stack,                           \
02421           (fail_stack).size * sizeof (fail_stack_elt_t),                \
02422           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
02423                                                                         \
02424       (fail_stack).stack == NULL                                        \
02425       ? 0                                                               \
02426       : ((fail_stack).size <<= 1,                                       \
02427          1)))
02428 
02429 
02430 /* Push PATTERN_OP on FAIL_STACK.
02431 
02432    Return 1 if was able to do so and 0 if ran out of memory allocating
02433    space to do so.  */
02434 #define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
02435   ((FAIL_STACK_FULL ()                                                  \
02436     && !DOUBLE_FAIL_STACK (fail_stack))                                 \
02437     ? 0                                                                 \
02438     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
02439        1))
02440 
02441 /* This pushes an item onto the failure stack.  Must be a four-byte
02442    value.  Assumes the variable `fail_stack'.  Probably should only
02443    be called from within `PUSH_FAILURE_POINT'.  */
02444 #define PUSH_FAILURE_ITEM(item)                                         \
02445   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
02446 
02447 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
02448 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
02449 
02450 /* Used to omit pushing failure point id's when we're not debugging.  */
02451 #ifdef DEBUG
02452 #define DEBUG_PUSH PUSH_FAILURE_ITEM
02453 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
02454 #else
02455 #define DEBUG_PUSH(item)
02456 #define DEBUG_POP(item_addr)
02457 #endif
02458 
02459 
02460 /* Push the information about the state we will need
02461    if we ever fail back to it.
02462 
02463    Requires variables fail_stack, regstart, regend, reg_info, and
02464    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
02465    declared.
02466 
02467    Does `return FAILURE_CODE' if runs out of memory.  */
02468 
02469 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
02470   do {                                                                  \
02471     char *destination;                                                  \
02472     /* Must be int, so when we don't save any registers, the arithmetic \
02473        of 0 + -1 isn't done as unsigned.  */                            \
02474     int this_reg;                                                       \
02475                                                                         \
02476     DEBUG_STATEMENT (failure_id++);                                     \
02477     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
02478     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
02479     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
02480     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
02481                                                                         \
02482     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
02483     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
02484                                                                         \
02485     /* Ensure we have enough space allocated for what we will push.  */ \
02486     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
02487       {                                                                 \
02488         if (!DOUBLE_FAIL_STACK (fail_stack))                    \
02489           return failure_code;                                          \
02490                                                                         \
02491         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
02492                        (fail_stack).size);                              \
02493         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
02494       }                                                                 \
02495                                                                         \
02496     /* Push the info, starting with the registers.  */                  \
02497     DEBUG_PRINT1 ("\n");                                                \
02498                                                                         \
02499     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
02500          this_reg++)                                                    \
02501       {                                                                 \
02502         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
02503         DEBUG_STATEMENT (num_regs_pushed++);                            \
02504                                                                         \
02505         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
02506         PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
02507                                                                         \
02508         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
02509         PUSH_FAILURE_ITEM (regend[this_reg]);                           \
02510                                                                         \
02511         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
02512         DEBUG_PRINT2 (" match_null=%d",                                 \
02513                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
02514         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
02515         DEBUG_PRINT2 (" matched_something=%d",                          \
02516                       MATCHED_SOMETHING (reg_info[this_reg]));          \
02517         DEBUG_PRINT2 (" ever_matched=%d",                               \
02518                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
02519         DEBUG_PRINT1 ("\n");                                            \
02520         PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
02521       }                                                                 \
02522                                                                         \
02523     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
02524     PUSH_FAILURE_ITEM (lowest_active_reg);                              \
02525                                                                         \
02526     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
02527     PUSH_FAILURE_ITEM (highest_active_reg);                             \
02528                                                                         \
02529     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
02530     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
02531     PUSH_FAILURE_ITEM (pattern_place);                                  \
02532                                                                         \
02533     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
02534     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
02535                                  size2);                                \
02536     DEBUG_PRINT1 ("'\n");                                               \
02537     PUSH_FAILURE_ITEM (string_place);                                   \
02538                                                                         \
02539     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
02540     DEBUG_PUSH (failure_id);                                            \
02541   } while (0)
02542 
02543 /* This is the number of items that are pushed and popped on the stack
02544    for each register.  */
02545 #define NUM_REG_ITEMS  3
02546 
02547 /* Individual items aside from the registers.  */
02548 #ifdef DEBUG
02549 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
02550 #else
02551 #define NUM_NONREG_ITEMS 4
02552 #endif
02553 
02554 /* We push at most this many items on the stack.  */
02555 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
02556 
02557 /* We actually push this many items.  */
02558 #define NUM_FAILURE_ITEMS                                               \
02559   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
02560     + NUM_NONREG_ITEMS)
02561 
02562 /* How many items can still be added to the stack without overflowing it.  */
02563 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
02564 
02565 
02566 /* Pops what PUSH_FAIL_STACK pushes.
02567 
02568    We restore into the parameters, all of which should be lvalues:
02569      STR -- the saved data position.
02570      PAT -- the saved pattern position.
02571      LOW_REG, HIGH_REG -- the highest and lowest active registers.
02572      REGSTART, REGEND -- arrays of string positions.
02573      REG_INFO -- array of information about each subexpression.
02574 
02575    Also assumes the variables `fail_stack' and (if debugging), `bufp',
02576    `pend', `string1', `size1', `string2', and `size2'.  */
02577 
02578 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
02579 {                                                                       \
02580   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
02581   int this_reg;                                                         \
02582   const unsigned char *string_temp;                                     \
02583                                                                         \
02584   assert (!FAIL_STACK_EMPTY ());                                        \
02585                                                                         \
02586   /* Remove failure points and point to how many regs pushed.  */       \
02587   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
02588   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
02589   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
02590                                                                         \
02591   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
02592                                                                         \
02593   DEBUG_POP (&failure_id);                                              \
02594   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
02595                                                                         \
02596   /* If the saved string location is NULL, it came from an              \
02597      on_failure_keep_string_jump opcode, and we want to throw away the  \
02598      saved NULL, thus retaining our current position in the string.  */ \
02599   string_temp = POP_FAILURE_ITEM ();                                    \
02600   if (string_temp != NULL)                                              \
02601     str = (const char *) string_temp;                                   \
02602                                                                         \
02603   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
02604   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
02605   DEBUG_PRINT1 ("'\n");                                                 \
02606                                                                         \
02607   pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
02608   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
02609   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
02610                                                                         \
02611   /* Restore register info.  */                                         \
02612   high_reg = (unsigned) POP_FAILURE_ITEM ();                            \
02613   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
02614                                                                         \
02615   low_reg = (unsigned) POP_FAILURE_ITEM ();                             \
02616   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
02617                                                                         \
02618   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
02619     {                                                                   \
02620       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
02621                                                                         \
02622       reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
02623       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
02624                                                                         \
02625       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
02626       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
02627                                                                         \
02628       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
02629       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
02630     }                                                                   \
02631                                                                         \
02632   DEBUG_STATEMENT (nfailure_points_popped++);                           \
02633 } /* POP_FAILURE_POINT */
02634 
02635 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
02636    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
02637    characters can start a string that matches the pattern.  This fastmap
02638    is used by re_search to skip quickly over impossible starting points.
02639 
02640    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
02641    area as BUFP->fastmap.
02642 
02643    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
02644    the pattern buffer.
02645 
02646    Returns 0 if we succeed, -2 if an internal error.   */
02647 
02648 int
02649 re_compile_fastmap (bufp)
02650      struct re_pattern_buffer *bufp;
02651 {
02652   int j, k;
02653   fail_stack_type fail_stack;
02654 #ifndef REGEX_MALLOC
02655   char *destination;
02656 #endif
02657   /* We don't push any register information onto the failure stack.  */
02658   unsigned num_regs = 0;
02659 
02660   register char *fastmap = bufp->fastmap;
02661   unsigned char *pattern = bufp->buffer;
02662   unsigned long size = bufp->used;
02663   const unsigned char *p = pattern;
02664   register unsigned char *pend = pattern + size;
02665 
02666   /* Assume that each path through the pattern can be null until
02667      proven otherwise.  We set this false at the bottom of switch
02668      statement, to which we get only if a particular path doesn't
02669      match the empty string.  */
02670   boolean path_can_be_null = true;
02671 
02672   /* We aren't doing a `succeed_n' to begin with.  */
02673   boolean succeed_n_p = false;
02674 
02675   assert (fastmap != NULL && p != NULL);
02676 
02677   INIT_FAIL_STACK ();
02678   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
02679   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
02680   bufp->can_be_null = 0;
02681 
02682   while (p != pend || !FAIL_STACK_EMPTY ())
02683     {
02684       if (p == pend)
02685         {
02686           bufp->can_be_null |= path_can_be_null;
02687 
02688           /* Reset for next path.  */
02689           path_can_be_null = true;
02690 
02691           p = fail_stack.stack[--fail_stack.avail];
02692         }
02693 
02694       /* We should never be about to go beyond the end of the pattern.  */
02695       assert (p < pend);
02696 
02697 #ifdef SWITCH_ENUM_BUG
02698       switch ((int) ((re_opcode_t) *p++))
02699 #else
02700       switch ((re_opcode_t) *p++)
02701 #endif
02702         {
02703 
02704         /* I guess the idea here is to simply not bother with a fastmap
02705            if a backreference is used, since it's too hard to figure out
02706            the fastmap for the corresponding group.  Setting
02707            `can_be_null' stops `re_search_2' from using the fastmap, so
02708            that is all we do.  */
02709         case duplicate:
02710           bufp->can_be_null = 1;
02711           return 0;
02712 
02713 
02714       /* Following are the cases which match a character.  These end
02715          with `break'.  */
02716 
02717         case exactn:
02718           fastmap[p[1]] = 1;
02719           break;
02720 
02721 
02722         case charset:
02723           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02724             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
02725               fastmap[j] = 1;
02726           break;
02727 
02728 
02729         case charset_not:
02730           /* Chars beyond end of map must be allowed.  */
02731           for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
02732             fastmap[j] = 1;
02733 
02734           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
02735             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
02736               fastmap[j] = 1;
02737           break;
02738 
02739 
02740         case wordchar:
02741           for (j = 0; j < (1 << BYTEWIDTH); j++)
02742             if (SYNTAX (j) == Sword)
02743               fastmap[j] = 1;
02744           break;
02745 
02746 
02747         case notwordchar:
02748           for (j = 0; j < (1 << BYTEWIDTH); j++)
02749             if (SYNTAX (j) != Sword)
02750               fastmap[j] = 1;
02751           break;
02752 
02753 
02754         case anychar:
02755           /* `.' matches anything ...  */
02756           for (j = 0; j < (1 << BYTEWIDTH); j++)
02757             fastmap[j] = 1;
02758 
02759           /* ... except perhaps newline.  */
02760           if (!(bufp->syntax & RE_DOT_NEWLINE))
02761             fastmap['\n'] = 0;
02762 
02763           /* Return if we have already set `can_be_null'; if we have,
02764              then the fastmap is irrelevant.  Something's wrong here.  */
02765           else if (bufp->can_be_null)
02766             return 0;
02767 
02768           /* Otherwise, have to check alternative paths.  */
02769           break;
02770 
02771 
02772 #ifdef emacs
02773         case syntaxspec:
02774           k = *p++;
02775           for (j = 0; j < (1 << BYTEWIDTH); j++)
02776             if (SYNTAX (j) == (enum syntaxcode) k)
02777               fastmap[j] = 1;
02778           break;
02779 
02780 
02781         case notsyntaxspec:
02782           k = *p++;
02783           for (j = 0; j < (1 << BYTEWIDTH); j++)
02784             if (SYNTAX (j) != (enum syntaxcode) k)
02785               fastmap[j] = 1;
02786           break;
02787 
02788 
02789       /* All cases after this match the empty string.  These end with
02790          `continue'.  */
02791 
02792 
02793         case before_dot:
02794         case at_dot:
02795         case after_dot:
02796           continue;
02797 #endif /* not emacs */
02798 
02799 
02800         case no_op:
02801         case begline:
02802         case endline:
02803         case begbuf:
02804         case endbuf:
02805         case wordbound:
02806         case notwordbound:
02807         case wordbeg:
02808         case wordend:
02809         case push_dummy_failure:
02810           continue;
02811 
02812 
02813         case jump_n:
02814         case pop_failure_jump:
02815         case maybe_pop_jump:
02816         case jump:
02817         case jump_past_alt:
02818         case dummy_failure_jump:
02819           EXTRACT_NUMBER_AND_INCR (j, p);
02820           p += j;
02821           if (j > 0)
02822             continue;
02823 
02824           /* Jump backward implies we just went through the body of a
02825              loop and matched nothing.  Opcode jumped to should be
02826              `on_failure_jump' or `succeed_n'.  Just treat it like an
02827              ordinary jump.  For a * loop, it has pushed its failure
02828              point already; if so, discard that as redundant.  */
02829           if ((re_opcode_t) *p != on_failure_jump
02830               && (re_opcode_t) *p != succeed_n)
02831             continue;
02832 
02833           p++;
02834           EXTRACT_NUMBER_AND_INCR (j, p);
02835           p += j;
02836 
02837           /* If what's on the stack is where we are now, pop it.  */
02838           if (!FAIL_STACK_EMPTY ()
02839               && fail_stack.stack[fail_stack.avail - 1] == p)
02840             fail_stack.avail--;
02841 
02842           continue;
02843 
02844 
02845         case on_failure_jump:
02846         case on_failure_keep_string_jump:
02847         handle_on_failure_jump:
02848           EXTRACT_NUMBER_AND_INCR (j, p);
02849 
02850           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
02851              end of the pattern.  We don't want to push such a point,
02852              since when we restore it above, entering the switch will
02853              increment `p' past the end of the pattern.  We don't need
02854              to push such a point since we obviously won't find any more
02855              fastmap entries beyond `pend'.  Such a pattern can match
02856              the null string, though.  */
02857           if (p + j < pend)
02858             {
02859               if (!PUSH_PATTERN_OP (p + j, fail_stack))
02860                 return -2;
02861             }
02862           else
02863             bufp->can_be_null = 1;
02864 
02865           if (succeed_n_p)
02866             {
02867               EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
02868               succeed_n_p = false;
02869             }
02870 
02871           continue;
02872 
02873 
02874         case succeed_n:
02875           /* Get to the number of times to succeed.  */
02876           p += 2;
02877 
02878           /* Increment p past the n for when k != 0.  */
02879           EXTRACT_NUMBER_AND_INCR (k, p);
02880           if (k == 0)
02881             {
02882               p -= 4;
02883               succeed_n_p = true;  /* Spaghetti code alert.  */
02884               goto handle_on_failure_jump;
02885             }
02886           continue;
02887 
02888 
02889         case set_number_at:
02890           p += 4;
02891           continue;
02892 
02893 
02894         case start_memory:
02895         case stop_memory:
02896           p += 2;
02897           continue;
02898 
02899 
02900         default:
02901           abort (); /* We have listed all the cases.  */
02902         } /* switch *p++ */
02903 
02904       /* Getting here means we have found the possible starting
02905          characters for one path of the pattern -- and that the empty
02906          string does not match.  We need not follow this path further.
02907          Instead, look at the next alternative (remembered on the
02908          stack), or quit if no more.  The test at the top of the loop
02909          does these things.  */
02910       path_can_be_null = false;
02911       p = pend;
02912     } /* while p */
02913 
02914   /* Set `can_be_null' for the last path (also the first path, if the
02915      pattern is empty).  */
02916   bufp->can_be_null |= path_can_be_null;
02917   return 0;
02918 } /* re_compile_fastmap */
02919 
02920 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
02921    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
02922    this memory for recording register information.  STARTS and ENDS
02923    must be allocated using the malloc library routine, and must each
02924    be at least NUM_REGS * sizeof (regoff_t) bytes long.
02925 
02926    If NUM_REGS == 0, then subsequent matches should allocate their own
02927    register data.
02928 
02929    Unless this function is called, the first search or match using
02930    PATTERN_BUFFER will allocate its own register data, without
02931    freeing the old data.  */
02932 
02933 void
02934 re_set_registers (bufp, regs, num_regs, starts, ends)
02935     struct re_pattern_buffer *bufp;
02936     struct re_registers *regs;
02937     unsigned num_regs;
02938     regoff_t *starts, *ends;
02939 {
02940   if (num_regs)
02941     {
02942       bufp->regs_allocated = REGS_REALLOCATE;
02943       regs->num_regs = num_regs;
02944       regs->start = starts;
02945       regs->end = ends;
02946     }
02947   else
02948     {
02949       bufp->regs_allocated = REGS_UNALLOCATED;
02950       regs->num_regs = 0;
02951       regs->start = regs->end = (regoff_t) 0;
02952     }
02953 }
02954 
02955 /* Searching routines.  */
02956 
02957 /* Like re_search_2, below, but only one string is specified, and
02958    doesn't let you say where to stop matching. */
02959 
02960 int
02961 re_search (bufp, string, size, startpos, range, regs)
02962      struct re_pattern_buffer *bufp;
02963      const char *string;
02964      int size, startpos, range;
02965      struct re_registers *regs;
02966 {
02967   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
02968                       regs, size);
02969 }
02970 
02971 
02972 /* Using the compiled pattern in BUFP->buffer, first tries to match the
02973    virtual concatenation of STRING1 and STRING2, starting first at index
02974    STARTPOS, then at STARTPOS + 1, and so on.
02975 
02976    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
02977 
02978    RANGE is how far to scan while trying to match.  RANGE = 0 means try
02979    only at STARTPOS; in general, the last start tried is STARTPOS +
02980    RANGE.
02981 
02982    In REGS, return the indices of the virtual concatenation of STRING1
02983    and STRING2 that matched the entire BUFP->buffer and its contained
02984    subexpressions.
02985 
02986    Do not consider matching one past the index STOP in the virtual
02987    concatenation of STRING1 and STRING2.
02988 
02989    We return either the position in the strings at which the match was
02990    found, -1 if no match, or -2 if error (such as failure
02991    stack overflow).  */
02992 
02993 int
02994 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
02995      struct re_pattern_buffer *bufp;
02996      const char *string1, *string2;
02997      int size1, size2;
02998      int startpos;
02999      int range;
03000      struct re_registers *regs;
03001      int stop;
03002 {
03003   int val;
03004   register char *fastmap = bufp->fastmap;
03005   register char *translate = bufp->translate;
03006   int total_size = size1 + size2;
03007   int endpos = startpos + range;
03008 
03009   /* Check for out-of-range STARTPOS.  */
03010   if (startpos < 0 || startpos > total_size)
03011     return -1;
03012 
03013   /* Fix up RANGE if it might eventually take us outside
03014      the virtual concatenation of STRING1 and STRING2.  */
03015   if (endpos < -1)
03016     range = -1 - startpos;
03017   else if (endpos > total_size)
03018     range = total_size - startpos;
03019 
03020   /* If the search isn't to be a backwards one, don't waste time in a
03021      search for a pattern that must be anchored.  */
03022   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03023     {
03024       if (startpos > 0)
03025         return -1;
03026       else
03027         range = 1;
03028     }
03029 
03030   /* Update the fastmap now if not correct already.  */
03031   if (fastmap && !bufp->fastmap_accurate)
03032     if (re_compile_fastmap (bufp) == -2)
03033       return -2;
03034 
03035   /* Loop through the string, looking for a place to start matching.  */
03036   for (;;)
03037     {
03038       /* If a fastmap is supplied, skip quickly over characters that
03039          cannot be the start of a match.  If the pattern can match the
03040          null string, however, we don't need to skip characters; we want
03041          the first null string.  */
03042       if (fastmap && startpos < total_size && !bufp->can_be_null)
03043         {
03044           if (range > 0)        /* Searching forwards.  */
03045             {
03046               register const char *d;
03047               register int lim = 0;
03048               int irange = range;
03049 
03050               if (startpos < size1 && startpos + range >= size1)
03051                 lim = range - (size1 - startpos);
03052 
03053               d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03054 
03055               /* Written out as an if-else to avoid testing `translate'
03056                  inside the loop.  */
03057               if (translate)
03058                 while (range > lim
03059                        && !fastmap[(unsigned char)
03060                                    translate[(unsigned char) *d++]])
03061                   range--;
03062               else
03063                 while (range > lim && !fastmap[(unsigned char) *d++])
03064                   range--;
03065 
03066               startpos += irange - range;
03067             }
03068           else                          /* Searching backwards.  */
03069             {
03070               register char c = (size1 == 0 || startpos >= size1
03071                                  ? string2[startpos - size1]
03072                                  : string1[startpos]);
03073 
03074               if (!fastmap[(unsigned char) TRANSLATE (c)])
03075                 goto advance;
03076             }
03077         }
03078 
03079       /* If can't match the null string, and that's all we have left, fail.  */
03080       if (range >= 0 && startpos == total_size && fastmap
03081           && !bufp->can_be_null)
03082         return -1;
03083 
03084       val = re_match_2 (bufp, string1, size1, string2, size2,
03085                         startpos, regs, stop);
03086       if (val >= 0)
03087         return startpos;
03088 
03089       if (val == -2)
03090         return -2;
03091 
03092     advance:
03093       if (!range)
03094         break;
03095       else if (range > 0)
03096         {
03097           range--;
03098           startpos++;
03099         }
03100       else
03101         {
03102           range++;
03103           startpos--;
03104         }
03105     }
03106   return -1;
03107 } /* re_search_2 */
03108 
03109 /* Declarations and macros for re_match_2.  */
03110 
03111 static int
03112 bcmp_translate (
03113 #ifdef __STDC__
03114      unsigned char *s1,
03115      unsigned char *s2,
03116      register int len,
03117      char *translate
03118 #endif
03119      );
03120 
03121 /* Structure for per-register (a.k.a. per-group) information.
03122    This must not be longer than one word, because we push this value
03123    onto the failure stack.  Other register information, such as the
03124    starting and ending positions (which are addresses), and the list of
03125    inner groups (which is a bits list) are maintained in separate
03126    variables.
03127 
03128    We are making a (strictly speaking) nonportable assumption here: that
03129    the compiler will pack our bit fields into something that fits into
03130    the type of `word', i.e., is something that fits into one item on the
03131    failure stack.  */
03132 typedef union
03133 {
03134   fail_stack_elt_t word;
03135   struct
03136   {
03137       /* This field is one if this group can match the empty string,
03138          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
03139 #define MATCH_NULL_UNSET_VALUE 3
03140     unsigned match_null_string_p : 2;
03141     unsigned is_active : 1;
03142     unsigned matched_something : 1;
03143     unsigned ever_matched_something : 1;
03144   } bits;
03145 } register_info_type;
03146 static boolean common_op_match_null_string_p (
03147 #ifdef __STDC__
03148     unsigned char **p,
03149     unsigned char *end,
03150     register_info_type *reg_info
03151 #endif
03152     );
03153 static boolean alt_match_null_string_p (
03154 #ifdef __STDC__
03155     unsigned char *p,
03156     unsigned char *end,
03157     register_info_type *reg_info
03158 #endif
03159     );
03160 
03161 static boolean
03162 group_match_null_string_p (
03163 #ifdef __STDC__
03164     unsigned char **p,
03165     unsigned char *end,
03166     register_info_type *reg_info
03167 #endif
03168     );
03169 
03170 
03171 
03172 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
03173 #define IS_ACTIVE(R)  ((R).bits.is_active)
03174 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
03175 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
03176 
03177 
03178 /* Call this when have matched a real character; it sets `matched' flags
03179    for the subexpressions which we are currently inside.  Also records
03180    that those subexprs have matched.  */
03181 #define SET_REGS_MATCHED()                                              \
03182   do                                                                    \
03183     {                                                                   \
03184       unsigned r;                                                       \
03185       for (r = lowest_active_reg; r <= highest_active_reg; r++)         \
03186         {                                                               \
03187           MATCHED_SOMETHING (reg_info[r])                               \
03188             = EVER_MATCHED_SOMETHING (reg_info[r])                      \
03189             = 1;                                                        \
03190         }                                                               \
03191     }                                                                   \
03192   while (0)
03193 
03194 
03195 /* This converts PTR, a pointer into one of the search strings `string1'
03196    and `string2' into an offset from the beginning of that string.  */
03197 #define POINTER_TO_OFFSET(ptr)                                          \
03198   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
03199 
03200 /* Registers are set to a sentinel when they haven't yet matched.  */
03201 #define REG_UNSET_VALUE ((char *) -1)
03202 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
03203 
03204 
03205 /* Macros for dealing with the split strings in re_match_2.  */
03206 
03207 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
03208 
03209 /* Call before fetching a character with *d.  This switches over to
03210    string2 if necessary.  */
03211 #define PREFETCH()                                                      \
03212   while (d == dend)                                                     \
03213     {                                                                   \
03214       /* End of string2 => fail.  */                                    \
03215       if (dend == end_match_2)                                          \
03216         goto fail;                                                      \
03217       /* End of string1 => advance to string2.  */                      \
03218       d = string2;                                                      \
03219       dend = end_match_2;                                               \
03220     }
03221 
03222 
03223 /* Test if at very beginning or at very end of the virtual concatenation
03224    of `string1' and `string2'.  If only one string, it's `string2'.  */
03225 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03226 #define AT_STRINGS_END(d) ((d) == end2)
03227 
03228 
03229 /* Test if D points to a character which is word-constituent.  We have
03230    two special cases to check for: if past the end of string1, look at
03231    the first character in string2; and if before the beginning of
03232    string2, look at the last character in string1.  */
03233 #define WORDCHAR_P(d)                                                   \
03234   (SYNTAX ((d) == end1 ? *string2                                       \
03235            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
03236    == Sword)
03237 
03238 /* Test if the character before D and the one at D differ with respect
03239    to being word-constituent.  */
03240 #define AT_WORD_BOUNDARY(d)                                             \
03241   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
03242    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03243 
03244 
03245 /* Free everything we malloc.  */
03246 #ifdef REGEX_MALLOC
03247 #define FREE_VAR(var) if (var) free (var); var = NULL
03248 #define FREE_VARIABLES()                                                \
03249   do {                                                                  \
03250     FREE_VAR (fail_stack.stack);                                        \
03251     FREE_VAR (regstart);                                                \
03252     FREE_VAR (regend);                                                  \
03253     FREE_VAR (old_regstart);                                            \
03254     FREE_VAR (old_regend);                                              \
03255     FREE_VAR (best_regstart);                                           \
03256     FREE_VAR (best_regend);                                             \
03257     FREE_VAR (reg_info);                                                \
03258     FREE_VAR (reg_dummy);                                               \
03259     FREE_VAR (reg_info_dummy);                                          \
03260   } while (0)
03261 #else /* not REGEX_MALLOC */
03262 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
03263 #define FREE_VARIABLES() alloca (0)
03264 #endif /* not REGEX_MALLOC */
03265 
03266 
03267 /* These values must meet several constraints.  They must not be valid
03268    register values; since we have a limit of 255 registers (because
03269    we use only one byte in the pattern for the register number), we can
03270    use numbers larger than 255.  They must differ by 1, because of
03271    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
03272    be larger than the value for the highest register, so we do not try
03273    to actually save any registers when none are active.  */
03274 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03275 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03276 
03277 /* Matching routines.  */
03278 
03279 #ifndef emacs   /* Emacs never uses this.  */
03280 /* re_match is like re_match_2 except it takes only a single string.  */
03281 
03282 int
03283 re_match (bufp, string, size, pos, regs)
03284      struct re_pattern_buffer *bufp;
03285      const char *string;
03286      int size, pos;
03287      struct re_registers *regs;
03288  {
03289   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
03290 }
03291 #endif /* not emacs */
03292 
03293 
03294 /* re_match_2 matches the compiled pattern in BUFP against the
03295    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
03296    and SIZE2, respectively).  We start matching at POS, and stop
03297    matching at STOP.
03298 
03299    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
03300    store offsets for the substring each group matched in REGS.  See the
03301    documentation for exactly how many groups we fill.
03302 
03303    We return -1 if no match, -2 if an internal error (such as the
03304    failure stack overflowing).  Otherwise, we return the length of the
03305    matched substring.  */
03306 
03307 int
03308 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
03309      struct re_pattern_buffer *bufp;
03310      const char *string1, *string2;
03311      int size1, size2;
03312      int pos;
03313      struct re_registers *regs;
03314      int stop;
03315 {
03316   /* General temporaries.  */
03317   int mcnt;
03318   unsigned char *p1;
03319 
03320   /* Just past the end of the corresponding string.  */
03321   const char *end1, *end2;
03322 
03323   /* Pointers into string1 and string2, just past the last characters in
03324      each to consider matching.  */
03325   const char *end_match_1, *end_match_2;
03326 
03327   /* Where we are in the data, and the end of the current string.  */
03328   const char *d, *dend;
03329 
03330   /* Where we are in the pattern, and the end of the pattern.  */
03331   unsigned char *p = bufp->buffer;
03332   register unsigned char *pend = p + bufp->used;
03333 
03334   /* We use this to map every character in the string.  */
03335   char *translate = bufp->translate;
03336 
03337   /* Failure point stack.  Each place that can handle a failure further
03338      down the line pushes a failure point on this stack.  It consists of
03339      restart, regend, and reg_info for all registers corresponding to
03340      the subexpressions we're currently inside, plus the number of such
03341      registers, and, finally, two char *'s.  The first char * is where
03342      to resume scanning the pattern; the second one is where to resume
03343      scanning the strings.  If the latter is zero, the failure point is
03344      a ``dummy''; if a failure happens and the failure point is a dummy,
03345      it gets discarded and the next next one is tried.  */
03346   fail_stack_type fail_stack;
03347 #ifdef DEBUG
03348   static unsigned failure_id = 0;
03349   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03350 #endif
03351 
03352   /* We fill all the registers internally, independent of what we
03353      return, for use in backreferences.  The number here includes
03354      an element for register zero.  */
03355   unsigned num_regs = bufp->re_nsub + 1;
03356 
03357   /* The currently active registers.  */
03358   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03359   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03360 
03361   /* Information on the contents of registers. These are pointers into
03362      the input strings; they record just what was matched (on this
03363      attempt) by a subexpression part of the pattern, that is, the
03364      regnum-th regstart pointer points to where in the pattern we began
03365      matching and the regnum-th regend points to right after where we
03366      stopped matching the regnum-th subexpression.  (The zeroth register
03367      keeps track of what the whole pattern matches.)  */
03368   const char **regstart, **regend;
03369 
03370   /* If a group that's operated upon by a repetition operator fails to
03371      match anything, then the register for its start will need to be
03372      restored because it will have been set to wherever in the string we
03373      are when we last see its open-group operator.  Similarly for a
03374      register's end.  */
03375   const char **old_regstart, **old_regend;
03376 
03377   /* The is_active field of reg_info helps us keep track of which (possibly
03378      nested) subexpressions we are currently in. The matched_something
03379      field of reg_info[reg_num] helps us tell whether or not we have
03380      matched any of the pattern so far this time through the reg_num-th
03381      subexpression.  These two fields get reset each time through any
03382      loop their register is in.  */
03383   register_info_type *reg_info;
03384 
03385   /* The following record the register info as found in the above
03386      variables when we find a match better than any we've seen before.
03387      This happens as we backtrack through the failure points, which in
03388      turn happens only if we have not yet matched the entire string. */
03389   unsigned best_regs_set = false;
03390   const char **best_regstart, **best_regend;
03391 
03392   /* Logically, this is `best_regend[0]'.  But we don't want to have to
03393      allocate space for that if we're not allocating space for anything
03394      else (see below).  Also, we never need info about register 0 for
03395      any of the other register vectors, and it seems rather a kludge to
03396      treat `best_regend' differently than the rest.  So we keep track of
03397      the end of the best match so far in a separate variable.  We
03398      initialize this to NULL so that when we backtrack the first time
03399      and need to test it, it's not garbage.  */
03400   const char *match_end = NULL;
03401 
03402   /* Used when we pop values we don't care about.  */
03403   const char **reg_dummy;
03404   register_info_type *reg_info_dummy;
03405 
03406 #ifdef DEBUG
03407   /* Counts the total number of registers pushed.  */
03408   unsigned num_regs_pushed = 0;
03409 #endif
03410 
03411   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03412 
03413   INIT_FAIL_STACK ();
03414 
03415   /* Do not bother to initialize all the register variables if there are
03416      no groups in the pattern, as it takes a fair amount of time.  If
03417      there are groups, we include space for register 0 (the whole
03418      pattern), even though we never use it, since it simplifies the
03419      array indexing.  We should fix this.  */
03420   if (bufp->re_nsub)
03421     {
03422       regstart = REGEX_TALLOC (num_regs, const char *);
03423       regend = REGEX_TALLOC (num_regs, const char *);
03424       old_regstart = REGEX_TALLOC (num_regs, const char *);
03425       old_regend = REGEX_TALLOC (num_regs, const char *);
03426       best_regstart = REGEX_TALLOC (num_regs, const char *);
03427       best_regend = REGEX_TALLOC (num_regs, const char *);
03428       reg_info = REGEX_TALLOC (num_regs, register_info_type);
03429       reg_dummy = REGEX_TALLOC (num_regs, const char *);
03430       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03431 
03432       if (!(regstart && regend && old_regstart && old_regend && reg_info
03433             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03434         {
03435           FREE_VARIABLES ();
03436           return -2;
03437         }
03438     }
03439 #ifdef REGEX_MALLOC
03440   else
03441     {
03442       /* We must initialize all our variables to NULL, so that
03443          `FREE_VARIABLES' doesn't try to free them.  */
03444       regstart = regend = old_regstart = old_regend = best_regstart
03445         = best_regend = reg_dummy = NULL;
03446       reg_info = reg_info_dummy = (register_info_type *) NULL;
03447     }
03448 #endif /* REGEX_MALLOC */
03449 
03450   /* The starting position is bogus.  */
03451   if (pos < 0 || pos > size1 + size2)
03452     {
03453       FREE_VARIABLES ();
03454       return -1;
03455     }
03456 
03457   /* Initialize subexpression text positions to -1 to mark ones that no
03458      start_memory/stop_memory has been seen for. Also initialize the
03459      register information struct.  */
03460   for (mcnt = 1; mcnt < num_regs; mcnt++)
03461     {
03462       regstart[mcnt] = regend[mcnt]
03463         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03464 
03465       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03466       IS_ACTIVE (reg_info[mcnt]) = 0;
03467       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03468       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03469     }
03470 
03471   /* We move `string1' into `string2' if the latter's empty -- but not if
03472      `string1' is null.  */
03473   if (size2 == 0 && string1 != NULL)
03474     {
03475       string2 = string1;
03476       size2 = size1;
03477       string1 = 0;
03478       size1 = 0;
03479     }
03480   end1 = string1 + size1;
03481   end2 = string2 + size2;
03482 
03483   /* Compute where to stop matching, within the two strings.  */
03484   if (stop <= size1)
03485     {
03486       end_match_1 = string1 + stop;
03487       end_match_2 = string2;
03488     }
03489   else
03490     {
03491       end_match_1 = end1;
03492       end_match_2 = string2 + stop - size1;
03493     }
03494 
03495   /* `p' scans through the pattern as `d' scans through the data.
03496      `dend' is the end of the input string that `d' points within.  `d'
03497      is advanced into the following input string whenever necessary, but
03498      this happens before fetching; therefore, at the beginning of the
03499      loop, `d' can be pointing at the end of a string, but it cannot
03500      equal `string2'.  */
03501   if (size1 > 0 && pos <= size1)
03502     {
03503       d = string1 + pos;
03504       dend = end_match_1;
03505     }
03506   else
03507     {
03508       d = string2 + pos - size1;
03509       dend = end_match_2;
03510     }
03511 
03512   DEBUG_PRINT1 ("The compiled pattern is: ");
03513   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03514   DEBUG_PRINT1 ("The string to match is: `");
03515   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03516   DEBUG_PRINT1 ("'\n");
03517 
03518   /* This loops over pattern commands.  It exits by returning from the
03519      function if the match is complete, or it drops through if the match
03520      fails at this starting point in the input data.  */
03521   for (;;)
03522     {
03523       DEBUG_PRINT2 ("\n0x%x: ", p);
03524 
03525       if (p == pend)
03526         { /* End of pattern means we might have succeeded.  */
03527           DEBUG_PRINT1 ("end of pattern ... ");
03528 
03529           /* If we haven't matched the entire string, and we want the
03530              longest match, try backtracking.  */
03531           if (d != end_match_2)
03532             {
03533               DEBUG_PRINT1 ("backtracking.\n");
03534 
03535               if (!FAIL_STACK_EMPTY ())
03536                 { /* More failure points to try.  */
03537                   boolean same_str_p = (FIRST_STRING_P (match_end)
03538                                         == MATCHING_IN_FIRST_STRING);
03539 
03540                   /* If exceeds best match so far, save it.  */
03541                   if (!best_regs_set
03542                       || (same_str_p && d > match_end)
03543                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
03544                     {
03545                       best_regs_set = true;
03546                       match_end = d;
03547 
03548                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
03549 
03550                       for (mcnt = 1; mcnt < num_regs; mcnt++)
03551                         {
03552                           best_regstart[mcnt] = regstart[mcnt];
03553                           best_regend[mcnt] = regend[mcnt];
03554                         }
03555                     }
03556                   goto fail;
03557                 }
03558 
03559               /* If no failure points, don't restore garbage.  */
03560               else if (best_regs_set)
03561                 {
03562                 restore_best_regs:
03563                   /* Restore best match.  It may happen that `dend ==
03564                      end_match_1' while the restored d is in string2.
03565                      For example, the pattern `x.*y.*z' against the
03566                      strings `x-' and `y-z-', if the two strings are
03567                      not consecutive in memory.  */
03568                   DEBUG_PRINT1 ("Restoring best registers.\n");
03569 
03570                   d = match_end;
03571                   dend = ((d >= string1 && d <= end1)
03572                            ? end_match_1 : end_match_2);
03573 
03574                   for (mcnt = 1; mcnt < num_regs; mcnt++)
03575                     {
03576                       regstart[mcnt] = best_regstart[mcnt];
03577                       regend[mcnt] = best_regend[mcnt];
03578                     }
03579                 }
03580             } /* d != end_match_2 */
03581 
03582           DEBUG_PRINT1 ("Accepting match.\n");
03583 
03584           /* If caller wants register contents data back, do it.  */
03585           if (regs && !bufp->no_sub)
03586             {
03587               /* Have the register data arrays been allocated?  */
03588               if (bufp->regs_allocated == REGS_UNALLOCATED)
03589                 { /* No.  So allocate them with malloc.  We need one
03590                      extra element beyond `num_regs' for the `-1' marker
03591                      GNU code uses.  */
03592                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
03593                   regs->start = TALLOC (regs->num_regs, regoff_t);
03594                   regs->end = TALLOC (regs->num_regs, regoff_t);
03595                   if (regs->start == NULL || regs->end == NULL)
03596                     return -2;
03597                   bufp->regs_allocated = REGS_REALLOCATE;
03598                 }
03599               else if (bufp->regs_allocated == REGS_REALLOCATE)
03600                 { /* Yes.  If we need more elements than were already
03601                      allocated, reallocate them.  If we need fewer, just
03602                      leave it alone.  */
03603                   if (regs->num_regs < num_regs + 1)
03604                     {
03605                       regs->num_regs = num_regs + 1;
03606                       RETALLOC (regs->start, regs->num_regs, regoff_t);
03607                       RETALLOC (regs->end, regs->num_regs, regoff_t);
03608                       if (regs->start == NULL || regs->end == NULL)
03609                         return -2;
03610                     }
03611                 }
03612               else
03613                 {
03614                   /* These braces fend off a "empty body in an else-statement"
03615                      warning under GCC when assert expands to nothing.  */
03616                   assert (bufp->regs_allocated == REGS_FIXED);
03617                 }
03618 
03619               /* Convert the pointer data in `regstart' and `regend' to
03620                  indices.  Register zero has to be set differently,
03621                  since we haven't kept track of any info for it.  */
03622               if (regs->num_regs > 0)
03623                 {
03624                   regs->start[0] = pos;
03625                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
03626                                   : d - string2 + size1);
03627                 }
03628 
03629               /* Go through the first `min (num_regs, regs->num_regs)'
03630                  registers, since that is all we initialized.  */
03631               for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
03632                 {
03633                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
03634                     regs->start[mcnt] = regs->end[mcnt] = -1;
03635                   else
03636                     {
03637                       regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
03638                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
03639                     }
03640                 }
03641 
03642               /* If the regs structure we return has more elements than
03643                  were in the pattern, set the extra elements to -1.  If
03644                  we (re)allocated the registers, this is the case,
03645                  because we always allocate enough to have at least one
03646                  -1 at the end.  */
03647               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
03648                 regs->start[mcnt] = regs->end[mcnt] = -1;
03649             } /* regs && !bufp->no_sub */
03650 
03651           FREE_VARIABLES ();
03652           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
03653                         nfailure_points_pushed, nfailure_points_popped,
03654                         nfailure_points_pushed - nfailure_points_popped);
03655           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
03656 
03657           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
03658                             ? string1
03659                             : string2 - size1);
03660 
03661           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
03662 
03663           return mcnt;
03664         }
03665 
03666       /* Otherwise match next pattern command.  */
03667 #ifdef SWITCH_ENUM_BUG
03668       switch ((int) ((re_opcode_t) *p++))
03669 #else
03670       switch ((re_opcode_t) *p++)
03671 #endif
03672         {
03673         /* Ignore these.  Used to ignore the n of succeed_n's which
03674            currently have n == 0.  */
03675         case no_op:
03676           DEBUG_PRINT1 ("EXECUTING no_op.\n");
03677           break;
03678 
03679 
03680         /* Match the next n pattern characters exactly.  The following
03681            byte in the pattern defines n, and the n bytes after that
03682            are the characters to match.  */
03683         case exactn:
03684           mcnt = *p++;
03685           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
03686 
03687           /* This is written out as an if-else so we don't waste time
03688              testing `translate' inside the loop.  */
03689           if (translate)
03690             {
03691               do
03692                 {
03693                   PREFETCH ();
03694                   if (translate[(unsigned char) *d++] != (char) *p++)
03695                     goto fail;
03696                 }
03697               while (--mcnt);
03698             }
03699           else
03700             {
03701               do
03702                 {
03703                   PREFETCH ();
03704                   if (*d++ != (char) *p++) goto fail;
03705                 }
03706               while (--mcnt);
03707             }
03708           SET_REGS_MATCHED ();
03709           break;
03710 
03711 
03712         /* Match any character except possibly a newline or a null.  */
03713         case anychar:
03714           DEBUG_PRINT1 ("EXECUTING anychar.\n");
03715 
03716           PREFETCH ();
03717 
03718           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
03719               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
03720             goto fail;
03721 
03722           SET_REGS_MATCHED ();
03723           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
03724           d++;
03725           break;
03726 
03727 
03728         case charset:
03729         case charset_not:
03730           {
03731             register unsigned char c;
03732             boolean not = (re_opcode_t) *(p - 1) == charset_not;
03733 
03734             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
03735 
03736             PREFETCH ();
03737             c = TRANSLATE (*d); /* The character to match.  */
03738 
03739             /* Cast to `unsigned' instead of `unsigned char' in case the
03740                bit list is a full 32 bytes long.  */
03741             if (c < (unsigned) (*p * BYTEWIDTH)
03742                 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
03743               not = !not;
03744 
03745             p += 1 + *p;
03746 
03747             if (!not) goto fail;
03748 
03749             SET_REGS_MATCHED ();
03750             d++;
03751             break;
03752           }
03753 
03754 
03755         /* The beginning of a group is represented by start_memory.
03756            The arguments are the register number in the next byte, and the
03757            number of groups inner to this one in the next.  The text
03758            matched within the group is recorded (in the internal
03759            registers data structure) under the register number.  */
03760         case start_memory:
03761           DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
03762 
03763           /* Find out if this group can match the empty string.  */
03764           p1 = p;               /* To send to group_match_null_string_p.  */
03765 
03766           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
03767             REG_MATCH_NULL_STRING_P (reg_info[*p])
03768               = group_match_null_string_p (&p1, pend, reg_info);
03769 
03770           /* Save the position in the string where we were the last time
03771              we were at this open-group operator in case the group is
03772              operated upon by a repetition operator, e.g., with `(a*)*b'
03773              against `ab'; then we want to ignore where we are now in
03774              the string in case this attempt to match fails.  */
03775           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
03776                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
03777                              : regstart[*p];
03778           DEBUG_PRINT2 ("  old_regstart: %d\n",
03779                          POINTER_TO_OFFSET (old_regstart[*p]));
03780 
03781           regstart[*p] = d;
03782           DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
03783 
03784           IS_ACTIVE (reg_info[*p]) = 1;
03785           MATCHED_SOMETHING (reg_info[*p]) = 0;
03786 
03787           /* This is the new highest active register.  */
03788           highest_active_reg = *p;
03789 
03790           /* If nothing was active before, this is the new lowest active
03791              register.  */
03792           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
03793             lowest_active_reg = *p;
03794 
03795           /* Move past the register number and inner group count.  */
03796           p += 2;
03797           break;
03798 
03799 
03800         /* The stop_memory opcode represents the end of a group.  Its
03801            arguments are the same as start_memory's: the register
03802            number, and the number of inner groups.  */
03803         case stop_memory:
03804           DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
03805 
03806           /* We need to save the string position the last time we were at
03807              this close-group operator in case the group is operated
03808              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
03809              against `aba'; then we want to ignore where we are now in
03810              the string in case this attempt to match fails.  */
03811           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
03812                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
03813                            : regend[*p];
03814           DEBUG_PRINT2 ("      old_regend: %d\n",
03815                          POINTER_TO_OFFSET (old_regend[*p]));
03816 
03817           regend[*p] = d;
03818           DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
03819 
03820           /* This register isn't active anymore.  */
03821           IS_ACTIVE (reg_info[*p]) = 0;
03822 
03823           /* If this was the only register active, nothing is active
03824              anymore.  */
03825           if (lowest_active_reg == highest_active_reg)
03826             {
03827               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03828               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03829             }
03830           else
03831             { /* We must scan for the new highest active register, since
03832                  it isn't necessarily one less than now: consider
03833                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
03834                  new highest active register is 1.  */
03835               unsigned char r = *p - 1;
03836               while (r > 0 && !IS_ACTIVE (reg_info[r]))
03837                 r--;
03838 
03839               /* If we end up at register zero, that means that we saved
03840                  the registers as the result of an `on_failure_jump', not
03841                  a `start_memory', and we jumped to past the innermost
03842                  `stop_memory'.  For example, in ((.)*) we save
03843                  registers 1 and 2 as a result of the *, but when we pop
03844                  back to the second ), we are at the stop_memory 1.
03845                  Thus, nothing is active.  */
03846               if (r == 0)
03847                 {
03848                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03849                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03850                 }
03851               else
03852                 highest_active_reg = r;
03853             }
03854 
03855           /* If just failed to match something this time around with a
03856              group that's operated on by a repetition operator, try to
03857              force exit from the ``loop'', and restore the register
03858              information for this group that we had before trying this
03859              last match.  */
03860           if ((!MATCHED_SOMETHING (reg_info[*p])
03861                || (re_opcode_t) p[-3] == start_memory)
03862               && (p + 2) < pend)
03863             {
03864               boolean is_a_jump_n = false;
03865 
03866               p1 = p + 2;
03867               mcnt = 0;
03868               switch ((re_opcode_t) *p1++)
03869                 {
03870                   case jump_n:
03871                     is_a_jump_n = true;
03872                   case pop_failure_jump:
03873                   case maybe_pop_jump:
03874                   case jump:
03875                   case dummy_failure_jump:
03876                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
03877                     if (is_a_jump_n)
03878                       p1 += 2;
03879                     break;
03880 
03881                   default:
03882                     /* do nothing */ ;
03883                 }
03884               p1 += mcnt;
03885 
03886               /* If the next operation is a jump backwards in the pattern
03887                  to an on_failure_jump right before the start_memory
03888                  corresponding to this stop_memory, exit from the loop
03889                  by forcing a failure after pushing on the stack the
03890                  on_failure_jump's jump in the pattern, and d.  */
03891               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
03892                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
03893                 {
03894                   /* If this group ever matched anything, then restore
03895                      what its registers were before trying this last
03896                      failed match, e.g., with `(a*)*b' against `ab' for
03897                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
03898                      against `aba' for regend[3].
03899 
03900                      Also restore the registers for inner groups for,
03901                      e.g., `((a*)(b*))*' against `aba' (register 3 would
03902                      otherwise get trashed).  */
03903 
03904                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
03905                     {
03906                       unsigned r;
03907 
03908                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
03909 
03910                       /* Restore this and inner groups' (if any) registers.  */
03911                       for (r = *p; r < *p + *(p + 1); r++)
03912                         {
03913                           regstart[r] = old_regstart[r];
03914 
03915                           /* xx why this test?  */
03916                           if ((int) old_regend[r] >= (int) regstart[r])
03917                             regend[r] = old_regend[r];
03918                         }
03919                     }
03920                   p1++;
03921                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
03922                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
03923 
03924                   goto fail;
03925                 }
03926             }
03927 
03928           /* Move past the register number and the inner group count.  */
03929           p += 2;
03930           break;
03931 
03932 
03933         /* <digit> has been turned into a `duplicate' command which is
03934            followed by the numeric value of <digit> as the register number.  */
03935         case duplicate:
03936           {
03937             register const char *d2, *dend2;
03938             int regno = *p++;   /* Get which register to match against.  */
03939             DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
03940 
03941             /* Can't back reference a group which we've never matched.  */
03942             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
03943               goto fail;
03944 
03945             /* Where in input to try to start matching.  */
03946             d2 = regstart[regno];
03947 
03948             /* Where to stop matching; if both the place to start and
03949                the place to stop matching are in the same string, then
03950                set to the place to stop, otherwise, for now have to use
03951                the end of the first string.  */
03952 
03953             dend2 = ((FIRST_STRING_P (regstart[regno])
03954                       == FIRST_STRING_P (regend[regno]))
03955                      ? regend[regno] : end_match_1);
03956             for (;;)
03957               {
03958                 /* If necessary, advance to next segment in register
03959                    contents.  */
03960                 while (d2 == dend2)
03961                   {
03962                     if (dend2 == end_match_2) break;
03963                     if (dend2 == regend[regno]) break;
03964 
03965                     /* End of string1 => advance to string2. */
03966                     d2 = string2;
03967                     dend2 = regend[regno];
03968                   }
03969                 /* At end of register contents => success */
03970                 if (d2 == dend2) break;
03971 
03972                 /* If necessary, advance to next segment in data.  */
03973                 PREFETCH ();
03974 
03975                 /* How many characters left in this segment to match.  */
03976                 mcnt = dend - d;
03977 
03978                 /* Want how many consecutive characters we can match in
03979                    one shot, so, if necessary, adjust the count.  */
03980                 if (mcnt > dend2 - d2)
03981                   mcnt = dend2 - d2;
03982 
03983                 /* Compare that many; failure if mismatch, else move
03984                    past them.  */
03985                 if (translate
03986                     ? bcmp_translate ((unsigned char*)(d), (unsigned char *)(d2), mcnt, translate)
03987                     : bcmp (d, d2, mcnt))
03988                   goto fail;
03989                 d += mcnt, d2 += mcnt;
03990               }
03991           }
03992           break;
03993 
03994 
03995         /* begline matches the empty string at the beginning of the string
03996            (unless `not_bol' is set in `bufp'), and, if
03997            `newline_anchor' is set, after newlines.  */
03998         case begline:
03999           DEBUG_PRINT1 ("EXECUTING begline.\n");
04000 
04001           if (AT_STRINGS_BEG (d))
04002             {
04003               if (!bufp->not_bol) break;
04004             }
04005           else if (d[-1] == '\n' && bufp->newline_anchor)
04006             {
04007               break;
04008             }
04009           /* In all other cases, we fail.  */
04010           goto fail;
04011 
04012 
04013         /* endline is the dual of begline.  */
04014         case endline:
04015           DEBUG_PRINT1 ("EXECUTING endline.\n");
04016 
04017           if (AT_STRINGS_END (d))
04018             {
04019               if (!bufp->not_eol) break;
04020             }
04021 
04022           /* We have to ``prefetch'' the next character.  */
04023           else if ((d == end1 ? *string2 : *d) == '\n'
04024                    && bufp->newline_anchor)
04025             {
04026               break;
04027             }
04028           goto fail;
04029 
04030 
04031         /* Match at the very beginning of the data.  */
04032         case begbuf:
04033           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04034           if (AT_STRINGS_BEG (d))
04035             break;
04036           goto fail;
04037 
04038 
04039         /* Match at the very end of the data.  */
04040         case endbuf:
04041           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04042           if (AT_STRINGS_END (d))
04043             break;
04044           goto fail;
04045 
04046 
04047         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
04048            pushes NULL as the value for the string on the stack.  Then
04049            `pop_failure_point' will keep the current value for the
04050            string, instead of restoring it.  To see why, consider
04051            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
04052            then the . fails against the \n.  But the next thing we want
04053            to do is match the \n against the \n; if we restored the
04054            string value, we would be back at the foo.
04055 
04056            Because this is used only in specific cases, we don't need to
04057            check all the things that `on_failure_jump' does, to make
04058            sure the right things get saved on the stack.  Hence we don't
04059            share its code.  The only reason to push anything on the
04060            stack at all is that otherwise we would have to change
04061            `anychar's code to do something besides goto fail in this
04062            case; that seems worse than this.  */
04063         case on_failure_keep_string_jump:
04064           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04065 
04066           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04067           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04068 
04069           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04070           break;
04071 
04072 
04073         /* Uses of on_failure_jump:
04074 
04075            Each alternative starts with an on_failure_jump that points
04076            to the beginning of the next alternative.  Each alternative
04077            except the last ends with a jump that in effect jumps past
04078            the rest of the alternatives.  (They really jump to the
04079            ending jump of the following alternative, because tensioning
04080            these jumps is a hassle.)
04081 
04082            Repeats start with an on_failure_jump that points past both
04083            the repetition text and either the following jump or
04084            pop_failure_jump back to this on_failure_jump.  */
04085         case on_failure_jump:
04086         on_failure:
04087           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04088 
04089           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04090           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04091 
04092           /* If this on_failure_jump comes right before a group (i.e.,
04093              the original * applied to a group), save the information
04094              for that group and all inner ones, so that if we fail back
04095              to this point, the group's information will be correct.
04096              For example, in \(a*\)*\1, we need the preceding group,
04097              and in \(\(a*\)b*\)\2, we need the inner group.  */
04098 
04099           /* We can't use `p' to check ahead because we push
04100              a failure point to `p + mcnt' after we do this.  */
04101           p1 = p;
04102 
04103           /* We need to skip no_op's before we look for the
04104              start_memory in case this on_failure_jump is happening as
04105              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
04106              against aba.  */
04107           while (p1 < pend && (re_opcode_t) *p1 == no_op)
04108             p1++;
04109 
04110           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04111             {
04112               /* We have a new highest active register now.  This will
04113                  get reset at the start_memory we are about to get to,
04114                  but we will have saved all the registers relevant to
04115                  this repetition op, as described above.  */
04116               highest_active_reg = *(p1 + 1) + *(p1 + 2);
04117               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04118                 lowest_active_reg = *(p1 + 1);
04119             }
04120 
04121           DEBUG_PRINT1 (":\n");
04122           PUSH_FAILURE_POINT (p + mcnt, d, -2);
04123           break;
04124 
04125 
04126         /* A smart repeat ends with `maybe_pop_jump'.
04127            We change it to either `pop_failure_jump' or `jump'.  */
04128         case maybe_pop_jump:
04129           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04130           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04131           {
04132             register unsigned char *p2 = p;
04133 
04134             /* Compare the beginning of the repeat with what in the
04135                pattern follows its end. If we can establish that there
04136                is nothing that they would both match, i.e., that we
04137                would have to backtrack because of (as in, e.g., `a*a')
04138                then we can change to pop_failure_jump, because we'll
04139                never have to backtrack.
04140 
04141                This is not true in the case of alternatives: in
04142                `(a|ab)*' we do need to backtrack to the `ab' alternative
04143                (e.g., if the string was `ab').  But instead of trying to
04144                detect that here, the alternative has put on a dummy
04145                failure point which is what we will end up popping.  */
04146 
04147             /* Skip over open/close-group commands.  */
04148             while (p2 + 2 < pend
04149                    && ((re_opcode_t) *p2 == stop_memory
04150                        || (re_opcode_t) *p2 == start_memory))
04151               p2 += 3;                  /* Skip over args, too.  */
04152 
04153             /* If we're at the end of the pattern, we can change.  */
04154             if (p2 == pend)
04155               {
04156                 /* Consider what happens when matching ":\(.*\)"
04157                    against ":/".  I don't really understand this code
04158                    yet.  */
04159                 p[-3] = (unsigned char) pop_failure_jump;
04160                 DEBUG_PRINT1
04161                   ("  End of pattern: change to `pop_failure_jump'.\n");
04162               }
04163 
04164             else if ((re_opcode_t) *p2 == exactn
04165                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04166               {
04167                 register unsigned char c
04168                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
04169                 p1 = p + mcnt;
04170 
04171                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
04172                    to the `maybe_finalize_jump' of this case.  Examine what
04173                    follows.  */
04174                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04175                   {
04176                     p[-3] = (unsigned char) pop_failure_jump;
04177                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04178                                   c, p1[5]);
04179                   }
04180 
04181                 else if ((re_opcode_t) p1[3] == charset
04182                          || (re_opcode_t) p1[3] == charset_not)
04183                   {
04184                     int not = (re_opcode_t) p1[3] == charset_not;
04185 
04186                     if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04187                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04188                       not = !not;
04189 
04190                     /* `not' is equal to 1 if c would match, which means
04191                         that we can't change to pop_failure_jump.  */
04192                     if (!not)
04193                       {
04194                         p[-3] = (unsigned char) pop_failure_jump;
04195                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04196                       }
04197                   }
04198               }
04199           }
04200           p -= 2;               /* Point at relative address again.  */
04201           if ((re_opcode_t) p[-1] != pop_failure_jump)
04202             {
04203               p[-1] = (unsigned char) jump;
04204               DEBUG_PRINT1 ("  Match => jump.\n");
04205               goto unconditional_jump;
04206             }
04207         /* Note fall through.  */
04208 
04209 
04210         /* The end of a simple repeat has a pop_failure_jump back to
04211            its matching on_failure_jump, where the latter will push a
04212            failure point.  The pop_failure_jump takes off failure
04213            points put on by this pop_failure_jump's matching
04214            on_failure_jump; we got through the pattern to here from the
04215            matching on_failure_jump, so didn't fail.  */
04216         case pop_failure_jump:
04217           {
04218             /* We need to pass separate storage for the lowest and
04219                highest registers, even though we don't care about the
04220                actual values.  Otherwise, we will restore only one
04221                register from the stack, since lowest will == highest in
04222                `pop_failure_point'.  */
04223             unsigned dummy_low_reg, dummy_high_reg;
04224             unsigned char *pdummy;
04225             const char *sdummy;
04226 
04227             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04228             POP_FAILURE_POINT (sdummy, pdummy,
04229                                dummy_low_reg, dummy_high_reg,
04230                                reg_dummy, reg_dummy, reg_info_dummy);
04231           }
04232           /* Note fall through.  */
04233 
04234 
04235         /* Unconditionally jump (without popping any failure points).  */
04236         case jump:
04237         unconditional_jump:
04238           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
04239           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04240           p += mcnt;                            /* Do the jump.  */
04241           DEBUG_PRINT2 ("(to 0x%x).\n", p);
04242           break;
04243 
04244 
04245         /* We need this opcode so we can detect where alternatives end
04246            in `group_match_null_string_p' et al.  */
04247         case jump_past_alt:
04248           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04249           goto unconditional_jump;
04250 
04251 
04252         /* Normally, the on_failure_jump pushes a failure point, which
04253            then gets popped at pop_failure_jump.  We will end up at
04254            pop_failure_jump, also, and with a pattern of, say, `a+', we
04255            are skipping over the on_failure_jump, so we have to push
04256            something meaningless for pop_failure_jump to pop.  */
04257         case dummy_failure_jump:
04258           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04259           /* It doesn't matter what we push for the string here.  What
04260              the code at `fail' tests is the value for the pattern.  */
04261           PUSH_FAILURE_POINT (0, 0, -2);
04262           goto unconditional_jump;
04263 
04264 
04265         /* At the end of an alternative, we need to push a dummy failure
04266            point in case we are followed by a `pop_failure_jump', because
04267            we don't want the failure point for the alternative to be
04268            popped.  For example, matching `(a|ab)*' against `aab'
04269            requires that we match the `ab' alternative.  */
04270         case push_dummy_failure:
04271           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04272           /* See comments just above at `dummy_failure_jump' about the
04273              two zeroes.  */
04274           PUSH_FAILURE_POINT (0, 0, -2);
04275           break;
04276 
04277         /* Have to succeed matching what follows at least n times.
04278            After that, handle like `on_failure_jump'.  */
04279         case succeed_n:
04280           EXTRACT_NUMBER (mcnt, p + 2);
04281           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04282 
04283           assert (mcnt >= 0);
04284           /* Originally, this is how many times we HAVE to succeed.  */
04285           if (mcnt > 0)
04286             {
04287                mcnt--;
04288                p += 2;
04289                STORE_NUMBER_AND_INCR (p, mcnt);
04290                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
04291             }
04292           else if (mcnt == 0)
04293             {
04294               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
04295               p[2] = (unsigned char) no_op;
04296               p[3] = (unsigned char) no_op;
04297               goto on_failure;
04298             }
04299           break;
04300 
04301         case jump_n:
04302           EXTRACT_NUMBER (mcnt, p + 2);
04303           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04304 
04305           /* Originally, this is how many times we CAN jump.  */
04306           if (mcnt)
04307             {
04308                mcnt--;
04309                STORE_NUMBER (p + 2, mcnt);
04310                goto unconditional_jump;
04311             }
04312           /* If don't have to jump any more, skip over the rest of command.  */
04313           else
04314             p += 4;
04315           break;
04316 
04317         case set_number_at:
04318           {
04319             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04320 
04321             EXTRACT_NUMBER_AND_INCR (mcnt, p);
04322             p1 = p + mcnt;
04323             EXTRACT_NUMBER_AND_INCR (mcnt, p);
04324             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
04325             STORE_NUMBER (p1, mcnt);
04326             break;
04327           }
04328 
04329         case wordbound:
04330           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04331           if (AT_WORD_BOUNDARY (d))
04332             break;
04333           goto fail;
04334 
04335         case notwordbound:
04336           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04337           if (AT_WORD_BOUNDARY (d))
04338             goto fail;
04339           break;
04340 
04341         case wordbeg:
04342           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04343           if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04344             break;
04345           goto fail;
04346 
04347         case wordend:
04348           DEBUG_PRINT1 ("EXECUTING wordend.\n");
04349           if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04350               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04351             break;
04352           goto fail;
04353 
04354 #ifdef emacs
04355 #ifdef emacs19
04356         case before_dot:
04357           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04358           if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04359             goto fail;
04360           break;
04361 
04362         case at_dot:
04363           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04364           if (PTR_CHAR_POS ((unsigned char *) d) != point)
04365             goto fail;
04366           break;
04367 
04368         case after_dot:
04369           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
04370           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
04371             goto fail;
04372           break;
04373 #else /* not emacs19 */
04374         case at_dot:
04375           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04376           if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
04377             goto fail;
04378           break;
04379 #endif /* not emacs19 */
04380 
04381         case syntaxspec:
04382           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
04383           mcnt = *p++;
04384           goto matchsyntax;
04385 
04386         case wordchar:
04387           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
04388           mcnt = (int) Sword;
04389         matchsyntax:
04390           PREFETCH ();
04391           if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
04392             goto fail;
04393           SET_REGS_MATCHED ();
04394           break;
04395 
04396         case notsyntaxspec:
04397           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
04398           mcnt = *p++;
04399           goto matchnotsyntax;
04400 
04401         case notwordchar:
04402           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
04403           mcnt = (int) Sword;
04404         matchnotsyntax:
04405           PREFETCH ();
04406           if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
04407             goto fail;
04408           SET_REGS_MATCHED ();
04409           break;
04410 
04411 #else /* not emacs */
04412         case wordchar:
04413           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
04414           PREFETCH ();
04415           if (!WORDCHAR_P (d))
04416             goto fail;
04417           SET_REGS_MATCHED ();
04418           d++;
04419           break;
04420 
04421         case notwordchar:
04422           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
04423           PREFETCH ();
04424           if (WORDCHAR_P (d))
04425             goto fail;
04426           SET_REGS_MATCHED ();
04427           d++;
04428           break;
04429 #endif /* not emacs */
04430 
04431         default:
04432           abort ();
04433         }
04434       continue;  /* Successfully executed one pattern command; keep going.  */
04435 
04436 
04437     /* We goto here if a matching operation fails. */
04438     fail:
04439       if (!FAIL_STACK_EMPTY ())
04440         { /* A restart point is known.  Restore to that state.  */
04441           DEBUG_PRINT1 ("\nFAIL:\n");
04442           POP_FAILURE_POINT (d, p,
04443                              lowest_active_reg, highest_active_reg,
04444                              regstart, regend, reg_info);
04445 
04446           /* If this failure point is a dummy, try the next one.  */
04447           if (!p)
04448             goto fail;
04449 
04450           /* If we failed to the end of the pattern, don't examine *p.  */
04451           assert (p <= pend);
04452           if (p < pend)
04453             {
04454               boolean is_a_jump_n = false;
04455 
04456               /* If failed to a backwards jump that's part of a repetition
04457                  loop, need to pop this failure point and use the next one.  */
04458               switch ((re_opcode_t) *p)
04459                 {
04460                 case jump_n:
04461                   is_a_jump_n = true;
04462                 case maybe_pop_jump:
04463                 case pop_failure_jump:
04464                 case jump:
04465                   p1 = p + 1;
04466                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04467                   p1 += mcnt;
04468 
04469                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
04470                       || (!is_a_jump_n
04471                           && (re_opcode_t) *p1 == on_failure_jump))
04472                     goto fail;
04473                   break;
04474                 default:
04475                   /* do nothing */ ;
04476                 }
04477             }
04478 
04479           if (d >= string1 && d <= end1)
04480             dend = end_match_1;
04481         }
04482       else
04483         break;   /* Matching at this starting point really fails.  */
04484     } /* for (;;) */
04485 
04486   if (best_regs_set)
04487     goto restore_best_regs;
04488 
04489   FREE_VARIABLES ();
04490 
04491   return -1;                            /* Failure to match.  */
04492 } /* re_match_2 */
04493 
04494 /* Subroutine definitions for re_match_2.  */
04495 
04496 
04497 /* We are passed P pointing to a register number after a start_memory.
04498 
04499    Return true if the pattern up to the corresponding stop_memory can
04500    match the empty string, and false otherwise.
04501 
04502    If we find the matching stop_memory, sets P to point to one past its number.
04503    Otherwise, sets P to an undefined byte less than or equal to END.
04504 
04505    We don't handle duplicates properly (yet).  */
04506 
04507 static boolean
04508 group_match_null_string_p (p, end, reg_info)
04509     unsigned char **p, *end;
04510     register_info_type *reg_info;
04511 {
04512   int mcnt;
04513   /* Point to after the args to the start_memory.  */
04514   unsigned char *p1 = *p + 2;
04515 
04516   while (p1 < end)
04517     {
04518       /* Skip over opcodes that can match nothing, and return true or
04519          false, as appropriate, when we get to one that can't, or to the
04520          matching stop_memory.  */
04521 
04522       switch ((re_opcode_t) *p1)
04523         {
04524         /* Could be either a loop or a series of alternatives.  */
04525         case on_failure_jump:
04526           p1++;
04527           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04528 
04529           /* If the next operation is not a jump backwards in the
04530              pattern.  */
04531 
04532           if (mcnt >= 0)
04533             {
04534               /* Go through the on_failure_jumps of the alternatives,
04535                  seeing if any of the alternatives cannot match nothing.
04536                  The last alternative starts with only a jump,
04537                  whereas the rest start with on_failure_jump and end
04538                  with a jump, e.g., here is the pattern for `a|b|c':
04539 
04540                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
04541                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
04542                  /exactn/1/c
04543 
04544                  So, we have to first go through the first (n-1)
04545                  alternatives and then deal with the last one separately.  */
04546 
04547 
04548               /* Deal with the first (n-1) alternatives, which start
04549                  with an on_failure_jump (see above) that jumps to right
04550                  past a jump_past_alt.  */
04551 
04552               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
04553                 {
04554                   /* `mcnt' holds how many bytes long the alternative
04555                      is, including the ending `jump_past_alt' and
04556                      its number.  */
04557 
04558                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
04559                                                       reg_info))
04560                     return false;
04561 
04562                   /* Move to right after this alternative, including the
04563                      jump_past_alt.  */
04564                   p1 += mcnt;
04565 
04566                   /* Break if it's the beginning of an n-th alternative
04567                      that doesn't begin with an on_failure_jump.  */
04568                   if ((re_opcode_t) *p1 != on_failure_jump)
04569                     break;
04570 
04571                   /* Still have to check that it's not an n-th
04572                      alternative that starts with an on_failure_jump.  */
04573                   p1++;
04574                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04575                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
04576                     {
04577                       /* Get to the beginning of the n-th alternative.  */
04578                       p1 -= 3;
04579                       break;
04580                     }
04581                 }
04582 
04583               /* Deal with the last alternative: go back and get number
04584                  of the `jump_past_alt' just before it.  `mcnt' contains
04585                  the length of the alternative.  */
04586               EXTRACT_NUMBER (mcnt, p1 - 2);
04587 
04588               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
04589                 return false;
04590 
04591               p1 += mcnt;       /* Get past the n-th alternative.  */
04592             } /* if mcnt > 0 */
04593           break;
04594 
04595 
04596         case stop_memory:
04597           assert (p1[1] == **p);
04598           *p = p1 + 2;
04599           return true;
04600 
04601 
04602         default:
04603           if (!common_op_match_null_string_p (&p1, end, reg_info))
04604             return false;
04605         }
04606     } /* while p1 < end */
04607 
04608   return false;
04609 } /* group_match_null_string_p */
04610 
04611 
04612 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
04613    It expects P to be the first byte of a single alternative and END one
04614    byte past the last. The alternative can contain groups.  */
04615 
04616 static boolean
04617 alt_match_null_string_p (p, end, reg_info)
04618     unsigned char *p, *end;
04619     register_info_type *reg_info;
04620 {
04621   int mcnt;
04622   unsigned char *p1 = p;
04623 
04624   while (p1 < end)
04625     {
04626       /* Skip over opcodes that can match nothing, and break when we get
04627          to one that can't.  */
04628 
04629       switch ((re_opcode_t) *p1)
04630         {
04631         /* It's a loop.  */
04632         case on_failure_jump:
04633           p1++;
04634           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04635           p1 += mcnt;
04636           break;
04637 
04638         default:
04639           if (!common_op_match_null_string_p (&p1, end, reg_info))
04640             return false;
04641         }
04642     }  /* while p1 < end */
04643 
04644   return true;
04645 } /* alt_match_null_string_p */
04646 
04647 
04648 /* Deals with the ops common to group_match_null_string_p and
04649    alt_match_null_string_p.
04650 
04651    Sets P to one after the op and its arguments, if any.  */
04652 
04653 static boolean
04654 common_op_match_null_string_p (p, end, reg_info)
04655     unsigned char **p, *end;
04656     register_info_type *reg_info;
04657 {
04658   int mcnt;
04659   boolean ret;
04660   int reg_no;
04661   unsigned char *p1 = *p;
04662 
04663   switch ((re_opcode_t) *p1++)
04664     {
04665     case no_op:
04666     case begline:
04667     case endline:
04668     case begbuf:
04669     case endbuf:
04670     case wordbeg:
04671     case wordend:
04672     case wordbound:
04673     case notwordbound:
04674 #ifdef emacs
04675     case before_dot:
04676     case at_dot:
04677     case after_dot:
04678 #endif
04679       break;
04680 
04681     case start_memory:
04682       reg_no = *p1;
04683       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
04684       ret = group_match_null_string_p (&p1, end, reg_info);
04685 
04686       /* Have to set this here in case we're checking a group which
04687          contains a group and a back reference to it.  */
04688 
04689       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
04690         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
04691 
04692       if (!ret)
04693         return false;
04694       break;
04695 
04696     /* If this is an optimized succeed_n for zero times, make the jump.  */
04697     case jump:
04698       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04699       if (mcnt >= 0)
04700         p1 += mcnt;
04701       else
04702         return false;
04703       break;
04704 
04705     case succeed_n:
04706       /* Get to the number of times to succeed.  */
04707       p1 += 2;
04708       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04709 
04710       if (mcnt == 0)
04711         {
04712           p1 -= 4;
04713           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04714           p1 += mcnt;
04715         }
04716       else
04717         return false;
04718       break;
04719 
04720     case duplicate:
04721       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
04722         return false;
04723       break;
04724 
04725     case set_number_at:
04726       p1 += 4;
04727 
04728     default:
04729       /* All other opcodes mean we cannot match the empty string.  */
04730       return false;
04731   }
04732 
04733   *p = p1;
04734   return true;
04735 } /* common_op_match_null_string_p */
04736 
04737 
04738 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
04739    bytes; nonzero otherwise.  */
04740 
04741 static int
04742 bcmp_translate (s1, s2, len, translate)
04743      unsigned char *s1, *s2;
04744      register int len;
04745      char *translate;
04746 {
04747   register unsigned char *p1 = s1, *p2 = s2;
04748   while (len)
04749     {
04750       if (translate[*p1++] != translate[*p2++]) return 1;
04751       len--;
04752     }
04753   return 0;
04754 }
04755 
04756 /* Entry points for GNU code.  */
04757 
04758 /* re_compile_pattern is the GNU regular expression compiler: it
04759    compiles PATTERN (of length SIZE) and puts the result in BUFP.
04760    Returns 0 if the pattern was valid, otherwise an error string.
04761 
04762    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
04763    are set in BUFP on entry.
04764 
04765    We call regex_compile to do the actual compilation.  */
04766 
04767 const char *
04768 re_compile_pattern (pattern, length, bufp)
04769      const char *pattern;
04770      int length;
04771      struct re_pattern_buffer *bufp;
04772 {
04773   reg_errcode_t ret;
04774 
04775   /* GNU code is written to assume at least RE_NREGS registers will be set
04776      (and at least one extra will be -1).  */
04777   bufp->regs_allocated = REGS_UNALLOCATED;
04778 
04779   /* And GNU code determines whether or not to get register information
04780      by passing null for the REGS argument to re_match, etc., not by
04781      setting no_sub.  */
04782   bufp->no_sub = 0;
04783 
04784   /* Match anchors at newline.  */
04785   bufp->newline_anchor = 1;
04786 
04787   ret = regex_compile (pattern, length, re_syntax_options, bufp);
04788 
04789   return re_error_msg[(int) ret];
04790 }
04791 
04792 /* Entry points compatible with 4.2 BSD regex library.  We don't define
04793    them if this is an Emacs or POSIX compilation.  */
04794 
04795 #if !defined (emacs) && !defined (_POSIX_SOURCE)
04796 
04797 /* BSD has one and only one pattern buffer.  */
04798 static struct re_pattern_buffer re_comp_buf;
04799 
04800 char *
04801 re_comp (s)
04802     const char *s;
04803 {
04804   reg_errcode_t ret;
04805 
04806   if (!s)
04807     {
04808       if (!re_comp_buf.buffer)
04809         return "No previous regular expression";
04810       return 0;
04811     }
04812 
04813   if (!re_comp_buf.buffer)
04814     {
04815       re_comp_buf.buffer = (unsigned char *) malloc (200);
04816       if (re_comp_buf.buffer == NULL)
04817         return "Memory exhausted";
04818       re_comp_buf.allocated = 200;
04819 
04820       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
04821       if (re_comp_buf.fastmap == NULL)
04822         return "Memory exhausted";
04823     }
04824 
04825   /* Since `re_exec' always passes NULL for the `regs' argument, we
04826      don't need to initialize the pattern buffer fields which affect it.  */
04827 
04828   /* Match anchors at newlines.  */
04829   re_comp_buf.newline_anchor = 1;
04830 
04831   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
04832 
04833   /* Yes, we're discarding `const' here.  */
04834   return (char *) re_error_msg[(int) ret];
04835 }
04836 
04837 
04838 int
04839 re_exec (s)
04840     const char *s;
04841 {
04842   const int len = strlen (s);
04843   return
04844     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
04845 }
04846 #endif /* not emacs and not _POSIX_SOURCE */
04847 
04848 /* POSIX.2 functions.  Don't define these for Emacs.  */
04849 
04850 #ifndef emacs
04851 
04852 /* regcomp takes a regular expression as a string and compiles it.
04853 
04854    PREG is a regex_t *.  We do not expect any fields to be initialized,
04855    since POSIX says we shouldn't.  Thus, we set
04856 
04857      `buffer' to the compiled pattern;
04858      `used' to the length of the compiled pattern;
04859      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
04860        REG_EXTENDED bit in CFLAGS is set; otherwise, to
04861        RE_SYNTAX_POSIX_BASIC;
04862      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
04863      `fastmap' and `fastmap_accurate' to zero;
04864      `re_nsub' to the number of subexpressions in PATTERN.
04865 
04866    PATTERN is the address of the pattern string.
04867 
04868    CFLAGS is a series of bits which affect compilation.
04869 
04870      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
04871      use POSIX basic syntax.
04872 
04873      If REG_NEWLINE is set, then . and [^...] don't match newline.
04874      Also, regexec will try a match beginning after every newline.
04875 
04876      If REG_ICASE is set, then we considers upper- and lowercase
04877      versions of letters to be equivalent when matching.
04878 
04879      If REG_NOSUB is set, then when PREG is passed to regexec, that
04880      routine will report only success or failure, and nothing about the
04881      registers.
04882 
04883    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
04884    the return codes and their meanings.)  */
04885 
04886 int
04887 regcomp (preg, pattern, cflags)
04888     regex_t *preg;
04889     const char *pattern;
04890     int cflags;
04891 {
04892   reg_errcode_t ret;
04893   unsigned syntax
04894     = (cflags & REG_EXTENDED) ?
04895       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
04896 
04897   /* regex_compile will allocate the space for the compiled pattern.  */
04898   preg->buffer = 0;
04899   preg->allocated = 0;
04900 
04901   /* Don't bother to use a fastmap when searching.  This simplifies the
04902      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
04903      characters after newlines into the fastmap.  This way, we just try
04904      every character.  */
04905   preg->fastmap = 0;
04906 
04907   if (cflags & REG_ICASE)
04908     {
04909       unsigned i;
04910 
04911       preg->translate = (char *) malloc (CHAR_SET_SIZE);
04912       if (preg->translate == NULL)
04913         return (int) REG_ESPACE;
04914 
04915       /* Map uppercase characters to corresponding lowercase ones.  */
04916       for (i = 0; i < CHAR_SET_SIZE; i++)
04917         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
04918     }
04919   else
04920     preg->translate = NULL;
04921 
04922   /* If REG_NEWLINE is set, newlines are treated differently.  */
04923   if (cflags & REG_NEWLINE)
04924     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
04925       syntax &= ~RE_DOT_NEWLINE;
04926       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
04927       /* It also changes the matching behavior.  */
04928       preg->newline_anchor = 1;
04929     }
04930   else
04931     preg->newline_anchor = 0;
04932 
04933   preg->no_sub = !!(cflags & REG_NOSUB);
04934 
04935   /* POSIX says a null character in the pattern terminates it, so we
04936      can use strlen here in compiling the pattern.  */
04937   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
04938 
04939   /* POSIX doesn't distinguish between an unmatched open-group and an
04940      unmatched close-group: both are REG_EPAREN.  */
04941   if (ret == REG_ERPAREN) ret = REG_EPAREN;
04942 
04943   return (int) ret;
04944 }
04945 
04946 
04947 /* regexec searches for a given pattern, specified by PREG, in the
04948    string STRING.
04949 
04950    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
04951    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
04952    least NMATCH elements, and we set them to the offsets of the
04953    corresponding matched substrings.
04954 
04955    EFLAGS specifies `execution flags' which affect matching: if
04956    REG_NOTBOL is set, then ^ does not match at the beginning of the
04957    string; if REG_NOTEOL is set, then $ does not match at the end.
04958 
04959    We return 0 if we find a match and REG_NOMATCH if not.  */
04960 
04961 int
04962 regexec (preg, string, nmatch, pmatch, eflags)
04963     const regex_t *preg;
04964     const char *string;
04965     size_t nmatch;
04966     regmatch_t pmatch[];
04967     int eflags;
04968 {
04969   int ret;
04970   struct re_registers regs;
04971   regex_t private_preg;
04972   int len = strlen (string);
04973   boolean want_reg_info = !preg->no_sub && nmatch > 0;
04974 
04975   private_preg = *preg;
04976 
04977   private_preg.not_bol = !!(eflags & REG_NOTBOL);
04978   private_preg.not_eol = !!(eflags & REG_NOTEOL);
04979 
04980   /* The user has told us exactly how many registers to return
04981      information about, via `nmatch'.  We have to pass that on to the
04982      matching routines.  */
04983   private_preg.regs_allocated = REGS_FIXED;
04984 
04985   if (want_reg_info)
04986     {
04987       regs.num_regs = nmatch;
04988       regs.start = TALLOC (nmatch, regoff_t);
04989       regs.end = TALLOC (nmatch, regoff_t);
04990       if (regs.start == NULL || regs.end == NULL)
04991         return (int) REG_NOMATCH;
04992     }
04993 
04994   /* Perform the searching operation.  */
04995   ret = re_search (&private_preg, string, len,
04996                    /* start: */ 0, /* range: */ len,
04997                    want_reg_info ? &regs : (struct re_registers *) 0);
04998 
04999   /* Copy the register information to the POSIX structure.  */
05000   if (want_reg_info)
05001     {
05002       if (ret >= 0)
05003         {
05004           unsigned r;
05005 
05006           for (r = 0; r < nmatch; r++)
05007             {
05008               pmatch[r].rm_so = regs.start[r];
05009               pmatch[r].rm_eo = regs.end[r];
05010             }
05011         }
05012 
05013       /* If we needed the temporary register info, free the space now.  */
05014       free (regs.start);
05015       free (regs.end);
05016     }
05017 
05018   /* We want zero return to mean success, unlike `re_search'.  */
05019   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05020 }
05021 
05022 int
05023 regexec1 (preg, string, stringLength, nmatch, pmatch, eflags)
05024     const regex_t *preg;
05025     const char *string;
05026     int   stringLength;
05027     size_t nmatch;
05028     regmatch_t pmatch[];
05029     int eflags;
05030 {
05031   int ret;
05032   struct re_registers regs;
05033   regex_t private_preg;
05034   int len = stringLength;
05035   boolean want_reg_info = !preg->no_sub && nmatch > 0;
05036 
05037   private_preg = *preg;
05038 
05039   private_preg.not_bol = !!(eflags & REG_NOTBOL);
05040   private_preg.not_eol = !!(eflags & REG_NOTEOL);
05041 
05042   /* The user has told us exactly how many registers to return
05043      information about, via `nmatch'.  We have to pass that on to the
05044      matching routines.  */
05045   private_preg.regs_allocated = REGS_FIXED;
05046 
05047   if (want_reg_info)
05048     {
05049       regs.num_regs = nmatch;
05050       regs.start = TALLOC (nmatch, regoff_t);
05051       regs.end = TALLOC (nmatch, regoff_t);
05052       if (regs.start == NULL || regs.end == NULL)
05053         return (int) REG_NOMATCH;
05054     }
05055 
05056   /* Perform the searching operation.  */
05057   ret = re_search (&private_preg, string, len,
05058                    /* start: */ 0, /* range: */ len,
05059                    want_reg_info ? &regs : (struct re_registers *) 0);
05060 
05061   /* Copy the register information to the POSIX structure.  */
05062   if (want_reg_info)
05063     {
05064       if (ret >= 0)
05065         {
05066           unsigned r;
05067 
05068           for (r = 0; r < nmatch; r++)
05069             {
05070               pmatch[r].rm_so = regs.start[r];
05071               pmatch[r].rm_eo = regs.end[r];
05072             }
05073         }
05074 
05075       /* If we needed the temporary register info, free the space now.  */
05076       free (regs.start);
05077       free (regs.end);
05078     }
05079 
05080   /* We want zero return to mean success, unlike `re_search'.  */
05081   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05082 }
05083 
05084 
05085 /* Returns a message corresponding to an error code, ERRCODE, returned
05086    from either regcomp or regexec.   We don't use PREG here.  */
05087 
05088 size_t
05089 regerror (errcode, preg, errbuf, errbuf_size)
05090     int errcode;
05091     const regex_t *preg;
05092     char *errbuf;
05093     size_t errbuf_size;
05094 {
05095   const char *msg;
05096   size_t msg_size;
05097 
05098   if (errcode < 0
05099       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
05100     /* Only error codes returned by the rest of the code should be passed
05101        to this routine.  If we are given anything else, or if other regex
05102        code generates an invalid error code, then the program has a bug.
05103        Dump core so we can fix it.  */
05104     abort ();
05105 
05106   msg = re_error_msg[errcode];
05107 
05108   /* POSIX doesn't require that we do anything in this case, but why
05109      not be nice.  */
05110   if (! msg)
05111     msg = "Success";
05112 
05113   msg_size = strlen (msg) + 1; /* Includes the null.  */
05114 
05115   if (errbuf_size != 0)
05116     {
05117       if (msg_size > errbuf_size)
05118         {
05119           strncpy (errbuf, msg, errbuf_size - 1);
05120           errbuf[errbuf_size - 1] = 0;
05121         }
05122       else
05123         strcpy (errbuf, msg);
05124     }
05125 
05126   return msg_size;
05127 }
05128 
05129 
05130 /* Free dynamically allocated space used by PREG.  */
05131 
05132 void
05133 regfree (preg)
05134     regex_t *preg;
05135 {
05136   if (preg->buffer != NULL)
05137     free (preg->buffer);
05138   preg->buffer = NULL;
05139 
05140   preg->allocated = 0;
05141   preg->used = 0;
05142 
05143   if (preg->fastmap != NULL)
05144     free (preg->fastmap);
05145   preg->fastmap = NULL;
05146   preg->fastmap_accurate = 0;
05147 
05148   if (preg->translate != NULL)
05149     free (preg->translate);
05150   preg->translate = NULL;
05151 }
05152 
05153 #endif /* not emacs  */
05154 
05155 /*
05156 Local variables:
05157 make-backup-files: t
05158 version-control: t
05159 trim-versions-without-asking: nil
05160 End:
05161 */
Generated on Wed Feb 29 22:50:05 2012 for CXXUtilities by  doxygen 1.6.3