MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
engine.c
1 /*
2  * The matching engine and friends. This file is #included by regexec.c
3  * after suitable #defines of a variety of macros used herein, so that
4  * different state representations can be used without duplicating masses
5  * of code.
6  */
7 
8 #ifdef SNAMES
9 #define matcher smatcher
10 #define fast sfast
11 #define slow sslow
12 #define dissect sdissect
13 #define backref sbackref
14 #define step sstep
15 #define print sprint
16 #define at sat
17 #define match smat
18 #endif
19 #ifdef LNAMES
20 #define matcher lmatcher
21 #define fast lfast
22 #define slow lslow
23 #define dissect ldissect
24 #define backref lbackref
25 #define step lstep
26 #define print lprint
27 #define at lat
28 #define match lmat
29 #endif
30 
31 /* another structure passed up and down to avoid zillions of parameters */
32 struct match {
33  struct re_guts *g;
34  int eflags;
35  my_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
36  char *offp; /* offsets work from here */
37  char *beginp; /* start of string -- virtual NUL precedes */
38  char *endp; /* end of string -- virtual NUL here */
39  char *coldp; /* can be no match starting before here */
40  char **lastpos; /* [nplus+1] */
41  STATEVARS;
42  states st; /* current states */
43  states fresh; /* states for a fresh start */
44  states tmp; /* temporary */
45  states empty; /* empty set of states */
46 };
47 
48 #include "engine.ih"
49 
50 #ifdef REDEBUG
51 #define SP(t, s, c) print(m, t, s, c, stdout)
52 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
53 #define NOTE(str) { if (m->eflags&MY_REG_TRACE) printf("=%s\n", (str)); }
54 #else
55 #define SP(t, s, c) /* nothing */
56 #define AT(t, p1, p2, s1, s2) /* nothing */
57 #define NOTE(s) /* nothing */
58 #endif
59 
60 /*
61  - matcher - the actual matching engine
62  == static int matcher(register struct re_guts *g, char *string, \
63  == size_t nmatch, regmatch_t pmatch[], int eflags);
64  */
65 static int /* 0 success, MY_REG_NOMATCH failure */
66 matcher(charset,g, str, nmatch, pmatch, eflags)
67 const CHARSET_INFO *charset;
68 register struct re_guts *g;
69 char *str;
70 size_t nmatch;
71 my_regmatch_t pmatch[];
72 int eflags;
73 {
74  register char *endp;
75  register uint i;
76  struct match mv;
77  register struct match *m = &mv;
78  register char *dp;
79  register const sopno gf = g->firststate+1; /* +1 for OEND */
80  register const sopno gl = g->laststate;
81  char *start;
82  char *stop;
83 
84  /* simplify the situation where possible */
85  if (g->cflags&MY_REG_NOSUB)
86  nmatch = 0;
87  if (eflags&MY_REG_STARTEND) {
88  start = str + pmatch[0].rm_so;
89  stop = str + pmatch[0].rm_eo;
90  } else {
91  start = str;
92  stop = start + strlen(start);
93  }
94  if (stop < start)
95  return(MY_REG_INVARG);
96 
97  /* prescreening; this does wonders for this rather slow code */
98  if (g->must != NULL) {
99  for (dp = start; dp < stop; dp++)
100  if (*dp == g->must[0] && stop - dp >= g->mlen &&
101  memcmp(dp, g->must, (size_t)g->mlen) == 0)
102  break;
103  if (dp == stop) /* we didn't find g->must */
104  return(MY_REG_NOMATCH);
105  }
106 
107  /* match struct setup */
108  m->g = g;
109  m->eflags = eflags;
110  m->pmatch = NULL;
111  m->lastpos = NULL;
112  m->offp = str;
113  m->beginp = start;
114  m->endp = stop;
115  STATESETUP(m, 4);
116  SETUP(m->st);
117  SETUP(m->fresh);
118  SETUP(m->tmp);
119  SETUP(m->empty);
120  CLEAR(m->empty);
121 
122  /* this loop does only one repetition except for backrefs */
123  for (;;) {
124  endp = fast(charset, m, start, stop, gf, gl);
125  if (endp == NULL) { /* a miss */
126  if (m->pmatch != NULL)
127  free((char *)m->pmatch);
128  if (m->lastpos != NULL)
129  free((char *)m->lastpos);
130  STATETEARDOWN(m);
131  return(MY_REG_NOMATCH);
132  }
133  if (nmatch == 0 && !g->backrefs)
134  break; /* no further info needed */
135 
136  /* where? */
137  assert(m->coldp != NULL);
138  for (;;) {
139  NOTE("finding start");
140  endp = slow(charset, m, m->coldp, stop, gf, gl);
141  if (endp != NULL)
142  break;
143  assert(m->coldp < m->endp);
144  m->coldp++;
145  }
146  if (nmatch == 1 && !g->backrefs)
147  break; /* no further info needed */
148 
149  /* oh my, he wants the subexpressions... */
150  if (m->pmatch == NULL)
151  m->pmatch = (my_regmatch_t *)malloc((m->g->nsub + 1) *
152  sizeof(my_regmatch_t));
153  if (m->pmatch == NULL) {
154  if (m->lastpos != NULL)
155  free((char *)m->lastpos);
156  STATETEARDOWN(m);
157  return(MY_REG_ESPACE);
158  }
159  for (i = 1; i <= m->g->nsub; i++)
160  m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
161  if (!g->backrefs && !(m->eflags&MY_REG_BACKR)) {
162  NOTE("dissecting");
163  dp = dissect(charset, m, m->coldp, endp, gf, gl);
164  } else {
165  if (g->nplus > 0 && m->lastpos == NULL)
166  m->lastpos = (char **)malloc((g->nplus+1) *
167  sizeof(char *));
168  if (g->nplus > 0 && m->lastpos == NULL) {
169  free(m->pmatch);
170  STATETEARDOWN(m);
171  return(MY_REG_ESPACE);
172  }
173  NOTE("backref dissect");
174  dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
175  }
176  if (dp != NULL)
177  break;
178 
179  /* uh-oh... we couldn't find a subexpression-level match */
180  assert(g->backrefs); /* must be back references doing it */
181  assert(g->nplus == 0 || m->lastpos != NULL);
182  for (;;) {
183  if (dp != NULL || endp <= m->coldp)
184  break; /* defeat */
185  NOTE("backoff");
186  endp = slow(charset, m, m->coldp, endp-1, gf, gl);
187  if (endp == NULL)
188  break; /* defeat */
189  /* try it on a shorter possibility */
190 #ifndef NDEBUG
191  for (i = 1; i <= m->g->nsub; i++) {
192  assert(m->pmatch[i].rm_so == -1);
193  assert(m->pmatch[i].rm_eo == -1);
194  }
195 #endif
196  NOTE("backoff dissect");
197  dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
198  }
199  assert(dp == NULL || dp == endp);
200  if (dp != NULL) /* found a shorter one */
201  break;
202 
203  /* despite initial appearances, there is no match here */
204  NOTE("false alarm");
205  start = m->coldp + 1; /* recycle starting later */
206  assert(start <= stop);
207  }
208 
209  /* fill in the details if requested */
210  if (nmatch > 0) {
211  pmatch[0].rm_so = m->coldp - m->offp;
212  pmatch[0].rm_eo = endp - m->offp;
213  }
214  if (nmatch > 1) {
215  assert(m->pmatch != NULL);
216  for (i = 1; i < nmatch; i++)
217  if (i <= m->g->nsub)
218  pmatch[i] = m->pmatch[i];
219  else {
220  pmatch[i].rm_so = -1;
221  pmatch[i].rm_eo = -1;
222  }
223  }
224 
225  if (m->pmatch != NULL)
226  free((char *)m->pmatch);
227  if (m->lastpos != NULL)
228  free((char *)m->lastpos);
229  STATETEARDOWN(m);
230  return(0);
231 }
232 
233 /*
234  - dissect - figure out what matched what, no back references
235  == static char *dissect(register struct match *m, char *start, \
236  == char *stop, sopno startst, sopno stopst);
237  */
238 static char * /* == stop (success) always */
239 dissect(charset, m, start, stop, startst, stopst)
240 const CHARSET_INFO *charset;
241 register struct match *m;
242 char *start;
243 char *stop;
244 sopno startst;
245 sopno stopst;
246 {
247  register uint i;
248  register sopno ss; /* start sop of current subRE */
249  register sopno es; /* end sop of current subRE */
250  register char *sp; /* start of string matched by it */
251  register char *stp; /* string matched by it cannot pass here */
252  register char *rest; /* start of rest of string */
253  register char *tail; /* string unmatched by rest of RE */
254  register sopno ssub; /* start sop of subsubRE */
255  register sopno esub; /* end sop of subsubRE */
256  register char *ssp; /* start of string matched by subsubRE */
257  register char *sep; /* end of string matched by subsubRE */
258  register char *oldssp; /* previous ssp */
259 
260  AT("diss", start, stop, startst, stopst);
261  sp = start;
262  for (ss = startst; ss < stopst; ss = es) {
263  /* identify end of subRE */
264  es = ss;
265  switch (OP(m->g->strip[es])) {
266  case OPLUS_:
267  case OQUEST_:
268  es += OPND(m->g->strip[es]);
269  break;
270  case OCH_:
271  while (OP(m->g->strip[es]) != O_CH)
272  es += OPND(m->g->strip[es]);
273  break;
274  }
275  es++;
276 
277  /* figure out what it matched */
278  switch (OP(m->g->strip[ss])) {
279  case OEND:
280  assert(nope);
281  break;
282  case OCHAR:
283  sp++;
284  break;
285  case OBOL:
286  case OEOL:
287  case OBOW:
288  case OEOW:
289  break;
290  case OANY:
291  case OANYOF:
292  sp++;
293  break;
294  case OBACK_:
295  case O_BACK:
296  assert(nope);
297  break;
298  /* cases where length of match is hard to find */
299  case OQUEST_:
300  stp = stop;
301  for (;;) {
302  /* how long could this one be? */
303  rest = slow(charset, m, sp, stp, ss, es);
304  assert(rest != NULL); /* it did match */
305  /* could the rest match the rest? */
306  tail = slow(charset, m, rest, stop, es, stopst);
307  if (tail == stop)
308  break; /* yes! */
309  /* no -- try a shorter match for this one */
310  stp = rest - 1;
311  assert(stp >= sp); /* it did work */
312  }
313  ssub = ss + 1;
314  esub = es - 1;
315  /* did innards match? */
316  if (slow(charset, m, sp, rest, ssub, esub) != NULL)
317  sp = dissect(charset, m, sp, rest, ssub, esub);
318  assert(sp == rest);
319  sp = rest;
320  break;
321  case OPLUS_:
322  stp = stop;
323  for (;;) {
324  /* how long could this one be? */
325  rest = slow(charset, m, sp, stp, ss, es);
326  assert(rest != NULL); /* it did match */
327  /* could the rest match the rest? */
328  tail = slow(charset, m, rest, stop, es, stopst);
329  if (tail == stop)
330  break; /* yes! */
331  /* no -- try a shorter match for this one */
332  stp = rest - 1;
333  assert(stp >= sp); /* it did work */
334  }
335  ssub = ss + 1;
336  esub = es - 1;
337  ssp = sp;
338  oldssp = ssp;
339  for (;;) { /* find last match of innards */
340  sep = slow(charset, m, ssp, rest, ssub, esub);
341  if (sep == NULL || sep == ssp)
342  break; /* failed or matched null */
343  oldssp = ssp; /* on to next try */
344  ssp = sep;
345  }
346  if (sep == NULL) {
347  /* last successful match */
348  sep = ssp;
349  ssp = oldssp;
350  }
351  assert(sep == rest); /* must exhaust substring */
352  assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
353  sp = dissect(charset, m, ssp, sep, ssub, esub);
354  assert(sp == sep);
355  sp = rest;
356  break;
357  case OCH_:
358  stp = stop;
359  for (;;) {
360  /* how long could this one be? */
361  rest = slow(charset, m, sp, stp, ss, es);
362  assert(rest != NULL); /* it did match */
363  /* could the rest match the rest? */
364  tail = slow(charset, m, rest, stop, es, stopst);
365  if (tail == stop)
366  break; /* yes! */
367  /* no -- try a shorter match for this one */
368  stp = rest - 1;
369  assert(stp >= sp); /* it did work */
370  }
371  ssub = ss + 1;
372  esub = ss + OPND(m->g->strip[ss]) - 1;
373  assert(OP(m->g->strip[esub]) == OOR1);
374  for (;;) { /* find first matching branch */
375  if (slow(charset, m, sp, rest, ssub, esub) == rest)
376  break; /* it matched all of it */
377  /* that one missed, try next one */
378  assert(OP(m->g->strip[esub]) == OOR1);
379  esub++;
380  assert(OP(m->g->strip[esub]) == OOR2);
381  ssub = esub + 1;
382  esub += OPND(m->g->strip[esub]);
383  if (OP(m->g->strip[esub]) == OOR2)
384  esub--;
385  else
386  assert(OP(m->g->strip[esub]) == O_CH);
387  }
388  sp = dissect(charset, m, sp, rest, ssub, esub);
389  assert(sp == rest);
390  sp = rest;
391  break;
392  case O_PLUS:
393  case O_QUEST:
394  case OOR1:
395  case OOR2:
396  case O_CH:
397  assert(nope);
398  break;
399  case OLPAREN:
400  i = OPND(m->g->strip[ss]);
401  assert(0 < i && i <= m->g->nsub);
402  m->pmatch[i].rm_so = sp - m->offp;
403  break;
404  case ORPAREN:
405  i = OPND(m->g->strip[ss]);
406  assert(0 < i && i <= m->g->nsub);
407  m->pmatch[i].rm_eo = sp - m->offp;
408  break;
409  default: /* uh oh */
410  assert(nope);
411  break;
412  }
413  }
414 
415  assert(sp == stop);
416  return(sp);
417 }
418 
419 /*
420  - backref - figure out what matched what, figuring in back references
421  == static char *backref(register struct match *m, char *start, \
422  == char *stop, sopno startst, sopno stopst, sopno lev);
423  */
424 static char * /* == stop (success) or NULL (failure) */
425 backref(charset,m, start, stop, startst, stopst, lev)
426 const CHARSET_INFO *charset;
427 register struct match *m;
428 char *start;
429 char *stop;
430 sopno startst;
431 sopno stopst;
432 sopno lev; /* PLUS nesting level */
433 {
434  register uint i;
435  register sopno ss; /* start sop of current subRE */
436  register char *sp; /* start of string matched by it */
437  register sopno ssub; /* start sop of subsubRE */
438  register sopno esub; /* end sop of subsubRE */
439  register char *ssp; /* start of string matched by subsubRE */
440  register char *dp;
441  register size_t len;
442  register int hard;
443  register sop s;
444  register my_regoff_t offsave;
445  register cset *cs;
446 
447  AT("back", start, stop, startst, stopst);
448  sp = start;
449 
450  /* get as far as we can with easy stuff */
451  hard = 0;
452  for (ss = startst; !hard && ss < stopst; ss++)
453  switch (OP(s = m->g->strip[ss])) {
454  case OCHAR:
455  if (sp == stop || *sp++ != (char)OPND(s))
456  return(NULL);
457  break;
458  case OANY:
459  if (sp == stop)
460  return(NULL);
461  sp++;
462  break;
463  case OANYOF:
464  cs = &m->g->sets[OPND(s)];
465  if (sp == stop || !CHIN(cs, *sp++))
466  return(NULL);
467  break;
468  case OBOL:
469  if ( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
470  (sp < m->endp && *(sp-1) == '\n' &&
471  (m->g->cflags&MY_REG_NEWLINE)) )
472  { /* yes */ }
473  else
474  return(NULL);
475  break;
476  case OEOL:
477  if ( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
478  (sp < m->endp && *sp == '\n' &&
479  (m->g->cflags&MY_REG_NEWLINE)) )
480  { /* yes */ }
481  else
482  return(NULL);
483  break;
484  case OBOW:
485  if (( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
486  (sp < m->endp && *(sp-1) == '\n' &&
487  (m->g->cflags&MY_REG_NEWLINE)) ||
488  (sp > m->beginp &&
489  !ISWORD(charset,*(sp-1))) ) &&
490  (sp < m->endp && ISWORD(charset,*sp)) )
491  { /* yes */ }
492  else
493  return(NULL);
494  break;
495  case OEOW:
496  if (( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
497  (sp < m->endp && *sp == '\n' &&
498  (m->g->cflags&MY_REG_NEWLINE)) ||
499  (sp < m->endp && !ISWORD(charset,*sp)) ) &&
500  (sp > m->beginp && ISWORD(charset,*(sp-1))) )
501  { /* yes */ }
502  else
503  return(NULL);
504  break;
505  case O_QUEST:
506  break;
507  case OOR1: /* matches null but needs to skip */
508  ss++;
509  s = m->g->strip[ss];
510  do {
511  assert(OP(s) == OOR2);
512  ss += OPND(s);
513  } while (OP(s = m->g->strip[ss]) != O_CH);
514  /* note that the ss++ gets us past the O_CH */
515  break;
516  default: /* have to make a choice */
517  hard = 1;
518  break;
519  }
520  if (!hard) { /* that was it! */
521  if (sp != stop)
522  return(NULL);
523  return(sp);
524  }
525  ss--; /* adjust for the for's final increment */
526 
527  /* the hard stuff */
528  AT("hard", sp, stop, ss, stopst);
529  s = m->g->strip[ss];
530  switch (OP(s)) {
531  case OBACK_: /* the vilest depths */
532  i = OPND(s);
533  assert(0 < i && i <= m->g->nsub);
534  if (m->pmatch[i].rm_eo == -1)
535  return(NULL);
536  assert(m->pmatch[i].rm_so != -1);
537  len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
538  assert((size_t) (stop - m->beginp) >= len);
539  if (sp > stop - len)
540  return(NULL); /* not enough left to match */
541  ssp = m->offp + m->pmatch[i].rm_so;
542  if (memcmp(sp, ssp, len) != 0)
543  return(NULL);
544  while (m->g->strip[ss] != SOP(O_BACK, i))
545  ss++;
546  return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
547  break;
548  case OQUEST_: /* to null or not */
549  dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
550  if (dp != NULL)
551  return(dp); /* not */
552  return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
553  break;
554  case OPLUS_:
555  assert(m->lastpos != NULL);
556  assert(lev+1 <= m->g->nplus);
557  m->lastpos[lev+1] = sp;
558  return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
559  break;
560  case O_PLUS:
561  if (sp == m->lastpos[lev]) /* last pass matched null */
562  return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
563  /* try another pass */
564  m->lastpos[lev] = sp;
565  dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
566  if (dp == NULL)
567  return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
568  else
569  return(dp);
570  break;
571  case OCH_: /* find the right one, if any */
572  ssub = ss + 1;
573  esub = ss + OPND(s) - 1;
574  assert(OP(m->g->strip[esub]) == OOR1);
575  for (;;) { /* find first matching branch */
576  dp = backref(charset, m, sp, stop, ssub, esub, lev);
577  if (dp != NULL)
578  return(dp);
579  /* that one missed, try next one */
580  if (OP(m->g->strip[esub]) == O_CH)
581  return(NULL); /* there is none */
582  esub++;
583  assert(OP(m->g->strip[esub]) == OOR2);
584  ssub = esub + 1;
585  esub += OPND(m->g->strip[esub]);
586  if (OP(m->g->strip[esub]) == OOR2)
587  esub--;
588  else
589  assert(OP(m->g->strip[esub]) == O_CH);
590  }
591  break;
592  case OLPAREN: /* must undo assignment if rest fails */
593  i = OPND(s);
594  assert(0 < i && i <= m->g->nsub);
595  offsave = m->pmatch[i].rm_so;
596  m->pmatch[i].rm_so = sp - m->offp;
597  dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
598  if (dp != NULL)
599  return(dp);
600  m->pmatch[i].rm_so = offsave;
601  return(NULL);
602  break;
603  case ORPAREN: /* must undo assignment if rest fails */
604  i = OPND(s);
605  assert(0 < i && i <= m->g->nsub);
606  offsave = m->pmatch[i].rm_eo;
607  m->pmatch[i].rm_eo = sp - m->offp;
608  dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
609  if (dp != NULL)
610  return(dp);
611  m->pmatch[i].rm_eo = offsave;
612  return(NULL);
613  break;
614  default: /* uh oh */
615  assert(nope);
616  break;
617  }
618 
619  /* "can't happen" */
620  assert(nope);
621  /* NOTREACHED */
622  return 0; /* Keep gcc happy */
623 }
624 
625 /*
626  - fast - step through the string at top speed
627  == static char *fast(register struct match *m, char *start, \
628  == char *stop, sopno startst, sopno stopst);
629  */
630 static char * /* where tentative match ended, or NULL */
631 fast(charset, m, start, stop, startst, stopst)
632 const CHARSET_INFO *charset;
633 register struct match *m;
634 char *start;
635 char *stop;
636 sopno startst;
637 sopno stopst;
638 {
639  register states st = m->st;
640  register states fresh = m->fresh;
641  register states tmp = m->tmp;
642  register char *p = start;
643  register int c = (start == m->beginp) ? OUT : *(start-1);
644  register int lastc; /* previous c */
645  register int flagch;
646  register int i;
647  register char *coldp; /* last p after which no match was underway */
648 
649  CLEAR(st);
650  SET1(st, startst);
651  st = step(m->g, startst, stopst, st, NOTHING, st);
652  ASSIGN(fresh, st);
653  SP("start", st, *p);
654  coldp = NULL;
655  for (;;) {
656  /* next character */
657  lastc = c;
658  c = (p == m->endp) ? OUT : *p;
659  if (EQ(st, fresh))
660  coldp = p;
661 
662  /* is there an EOL and/or BOL between lastc and c? */
663  flagch = '\0';
664  i = 0;
665  if ( (lastc == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
666  (lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
667  flagch = BOL;
668  i = m->g->nbol;
669  }
670  if ( (c == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
671  (c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
672  flagch = (flagch == BOL) ? BOLEOL : EOL;
673  i += m->g->neol;
674  }
675  if (i != 0) {
676  for (; i > 0; i--)
677  st = step(m->g, startst, stopst, st, flagch, st);
678  SP("boleol", st, c);
679  }
680 
681  /* how about a word boundary? */
682  if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
683  (c != OUT && ISWORD(charset,c)) ) {
684  flagch = BOW;
685  }
686  if ( (lastc != OUT && ISWORD(charset,lastc)) &&
687  (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
688  flagch = EOW;
689  }
690  if (flagch == BOW || flagch == EOW) {
691  st = step(m->g, startst, stopst, st, flagch, st);
692  SP("boweow", st, c);
693  }
694 
695  /* are we done? */
696  if (ISSET(st, stopst) || p == stop)
697  break; /* NOTE BREAK OUT */
698 
699  /* no, we must deal with this character */
700  ASSIGN(tmp, st);
701  ASSIGN(st, fresh);
702  assert(c != OUT);
703  st = step(m->g, startst, stopst, tmp, c, st);
704  SP("aft", st, c);
705  assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
706  p++;
707  }
708 
709  assert(coldp != NULL);
710  m->coldp = coldp;
711  if (ISSET(st, stopst))
712  return(p+1);
713  else
714  return(NULL);
715 }
716 
717 /*
718  - slow - step through the string more deliberately
719  == static char *slow(register struct match *m, char *start, \
720  == char *stop, sopno startst, sopno stopst);
721  */
722 static char * /* where it ended */
723 slow(charset, m, start, stop, startst, stopst)
724 const CHARSET_INFO *charset;
725 register struct match *m;
726 char *start;
727 char *stop;
728 sopno startst;
729 sopno stopst;
730 {
731  register states st = m->st;
732  register states empty = m->empty;
733  register states tmp = m->tmp;
734  register char *p = start;
735  register int c = (start == m->beginp) ? OUT : *(start-1);
736  register int lastc; /* previous c */
737  register int flagch;
738  register int i;
739  register char *matchp; /* last p at which a match ended */
740 
741  AT("slow", start, stop, startst, stopst);
742  CLEAR(st);
743  SET1(st, startst);
744  SP("sstart", st, *p);
745  st = step(m->g, startst, stopst, st, NOTHING, st);
746  matchp = NULL;
747  for (;;) {
748  /* next character */
749  lastc = c;
750  c = (p == m->endp) ? OUT : *p;
751 
752  /* is there an EOL and/or BOL between lastc and c? */
753  flagch = '\0';
754  i = 0;
755  if ( (lastc == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
756  (lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
757  flagch = BOL;
758  i = m->g->nbol;
759  }
760  if ( (c == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
761  (c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
762  flagch = (flagch == BOL) ? BOLEOL : EOL;
763  i += m->g->neol;
764  }
765  if (i != 0) {
766  for (; i > 0; i--)
767  st = step(m->g, startst, stopst, st, flagch, st);
768  SP("sboleol", st, c);
769  }
770 
771  /* how about a word boundary? */
772  if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
773  (c != OUT && ISWORD(charset,c)) ) {
774  flagch = BOW;
775  }
776  if ( (lastc != OUT && ISWORD(charset,lastc)) &&
777  (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
778  flagch = EOW;
779  }
780  if (flagch == BOW || flagch == EOW) {
781  st = step(m->g, startst, stopst, st, flagch, st);
782  SP("sboweow", st, c);
783  }
784 
785  /* are we done? */
786  if (ISSET(st, stopst))
787  matchp = p;
788  if (EQ(st, empty) || p == stop)
789  break; /* NOTE BREAK OUT */
790 
791  /* no, we must deal with this character */
792  ASSIGN(tmp, st);
793  ASSIGN(st, empty);
794  assert(c != OUT);
795  st = step(m->g, startst, stopst, tmp, c, st);
796  SP("saft", st, c);
797  assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
798  p++;
799  }
800 
801  return(matchp);
802 }
803 
804 
805 /*
806  - step - map set of states reachable before char to set reachable after
807  == static states step(register struct re_guts *g, sopno start, sopno stop, \
808  == register states bef, int ch, register states aft);
809  == #define BOL (OUT+1)
810  == #define EOL (BOL+1)
811  == #define BOLEOL (BOL+2)
812  == #define NOTHING (BOL+3)
813  == #define BOW (BOL+4)
814  == #define EOW (BOL+5)
815  == #define CODEMAX (BOL+5) // highest code used
816  == #define NONCHAR(c) ((c) > CHAR_MAX)
817  == #define NNONCHAR (CODEMAX-CHAR_MAX)
818  */
819 static states
820 step(g, start, stop, bef, ch, aft)
821 register struct re_guts *g;
822 sopno start; /* start state within strip */
823 sopno stop; /* state after stop state within strip */
824 register states bef; /* states reachable before */
825 int ch; /* character or NONCHAR code */
826 register states aft; /* states already known reachable after */
827 {
828  register cset *cs;
829  register sop s;
830  register sopno pc;
831  register onestate here; /* note, macros know this name */
832  register sopno look;
833  register onestate i; /* Changed from int by Monty */
834 
835  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
836  s = g->strip[pc];
837  switch (OP(s)) {
838  case OEND:
839  assert(pc == stop-1);
840  break;
841  case OCHAR:
842  /* only characters can match */
843  assert(!NONCHAR(ch) || ch != (char)OPND(s));
844  if (ch == (char)OPND(s))
845  FWD(aft, bef, 1);
846  break;
847  case OBOL:
848  if (ch == BOL || ch == BOLEOL)
849  FWD(aft, bef, 1);
850  break;
851  case OEOL:
852  if (ch == EOL || ch == BOLEOL)
853  FWD(aft, bef, 1);
854  break;
855  case OBOW:
856  if (ch == BOW)
857  FWD(aft, bef, 1);
858  break;
859  case OEOW:
860  if (ch == EOW)
861  FWD(aft, bef, 1);
862  break;
863  case OANY:
864  if (!NONCHAR(ch))
865  FWD(aft, bef, 1);
866  break;
867  case OANYOF:
868  cs = &g->sets[OPND(s)];
869  if (!NONCHAR(ch) && CHIN(cs, ch))
870  FWD(aft, bef, 1);
871  break;
872  case OBACK_: /* ignored here */
873  case O_BACK:
874  FWD(aft, aft, 1);
875  break;
876  case OPLUS_: /* forward, this is just an empty */
877  FWD(aft, aft, 1);
878  break;
879  case O_PLUS: /* both forward and back */
880  FWD(aft, aft, 1);
881  i = ISSETBACK(aft, OPND(s));
882  BACK(aft, aft, OPND(s));
883  if (!i && ISSETBACK(aft, OPND(s))) {
884  /* oho, must reconsider loop body */
885  pc -= OPND(s) + 1;
886  INIT(here, pc);
887  }
888  break;
889  case OQUEST_: /* two branches, both forward */
890  FWD(aft, aft, 1);
891  FWD(aft, aft, OPND(s));
892  break;
893  case O_QUEST: /* just an empty */
894  FWD(aft, aft, 1);
895  break;
896  case OLPAREN: /* not significant here */
897  case ORPAREN:
898  FWD(aft, aft, 1);
899  break;
900  case OCH_: /* mark the first two branches */
901  FWD(aft, aft, 1);
902  assert(OP(g->strip[pc+OPND(s)]) == OOR2);
903  FWD(aft, aft, OPND(s));
904  break;
905  case OOR1: /* done a branch, find the O_CH */
906  if (ISSTATEIN(aft, here)) {
907  for (look = 1;
908  OP(s = g->strip[pc+look]) != O_CH;
909  look += OPND(s))
910  assert(OP(s) == OOR2);
911  FWD(aft, aft, look);
912  }
913  break;
914  case OOR2: /* propagate OCH_'s marking */
915  FWD(aft, aft, 1);
916  if (OP(g->strip[pc+OPND(s)]) != O_CH) {
917  assert(OP(g->strip[pc+OPND(s)]) == OOR2);
918  FWD(aft, aft, OPND(s));
919  }
920  break;
921  case O_CH: /* just empty */
922  FWD(aft, aft, 1);
923  break;
924  default: /* ooooops... */
925  assert(nope);
926  break;
927  }
928  }
929 
930  return(aft);
931 }
932 
933 #ifdef REDEBUG
934 /*
935  - print - print a set of states
936  == #ifdef REDEBUG
937  == static void print(struct match *m, char *caption, states st, \
938  == int ch, FILE *d);
939  == #endif
940  */
941 static void
942 print(m, caption, st, ch, d)
943 struct match *m;
944 char *caption;
945 states st;
946 int ch;
947 FILE *d;
948 {
949  register struct re_guts *g = m->g;
950  register int i;
951  register int first = 1;
952  char buf[10];
953 
954  if (!(m->eflags&MY_REG_TRACE))
955  return;
956 
957  fprintf(d, "%s", caption);
958  if (ch != '\0')
959  fprintf(d, " %s", printchar(ch,buf));
960  for (i = 0; i < g->nstates; i++)
961  if (ISSET(st, i)) {
962  fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
963  first = 0;
964  }
965  fprintf(d, "\n");
966 }
967 
968 /*
969  - at - print current situation
970  == #ifdef REDEBUG
971  == static void at(struct match *m, char *title, char *start, char *stop, \
972  == sopno startst, sopno stopst);
973  == #endif
974  */
975 static void
976 at(m, title, start, stop, startst, stopst)
977 struct match *m;
978 char *title;
979 char *start;
980 char *stop;
981 sopno startst;
982 sopno stopst;
983 {
984  char buf[10];
985  if (!(m->eflags&MY_REG_TRACE))
986  return;
987 
988  printf("%s %s-", title, printchar(*start,buf));
989  printf("%s ", printchar(*stop,buf));
990  printf("%ld-%ld\n", (long)startst, (long)stopst,buf);
991 }
992 
993 #ifndef PCHARDONE
994 #define PCHARDONE /* never again */
995 /*
996  - printchar - make a character printable
997  == #ifdef REDEBUG
998  == static char *printchar(int ch);
999  == #endif
1000  *
1001  * Is this identical to regchar() over in debug.c? Well, yes. But a
1002  * duplicate here avoids having a debugging-capable regexec.o tied to
1003  * a matching debug.o, and this is convenient. It all disappears in
1004  * the non-debug compilation anyway, so it doesn't matter much.
1005  */
1006 static char * /* -> representation */
1007 printchar(ch,pbuf)
1008 int ch;
1009 char *pbuf;
1010 {
1011  if (isprint(ch) || ch == ' ')
1012  sprintf(pbuf, "%c", ch);
1013  else
1014  sprintf(pbuf, "\\%o", ch);
1015  return(pbuf);
1016 }
1017 #endif
1018 #endif
1019 
1020 #undef matcher
1021 #undef fast
1022 #undef slow
1023 #undef dissect
1024 #undef backref
1025 #undef step
1026 #undef print
1027 #undef at
1028 #undef match