MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
regcomp.c
1 #include <my_global.h>
2 #include <m_string.h>
3 #include <m_ctype.h>
4 #ifdef __WIN__
5 #include <limits.h>
6 #endif
7 
8 #include "my_regex.h"
9 #include "utils.h"
10 #include "regex2.h"
11 
12 #include "cclass.h"
13 #include "cname.h"
14 
15 /*
16  * parse structure, passed up and down to avoid global variables and
17  * other clumsinesses
18  */
19 struct parse {
20  char *next; /* next character in RE */
21  char *end; /* end of string (-> NUL normally) */
22  int error; /* has an error been seen? */
23  sop *strip; /* malloced strip */
24  sopno ssize; /* malloced strip size (allocated) */
25  sopno slen; /* malloced strip length (used) */
26  int ncsalloc; /* number of csets allocated */
27  struct re_guts *g;
28 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
29  sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
30  sopno pend[NPAREN]; /* -> ) ([0] unused) */
31  const CHARSET_INFO *charset; /* for ctype things */
32 };
33 
34 /* Check if there is enough stack space for recursion. */
35 my_regex_stack_check_t my_regex_enough_mem_in_stack= NULL;
36 
37 #include "regcomp.ih"
38 
39 static char nuls[10]; /* place to point scanner in event of error */
40 
41 struct cclass cclasses[CCLASS_LAST+1]= {
42  { "alnum", "","", _MY_U | _MY_L | _MY_NMR},
43  { "alpha", "","", _MY_U | _MY_L },
44  { "blank", "","", _MY_B },
45  { "cntrl", "","", _MY_CTR },
46  { "digit", "","", _MY_NMR },
47  { "graph", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR},
48  { "lower", "","", _MY_L },
49  { "print", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B },
50  { "punct", "","", _MY_PNT },
51  { "space", "","", _MY_SPC },
52  { "upper", "","", _MY_U },
53  { "xdigit", "","", _MY_X },
54  { NULL,NULL,NULL, 0 }
55 };
56 
57 /*
58  * macros for use with parse structure
59  * BEWARE: these know that the parse structure is named `p' !!!
60  */
61 #define PEEK() (*p->next)
62 #define PEEK2() (*(p->next+1))
63 #define MORE() (p->next < p->end)
64 #define MORE2() (p->next+1 < p->end)
65 #define SEE(c) (MORE() && PEEK() == (c))
66 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
67 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
68 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
69 #define NEXT() (p->next++)
70 #define NEXT2() (p->next += 2)
71 #define NEXTn(n) (p->next += (n))
72 #define GETNEXT() (*p->next++)
73 #define SETERROR(e) seterr(p, (e))
74 #define REQUIRE(co, e) ((co) || SETERROR(e))
75 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
76 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
77 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
78 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
79 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
80 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
81 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
82 #define HERE() (p->slen)
83 #define THERE() (p->slen - 1)
84 #define THERETHERE() (p->slen - 2)
85 #define DROP(n) (p->slen -= (n))
86 
87 #ifndef NDEBUG
88 static int never = 0; /* for use in asserts; shuts lint up */
89 #else
90 #define never 0 /* some <assert.h>s have bugs too */
91 #endif
92 
93 /*
94  - regcomp - interface for parser and compilation
95  = extern int regcomp(regex_t *, const char *, int);
96  = #define MY_REG_BASIC 0000
97  = #define MY_REG_EXTENDED 0001
98  = #define MY_REG_ICASE 0002
99  = #define MY_REG_NOSUB 0004
100  = #define MY_REG_NEWLINE 0010
101  = #define MY_REG_NOSPEC 0020
102  = #define MY_REG_PEND 0040
103  = #define MY_REG_DUMP 0200
104  */
105 int /* 0 success, otherwise MY_REG_something */
106 my_regcomp(preg, pattern, cflags, charset)
107 my_regex_t *preg;
108 const char *pattern;
109 int cflags;
110 const CHARSET_INFO *charset;
111 {
112  struct parse pa;
113  register struct re_guts *g;
114  register struct parse *p = &pa;
115  register int i;
116  register size_t len;
117 #ifdef REDEBUG
118 # define GOODFLAGS(f) (f)
119 #else
120 # define GOODFLAGS(f) ((f)&~MY_REG_DUMP)
121 #endif
122 
123  my_regex_init(charset, NULL); /* Init cclass if neaded */
124  preg->charset=charset;
125  cflags = GOODFLAGS(cflags);
126  if ((cflags&MY_REG_EXTENDED) && (cflags&MY_REG_NOSPEC))
127  return(MY_REG_INVARG);
128 
129  if (cflags&MY_REG_PEND) {
130  if (preg->re_endp < pattern)
131  return(MY_REG_INVARG);
132  len = preg->re_endp - pattern;
133  } else
134  len = strlen((char *)pattern);
135 
136  /* do the mallocs early so failure handling is easy */
137  g = (struct re_guts *)malloc(sizeof(struct re_guts) +
138  (NC-1)*sizeof(cat_t));
139  if (g == NULL)
140  return(MY_REG_ESPACE);
141  p->ssize = (long) (len/(size_t)2*(size_t)3 + (size_t)1); /* ugh */
142  p->strip = (sop *)malloc(p->ssize * sizeof(sop));
143  p->slen = 0;
144  if (p->strip == NULL) {
145  free((char *)g);
146  return(MY_REG_ESPACE);
147  }
148 
149  /* set things up */
150  p->g = g;
151  p->next = (char *)pattern; /* convenience; we do not modify it */
152  p->end = p->next + len;
153  p->error = 0;
154  p->ncsalloc = 0;
155  p->charset = preg->charset;
156  for (i = 0; i < NPAREN; i++) {
157  p->pbegin[i] = 0;
158  p->pend[i] = 0;
159  }
160  g->csetsize = NC;
161  g->sets = NULL;
162  g->setbits = NULL;
163  g->ncsets = 0;
164  g->cflags = cflags;
165  g->iflags = 0;
166  g->nbol = 0;
167  g->neol = 0;
168  g->must = NULL;
169  g->mlen = 0;
170  g->nsub = 0;
171  g->ncategories = 1; /* category 0 is "everything else" */
172  g->categories = &g->catspace[-(CHAR_MIN)];
173  (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
174  g->backrefs = 0;
175 
176  /* do it */
177  EMIT(OEND, 0);
178  g->firststate = THERE();
179  if (cflags&MY_REG_EXTENDED)
180  p_ere(p, OUT);
181  else if (cflags&MY_REG_NOSPEC)
182  p_str(p);
183  else
184  p_bre(p, OUT, OUT);
185  EMIT(OEND, 0);
186  g->laststate = THERE();
187 
188  /* tidy up loose ends and fill things in */
189  categorize(p, g);
190  stripsnug(p, g);
191  findmust(p, g);
192  g->nplus = pluscount(p, g);
193  g->magic = MAGIC2;
194  preg->re_nsub = g->nsub;
195  preg->re_g = g;
196  preg->re_magic = MAGIC1;
197 #ifndef REDEBUG
198  /* not debugging, so can't rely on the assert() in regexec() */
199  if (g->iflags&BAD)
200  SETERROR(MY_REG_ASSERT);
201 #endif
202 
203  /* win or lose, we're done */
204  if (p->error != 0) /* lose */
205  my_regfree(preg);
206  return(p->error);
207 }
208 
209 /*
210  - p_ere - ERE parser top level, concatenation and alternation
211  == static void p_ere(register struct parse *p, int stop);
212  */
213 static void
214 p_ere(p, stop)
215 register struct parse *p;
216 int stop; /* character this ERE should end at */
217 {
218  register char c;
219  register sopno UNINIT_VAR(prevback);
220  register sopno UNINIT_VAR(prevfwd);
221  register sopno conc;
222  register int first = 1; /* is this the first alternative? */
223 
224  for (;;) {
225  /* do a bunch of concatenated expressions */
226  conc = HERE();
227  while (MORE() && (c = PEEK()) != '|' && c != stop)
228  {
229  if (my_regex_enough_mem_in_stack &&
230  my_regex_enough_mem_in_stack(0))
231  {
232  SETERROR(MY_REG_ESPACE);
233  return;
234  }
235  p_ere_exp(p);
236  }
237  if(REQUIRE(HERE() != conc, MY_REG_EMPTY)) {}/* require nonempty */
238 
239  if (!EAT('|'))
240  break; /* NOTE BREAK OUT */
241 
242  if (first) {
243  INSERT(OCH_, conc); /* offset is wrong */
244  prevfwd = conc;
245  prevback = conc;
246  first = 0;
247  }
248  ASTERN(OOR1, prevback);
249  prevback = THERE();
250  AHEAD(prevfwd); /* fix previous offset */
251  prevfwd = HERE();
252  EMIT(OOR2, 0); /* offset is very wrong */
253  }
254 
255  if (!first) { /* tail-end fixups */
256  AHEAD(prevfwd);
257  ASTERN(O_CH, prevback);
258  }
259 
260  assert(!MORE() || SEE(stop));
261 }
262 
263 /*
264  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
265  == static void p_ere_exp(register struct parse *p);
266  */
267 static void
268 p_ere_exp(p)
269 register struct parse *p;
270 {
271  register char c;
272  register sopno pos;
273  register int count;
274  register int count2;
275  register sopno subno;
276  int wascaret = 0;
277 
278  assert(MORE()); /* caller should have ensured this */
279  c = GETNEXT();
280 
281  pos = HERE();
282  switch (c) {
283  case '(':
284  if(REQUIRE(MORE(), MY_REG_EPAREN)) {}
285  p->g->nsub++;
286  subno = (sopno) p->g->nsub;
287  if (subno < NPAREN)
288  p->pbegin[subno] = HERE();
289  EMIT(OLPAREN, subno);
290  if (!SEE(')'))
291  p_ere(p, ')');
292  if (subno < NPAREN) {
293  p->pend[subno] = HERE();
294  assert(p->pend[subno] != 0);
295  }
296  EMIT(ORPAREN, subno);
297  if(MUSTEAT(')', MY_REG_EPAREN)) {}
298  break;
299  case '^':
300  EMIT(OBOL, 0);
301  p->g->iflags |= USEBOL;
302  p->g->nbol++;
303  wascaret = 1;
304  break;
305  case '$':
306  EMIT(OEOL, 0);
307  p->g->iflags |= USEEOL;
308  p->g->neol++;
309  break;
310  case '|':
311  SETERROR(MY_REG_EMPTY);
312  break;
313  case '*':
314  case '+':
315  case '?':
316  SETERROR(MY_REG_BADRPT);
317  break;
318  case '.':
319  if (p->g->cflags&MY_REG_NEWLINE)
320  nonnewline(p);
321  else
322  EMIT(OANY, 0);
323  break;
324  case '[':
325  p_bracket(p);
326  break;
327  case '\\':
328  if(REQUIRE(MORE(), MY_REG_EESCAPE)) {}
329  c = GETNEXT();
330  ordinary(p, c);
331  break;
332  case '{': /* okay as ordinary except if digit follows */
333  if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), MY_REG_BADRPT)) {}
334  /* FALLTHROUGH */
335  default:
336  ordinary(p, c);
337  break;
338  }
339 
340  if (!MORE())
341  return;
342  c = PEEK();
343  /* we call { a repetition if followed by a digit */
344  if (!( c == '*' || c == '+' || c == '?' ||
345  (c == '{' && MORE2() &&
346  my_isdigit(p->charset,PEEK2())) ))
347  return; /* no repetition, we're done */
348  NEXT();
349 
350  if(REQUIRE(!wascaret, MY_REG_BADRPT)) {}
351  switch (c) {
352  case '*': /* implemented as +? */
353  /* this case does not require the (y|) trick, noKLUDGE */
354  INSERT(OPLUS_, pos);
355  ASTERN(O_PLUS, pos);
356  INSERT(OQUEST_, pos);
357  ASTERN(O_QUEST, pos);
358  break;
359  case '+':
360  INSERT(OPLUS_, pos);
361  ASTERN(O_PLUS, pos);
362  break;
363  case '?':
364  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
365  INSERT(OCH_, pos); /* offset slightly wrong */
366  ASTERN(OOR1, pos); /* this one's right */
367  AHEAD(pos); /* fix the OCH_ */
368  EMIT(OOR2, 0); /* offset very wrong... */
369  AHEAD(THERE()); /* ...so fix it */
370  ASTERN(O_CH, THERETHERE());
371  break;
372  case '{':
373  count = p_count(p);
374  if (EAT(',')) {
375  if (my_isdigit(p->charset,PEEK())) {
376  count2 = p_count(p);
377  if(REQUIRE(count <= count2, MY_REG_BADBR)) {}
378  } else /* single number with comma */
379  count2 = RE_INFINITY;
380  } else /* just a single number */
381  count2 = count;
382  repeat(p, pos, count, count2);
383  if (!EAT('}')) { /* error heuristics */
384  while (MORE() && PEEK() != '}')
385  NEXT();
386  if(REQUIRE(MORE(), MY_REG_EBRACE)) {}
387  SETERROR(MY_REG_BADBR);
388  }
389  break;
390  }
391 
392  if (!MORE())
393  return;
394  c = PEEK();
395  if (!( c == '*' || c == '+' || c == '?' ||
396  (c == '{' && MORE2() &&
397  my_isdigit(p->charset,PEEK2())) ) )
398  return;
399  SETERROR(MY_REG_BADRPT);
400 }
401 
402 /*
403  - p_str - string (no metacharacters) "parser"
404  == static void p_str(register struct parse *p);
405  */
406 static void
407 p_str(p)
408 register struct parse *p;
409 {
410  if(REQUIRE(MORE(), MY_REG_EMPTY)) {}
411  while (MORE())
412  ordinary(p, GETNEXT());
413 }
414 
415 /*
416  - p_bre - BRE parser top level, anchoring and concatenation
417  == static void p_bre(register struct parse *p, register int end1, \
418  == register int end2);
419  * Giving end1 as OUT essentially eliminates the end1/end2 check.
420  *
421  * This implementation is a bit of a kludge, in that a trailing $ is first
422  * taken as an ordinary character and then revised to be an anchor. The
423  * only undesirable side effect is that '$' gets included as a character
424  * category in such cases. This is fairly harmless; not worth fixing.
425  * The amount of lookahead needed to avoid this kludge is excessive.
426  */
427 static void
428 p_bre(p, end1, end2)
429 register struct parse *p;
430 register int end1; /* first terminating character */
431 register int end2; /* second terminating character */
432 {
433  register sopno start = HERE();
434  register int first = 1; /* first subexpression? */
435  register int wasdollar = 0;
436 
437  if (EAT('^')) {
438  EMIT(OBOL, 0);
439  p->g->iflags |= USEBOL;
440  p->g->nbol++;
441  }
442  while (MORE() && !SEETWO(end1, end2)) {
443  wasdollar = p_simp_re(p, first);
444  first = 0;
445  }
446  if (wasdollar) { /* oops, that was a trailing anchor */
447  DROP(1);
448  EMIT(OEOL, 0);
449  p->g->iflags |= USEEOL;
450  p->g->neol++;
451  }
452 
453  if(REQUIRE(HERE() != start, MY_REG_EMPTY)) {} /* require nonempty */
454 }
455 
456 /*
457  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
458  == static int p_simp_re(register struct parse *p, int starordinary);
459  */
460 static int /* was the simple RE an unbackslashed $? */
461 p_simp_re(p, starordinary)
462 register struct parse *p;
463 int starordinary; /* is a leading * an ordinary character? */
464 {
465  register int c;
466  register int count;
467  register int count2;
468  register sopno pos;
469  register int i;
470  register sopno subno;
471 # define BACKSL (1<<CHAR_BIT)
472 
473  pos = HERE(); /* repetion op, if any, covers from here */
474 
475  assert(MORE()); /* caller should have ensured this */
476  c = GETNEXT();
477  if (c == '\\') {
478  if(REQUIRE(MORE(), MY_REG_EESCAPE)) {}
479  c = BACKSL | (unsigned char)GETNEXT();
480  }
481  switch (c) {
482  case '.':
483  if (p->g->cflags&MY_REG_NEWLINE)
484  nonnewline(p);
485  else
486  EMIT(OANY, 0);
487  break;
488  case '[':
489  p_bracket(p);
490  break;
491  case BACKSL|'{':
492  SETERROR(MY_REG_BADRPT);
493  break;
494  case BACKSL|'(':
495  p->g->nsub++;
496  subno = (sopno) p->g->nsub;
497  if (subno < NPAREN)
498  p->pbegin[subno] = HERE();
499  EMIT(OLPAREN, subno);
500  /* the MORE here is an error heuristic */
501  if (MORE() && !SEETWO('\\', ')'))
502  p_bre(p, '\\', ')');
503  if (subno < NPAREN) {
504  p->pend[subno] = HERE();
505  assert(p->pend[subno] != 0);
506  }
507  EMIT(ORPAREN, subno);
508  if(REQUIRE(EATTWO('\\', ')'), MY_REG_EPAREN)) {}
509  break;
510  case BACKSL|')': /* should not get here -- must be user */
511  case BACKSL|'}':
512  SETERROR(MY_REG_EPAREN);
513  break;
514  case BACKSL|'1':
515  case BACKSL|'2':
516  case BACKSL|'3':
517  case BACKSL|'4':
518  case BACKSL|'5':
519  case BACKSL|'6':
520  case BACKSL|'7':
521  case BACKSL|'8':
522  case BACKSL|'9':
523  i = (c&~BACKSL) - '0';
524  assert(i < NPAREN);
525  if (p->pend[i] != 0) {
526  assert((uint) i <= p->g->nsub);
527  EMIT(OBACK_, i);
528  assert(p->pbegin[i] != 0);
529  assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
530  assert(OP(p->strip[p->pend[i]]) == ORPAREN);
531  (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
532  EMIT(O_BACK, i);
533  } else
534  SETERROR(MY_REG_ESUBREG);
535  p->g->backrefs = 1;
536  break;
537  case '*':
538  if(REQUIRE(starordinary, MY_REG_BADRPT)) {}
539  /* FALLTHROUGH */
540  default:
541  ordinary(p, c &~ BACKSL);
542  break;
543  }
544 
545  if (EAT('*')) { /* implemented as +? */
546  /* this case does not require the (y|) trick, noKLUDGE */
547  INSERT(OPLUS_, pos);
548  ASTERN(O_PLUS, pos);
549  INSERT(OQUEST_, pos);
550  ASTERN(O_QUEST, pos);
551  } else if (EATTWO('\\', '{')) {
552  count = p_count(p);
553  if (EAT(',')) {
554  if (MORE() && my_isdigit(p->charset,PEEK())) {
555  count2 = p_count(p);
556  if(REQUIRE(count <= count2, MY_REG_BADBR)) {}
557  } else /* single number with comma */
558  count2 = RE_INFINITY;
559  } else /* just a single number */
560  count2 = count;
561  repeat(p, pos, count, count2);
562  if (!EATTWO('\\', '}')) { /* error heuristics */
563  while (MORE() && !SEETWO('\\', '}'))
564  NEXT();
565  if(REQUIRE(MORE(), MY_REG_EBRACE)) {}
566  SETERROR(MY_REG_BADBR);
567  }
568  } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
569  return(1);
570 
571  return(0);
572 }
573 
574 /*
575  - p_count - parse a repetition count
576  == static int p_count(register struct parse *p);
577  */
578 static int /* the value */
579 p_count(p)
580 register struct parse *p;
581 {
582  register int count = 0;
583  register int ndigits = 0;
584 
585  while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
586  count = count*10 + (GETNEXT() - '0');
587  ndigits++;
588  }
589 
590  if(REQUIRE(ndigits > 0 && count <= DUPMAX, MY_REG_BADBR)) {}
591  return(count);
592 }
593 
594 /*
595  - p_bracket - parse a bracketed character list
596  == static void p_bracket(register struct parse *p);
597  *
598  * Note a significant property of this code: if the allocset() did SETERROR,
599  * no set operations are done.
600  */
601 static void
602 p_bracket(p)
603 register struct parse *p;
604 {
605  register cset *cs = allocset(p);
606  register int invert = 0;
607 
608  /* Dept of Truly Sickening Special-Case Kludges */
609  if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
610  EMIT(OBOW, 0);
611  NEXTn(6);
612  return;
613  }
614  if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
615  EMIT(OEOW, 0);
616  NEXTn(6);
617  return;
618  }
619 
620  if (EAT('^'))
621  invert++; /* make note to invert set at end */
622  if (EAT(']'))
623  CHadd(cs, ']');
624  else if (EAT('-'))
625  CHadd(cs, '-');
626  while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
627  p_b_term(p, cs);
628  if (EAT('-'))
629  CHadd(cs, '-');
630  if(MUSTEAT(']', MY_REG_EBRACK)) {}
631 
632  if (p->error != 0) /* don't mess things up further */
633  return;
634 
635  if (p->g->cflags&MY_REG_ICASE) {
636  register int i;
637  register int ci;
638 
639  for (i = p->g->csetsize - 1; i >= 0; i--)
640  if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
641  ci = othercase(p->charset,i);
642  if (ci != i)
643  CHadd(cs, ci);
644  }
645  if (cs->multis != NULL)
646  mccase(p, cs);
647  }
648  if (invert) {
649  register int i;
650 
651  for (i = p->g->csetsize - 1; i >= 0; i--)
652  if (CHIN(cs, i))
653  CHsub(cs, i);
654  else
655  CHadd(cs, i);
656  if (p->g->cflags&MY_REG_NEWLINE)
657  CHsub(cs, '\n');
658  if (cs->multis != NULL)
659  mcinvert(p, cs);
660  }
661 
662  assert(cs->multis == NULL); /* xxx */
663 
664  if (nch(p, cs) == 1) { /* optimize singleton sets */
665  ordinary(p, firstch(p, cs));
666  freeset(p, cs);
667  } else
668  EMIT(OANYOF, freezeset(p, cs));
669 }
670 
671 /*
672  - p_b_term - parse one term of a bracketed character list
673  == static void p_b_term(register struct parse *p, register cset *cs);
674  */
675 static void
676 p_b_term(p, cs)
677 register struct parse *p;
678 register cset *cs;
679 {
680  register char c;
681  register char start, finish;
682  register int i;
683 
684  /* classify what we've got */
685  switch ((MORE()) ? PEEK() : '\0') {
686  case '[':
687  c = (MORE2()) ? PEEK2() : '\0';
688  break;
689  case '-':
690  SETERROR(MY_REG_ERANGE);
691  return; /* NOTE RETURN */
692  default:
693  c = '\0';
694  break;
695  }
696 
697  switch (c) {
698  case ':': /* character class */
699  NEXT2();
700  if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
701  c = PEEK();
702  if(REQUIRE(c != '-' && c != ']', MY_REG_ECTYPE)) {}
703  p_b_cclass(p, cs);
704  if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
705  if(REQUIRE(EATTWO(':', ']'), MY_REG_ECTYPE)) {}
706  break;
707  case '=': /* equivalence class */
708  NEXT2();
709  if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
710  c = PEEK();
711  if(REQUIRE(c != '-' && c != ']', MY_REG_ECOLLATE)) {}
712  p_b_eclass(p, cs);
713  if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
714  if(REQUIRE(EATTWO('=', ']'), MY_REG_ECOLLATE)) {}
715  break;
716  default: /* symbol, ordinary character, or range */
717 /* xxx revision needed for multichar stuff */
718  start = p_b_symbol(p);
719  if (SEE('-') && MORE2() && PEEK2() != ']') {
720  /* range */
721  NEXT();
722  if (EAT('-'))
723  finish = '-';
724  else
725  finish = p_b_symbol(p);
726  } else
727  finish = start;
728 /* xxx what about signed chars here... */
729  if(REQUIRE(start <= finish, MY_REG_ERANGE)) {}
730  for (i = start; i <= finish; i++)
731  CHadd(cs, i);
732  break;
733  }
734 }
735 
736 /*
737  - p_b_cclass - parse a character-class name and deal with it
738  == static void p_b_cclass(register struct parse *p, register cset *cs);
739  */
740 static void
741 p_b_cclass(p, cs)
742 register struct parse *p;
743 register cset *cs;
744 {
745  register char *sp = p->next;
746  register struct cclass *cp;
747  register size_t len;
748 
749  while (MORE() && my_isalpha(p->charset,PEEK()))
750  NEXT();
751  len = p->next - sp;
752  for (cp = cclasses; cp->name != NULL; cp++)
753  if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
754  break;
755  if (cp->name == NULL) {
756  /* oops, didn't find it */
757  SETERROR(MY_REG_ECTYPE);
758  return;
759  }
760 
761 #ifndef USE_ORIG_REGEX_CODE
762  {
763  register size_t i;
764  for (i=1 ; i<256 ; i++)
765  if (p->charset->ctype[i+1] & cp->mask)
766  CHadd(cs, i);
767  }
768 #else
769  {
770  register char *u = (char*) cp->chars;
771  register char c;
772 
773  while ((c = *u++) != '\0')
774  CHadd(cs, c);
775 
776  for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1)
777  MCadd(p, cs, u);
778  }
779 #endif
780 
781 }
782 
783 /*
784  - p_b_eclass - parse an equivalence-class name and deal with it
785  == static void p_b_eclass(register struct parse *p, register cset *cs);
786  *
787  * This implementation is incomplete. xxx
788  */
789 static void
790 p_b_eclass(p, cs)
791 register struct parse *p;
792 register cset *cs;
793 {
794  register char c;
795 
796  c = p_b_coll_elem(p, '=');
797  CHadd(cs, c);
798 }
799 
800 /*
801  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
802  == static char p_b_symbol(register struct parse *p);
803  */
804 static char /* value of symbol */
805 p_b_symbol(p)
806 register struct parse *p;
807 {
808  register char value;
809 
810  if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
811  if (!EATTWO('[', '.'))
812  return(GETNEXT());
813 
814  /* collating symbol */
815  value = p_b_coll_elem(p, '.');
816  if(REQUIRE(EATTWO('.', ']'), MY_REG_ECOLLATE)) {}
817  return(value);
818 }
819 
820 /*
821  - p_b_coll_elem - parse a collating-element name and look it up
822  == static char p_b_coll_elem(register struct parse *p, int endc);
823  */
824 static char /* value of collating element */
825 p_b_coll_elem(p, endc)
826 register struct parse *p;
827 int endc; /* name ended by endc,']' */
828 {
829  register char *sp = p->next;
830  register struct cname *cp;
831 #ifdef _WIN64
832  register __int64 len;
833 #else
834  register int len;
835 #endif
836  while (MORE() && !SEETWO(endc, ']'))
837  NEXT();
838  if (!MORE()) {
839  SETERROR(MY_REG_EBRACK);
840  return(0);
841  }
842  len = p->next - sp;
843  for (cp = cnames; cp->name != NULL; cp++)
844  if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
845  return(cp->code); /* known name */
846  if (len == 1)
847  return(*sp); /* single character */
848  SETERROR(MY_REG_ECOLLATE); /* neither */
849  return(0);
850 }
851 
852 /*
853  - othercase - return the case counterpart of an alphabetic
854  == static char othercase(int ch);
855  */
856 static char /* if no counterpart, return ch */
857 othercase(charset,ch)
858 const CHARSET_INFO *charset;
859 int ch;
860 {
861  /*
862  In MySQL some multi-byte character sets
863  have 'ctype' array but don't have 'to_lower'
864  and 'to_upper' arrays. In this case we handle
865  only basic latin letters a..z and A..Z.
866 
867  If 'to_lower' and 'to_upper' arrays are empty in a character set,
868  then my_isalpha(cs, ch) should never return TRUE for characters
869  other than basic latin letters. Otherwise it should be
870  considered as a mistake in character set definition.
871  */
872  assert(my_isalpha(charset,ch));
873  if (my_isupper(charset,ch))
874  {
875  return(charset->to_lower ? my_tolower(charset,ch) :
876  ch - 'A' + 'a');
877  }
878  else if (my_islower(charset,ch))
879  {
880  return(charset->to_upper ? my_toupper(charset,ch) :
881  ch - 'a' + 'A');
882  }
883  else /* peculiar, but could happen */
884  return(ch);
885 }
886 
887 /*
888  - bothcases - emit a dualcase version of a two-case character
889  == static void bothcases(register struct parse *p, int ch);
890  *
891  * Boy, is this implementation ever a kludge...
892  */
893 static void
894 bothcases(p, ch)
895 register struct parse *p;
896 int ch;
897 {
898  register char *oldnext = p->next;
899  register char *oldend = p->end;
900  char bracket[3];
901 
902  assert(othercase(p->charset, ch) != ch); /* p_bracket() would recurse */
903  p->next = bracket;
904  p->end = bracket+2;
905  bracket[0] = ch;
906  bracket[1] = ']';
907  bracket[2] = '\0';
908  p_bracket(p);
909  assert(p->next == bracket+2);
910  p->next = oldnext;
911  p->end = oldend;
912 }
913 
914 /*
915  - ordinary - emit an ordinary character
916  == static void ordinary(register struct parse *p, register int ch);
917  */
918 static void
919 ordinary(p, ch)
920 register struct parse *p;
921 register int ch;
922 {
923  register cat_t *cap = p->g->categories;
924 
925  if ((p->g->cflags&MY_REG_ICASE) && my_isalpha(p->charset,ch) &&
926  othercase(p->charset,ch) != ch)
927  bothcases(p, ch);
928  else {
929  EMIT(OCHAR, (unsigned char)ch);
930  if (cap[ch] == 0)
931  cap[ch] = p->g->ncategories++;
932  }
933 }
934 
935 /*
936  - nonnewline - emit MY_REG_NEWLINE version of OANY
937  == static void nonnewline(register struct parse *p);
938  *
939  * Boy, is this implementation ever a kludge...
940  */
941 static void
942 nonnewline(p)
943 register struct parse *p;
944 {
945  register char *oldnext = p->next;
946  register char *oldend = p->end;
947  char bracket[4];
948 
949  p->next = bracket;
950  p->end = bracket+3;
951  bracket[0] = '^';
952  bracket[1] = '\n';
953  bracket[2] = ']';
954  bracket[3] = '\0';
955  p_bracket(p);
956  assert(p->next == bracket+3);
957  p->next = oldnext;
958  p->end = oldend;
959 }
960 
961 /*
962  - repeat - generate code for a bounded repetition, recursively if needed
963  == static void repeat(register struct parse *p, sopno start, int from, int to);
964  */
965 static void
966 repeat(p, start, from, to)
967 register struct parse *p;
968 sopno start; /* operand from here to end of strip */
969 int from; /* repeated from this number */
970 int to; /* to this number of times (maybe RE_INFINITY) */
971 {
972  register sopno finish = HERE();
973 # define N 2
974 # define INF 3
975 # define REP(f, t) ((f)*8 + (t))
976 # define MAP(n) (((n) <= 1) ? (n) : ((n) == RE_INFINITY) ? INF : N)
977  register sopno copy;
978 
979  if (p->error != 0) /* head off possible runaway recursion */
980  return;
981 
982  assert(from <= to);
983 
984  switch (REP(MAP(from), MAP(to))) {
985  case REP(0, 0): /* must be user doing this */
986  DROP(finish-start); /* drop the operand */
987  break;
988  case REP(0, 1): /* as x{1,1}? */
989  case REP(0, N): /* as x{1,n}? */
990  case REP(0, INF): /* as x{1,}? */
991  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
992  INSERT(OCH_, start); /* offset is wrong... */
993  repeat(p, start+1, 1, to);
994  ASTERN(OOR1, start);
995  AHEAD(start); /* ... fix it */
996  EMIT(OOR2, 0);
997  AHEAD(THERE());
998  ASTERN(O_CH, THERETHERE());
999  break;
1000  case REP(1, 1): /* trivial case */
1001  /* done */
1002  break;
1003  case REP(1, N): /* as x?x{1,n-1} */
1004  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1005  INSERT(OCH_, start);
1006  ASTERN(OOR1, start);
1007  AHEAD(start);
1008  EMIT(OOR2, 0); /* offset very wrong... */
1009  AHEAD(THERE()); /* ...so fix it */
1010  ASTERN(O_CH, THERETHERE());
1011  copy = dupl(p, start+1, finish+1);
1012  assert(copy == finish+4);
1013  repeat(p, copy, 1, to-1);
1014  break;
1015  case REP(1, INF): /* as x+ */
1016  INSERT(OPLUS_, start);
1017  ASTERN(O_PLUS, start);
1018  break;
1019  case REP(N, N): /* as xx{m-1,n-1} */
1020  copy = dupl(p, start, finish);
1021  repeat(p, copy, from-1, to-1);
1022  break;
1023  case REP(N, INF): /* as xx{n-1,INF} */
1024  copy = dupl(p, start, finish);
1025  repeat(p, copy, from-1, to);
1026  break;
1027  default: /* "can't happen" */
1028  SETERROR(MY_REG_ASSERT); /* just in case */
1029  break;
1030  }
1031 }
1032 
1033 /*
1034  - seterr - set an error condition
1035  == static int seterr(register struct parse *p, int e);
1036  */
1037 static int /* useless but makes type checking happy */
1038 seterr(p, e)
1039 register struct parse *p;
1040 int e;
1041 {
1042  if (p->error == 0) /* keep earliest error condition */
1043  p->error = e;
1044  p->next = nuls; /* try to bring things to a halt */
1045  p->end = nuls;
1046  return(0); /* make the return value well-defined */
1047 }
1048 
1049 /*
1050  - allocset - allocate a set of characters for []
1051  == static cset *allocset(register struct parse *p);
1052  */
1053 static cset *
1054 allocset(p)
1055 register struct parse *p;
1056 {
1057  register int no = p->g->ncsets++;
1058  register size_t nc;
1059  register size_t nbytes;
1060  register cset *cs;
1061  register size_t css = (size_t)p->g->csetsize;
1062  register int i;
1063 
1064  if (no >= p->ncsalloc) { /* need another column of space */
1065  p->ncsalloc += CHAR_BIT;
1066  nc = p->ncsalloc;
1067  assert(nc % CHAR_BIT == 0);
1068  nbytes = nc / CHAR_BIT * css;
1069  if (p->g->sets == NULL)
1070  p->g->sets = (cset *)malloc(nc * sizeof(cset));
1071  else
1072  p->g->sets = (cset *)realloc((char *)p->g->sets,
1073  nc * sizeof(cset));
1074  if (p->g->setbits == NULL)
1075  p->g->setbits = (uch *)malloc(nbytes);
1076  else {
1077  p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1078  nbytes);
1079  /* xxx this isn't right if setbits is now NULL */
1080  for (i = 0; i < no; i++)
1081  p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1082  }
1083  if (p->g->sets != NULL && p->g->setbits != NULL)
1084  (void) memset((char *)p->g->setbits + (nbytes - css),
1085  0, css);
1086  else {
1087  no = 0;
1088  SETERROR(MY_REG_ESPACE);
1089  /* caller's responsibility not to do set ops */
1090  }
1091  }
1092 
1093  assert(p->g->sets != NULL); /* xxx */
1094  cs = &p->g->sets[no];
1095  cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1096  cs->mask = 1 << ((no) % CHAR_BIT);
1097  cs->hash = 0;
1098  cs->smultis = 0;
1099  cs->multis = NULL;
1100 
1101  return(cs);
1102 }
1103 
1104 /*
1105  - freeset - free a now-unused set
1106  == static void freeset(register struct parse *p, register cset *cs);
1107  */
1108 static void
1109 freeset(p, cs)
1110 register struct parse *p;
1111 register cset *cs;
1112 {
1113  register size_t i;
1114  register cset *top = &p->g->sets[p->g->ncsets];
1115  register size_t css = (size_t)p->g->csetsize;
1116 
1117  for (i = 0; i < css; i++)
1118  CHsub(cs, i);
1119  if (cs == top-1) /* recover only the easy case */
1120  p->g->ncsets--;
1121 }
1122 
1123 /*
1124  - freezeset - final processing on a set of characters
1125  == static int freezeset(register struct parse *p, register cset *cs);
1126  *
1127  * The main task here is merging identical sets. This is usually a waste
1128  * of time (although the hash code minimizes the overhead), but can win
1129  * big if MY_REG_ICASE is being used. MY_REG_ICASE, by the way, is why the hash
1130  * is done using addition rather than xor -- all ASCII [aA] sets xor to
1131  * the same value!
1132  */
1133 static int /* set number */
1134 freezeset(p, cs)
1135 register struct parse *p;
1136 register cset *cs;
1137 {
1138  register uch h = cs->hash;
1139  register size_t i;
1140  register cset *top = &p->g->sets[p->g->ncsets];
1141  register cset *cs2;
1142  register size_t css = (size_t)p->g->csetsize;
1143 
1144  /* look for an earlier one which is the same */
1145  for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1146  if (cs2->hash == h && cs2 != cs) {
1147  /* maybe */
1148  for (i = 0; i < css; i++)
1149  if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1150  break; /* no */
1151  if (i == css)
1152  break; /* yes */
1153  }
1154 
1155  if (cs2 < top) { /* found one */
1156  freeset(p, cs);
1157  cs = cs2;
1158  }
1159 
1160  return((int)(cs - p->g->sets));
1161 }
1162 
1163 /*
1164  - firstch - return first character in a set (which must have at least one)
1165  == static int firstch(register struct parse *p, register cset *cs);
1166  */
1167 static int /* character; there is no "none" value */
1168 firstch(p, cs)
1169 register struct parse *p;
1170 register cset *cs;
1171 {
1172  register size_t i;
1173  register size_t css = (size_t)p->g->csetsize;
1174 
1175  for (i = 0; i < css; i++)
1176  if (CHIN(cs, i))
1177  return((char)i);
1178  assert(never);
1179  return(0); /* arbitrary */
1180 }
1181 
1182 /*
1183  - nch - number of characters in a set
1184  == static int nch(register struct parse *p, register cset *cs);
1185  */
1186 static int
1187 nch(p, cs)
1188 register struct parse *p;
1189 register cset *cs;
1190 {
1191  register size_t i;
1192  register size_t css = (size_t)p->g->csetsize;
1193  register int n = 0;
1194 
1195  for (i = 0; i < css; i++)
1196  if (CHIN(cs, i))
1197  n++;
1198  return(n);
1199 }
1200 
1201 #ifdef USE_ORIG_REGEX_CODE
1202 /*
1203  - mcadd - add a collating element to a cset
1204  == static void mcadd(register struct parse *p, register cset *cs, \
1205  == register char *cp);
1206  */
1207 static void
1208 mcadd(p, cs, cp)
1209 register struct parse *p;
1210 register cset *cs;
1211 register char *cp;
1212 {
1213  register size_t oldend = cs->smultis;
1214 
1215  cs->smultis += strlen(cp) + 1;
1216  if (cs->multis == NULL)
1217  cs->multis = malloc(cs->smultis);
1218  else
1219  cs->multis = realloc(cs->multis, cs->smultis);
1220  if (cs->multis == NULL) {
1221  SETERROR(MY_REG_ESPACE);
1222  return;
1223  }
1224 
1225  (void) strcpy(cs->multis + oldend - 1, cp);
1226  cs->multis[cs->smultis - 1] = '\0';
1227 }
1228 #endif
1229 
1230 /*
1231  - mcinvert - invert the list of collating elements in a cset
1232  == static void mcinvert(register struct parse *p, register cset *cs);
1233  *
1234  * This would have to know the set of possibilities. Implementation
1235  * is deferred.
1236  */
1237 static void
1238 mcinvert(p, cs)
1239  register struct parse *p __attribute__((unused));
1240  register cset *cs __attribute__((unused));
1241 {
1242  assert(cs->multis == NULL); /* xxx */
1243 }
1244 
1245 /*
1246  - mccase - add case counterparts of the list of collating elements in a cset
1247  == static void mccase(register struct parse *p, register cset *cs);
1248  *
1249  * This would have to know the set of possibilities. Implementation
1250  * is deferred.
1251  */
1252 static void
1253 mccase(p, cs)
1254 register struct parse *p __attribute__((unused));
1255 register cset *cs __attribute__((unused));
1256 {
1257  assert(cs->multis == NULL); /* xxx */
1258 }
1259 
1260 /*
1261  - isinsets - is this character in any sets?
1262  == static int isinsets(register struct re_guts *g, int c);
1263  */
1264 static int /* predicate */
1265 isinsets(g, c)
1266 register struct re_guts *g;
1267 int c;
1268 {
1269  register uch *col;
1270  register int i;
1271  register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1272  register unsigned uc = (unsigned char)c;
1273 
1274  for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1275  if (col[uc] != 0)
1276  return(1);
1277  return(0);
1278 }
1279 
1280 /*
1281  - samesets - are these two characters in exactly the same sets?
1282  == static int samesets(register struct re_guts *g, int c1, int c2);
1283  */
1284 static int /* predicate */
1285 samesets(g, c1, c2)
1286 register struct re_guts *g;
1287 int c1;
1288 int c2;
1289 {
1290  register uch *col;
1291  register int i;
1292  register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1293  register unsigned uc1 = (unsigned char)c1;
1294  register unsigned uc2 = (unsigned char)c2;
1295 
1296  for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1297  if (col[uc1] != col[uc2])
1298  return(0);
1299  return(1);
1300 }
1301 
1302 /*
1303  - categorize - sort out character categories
1304  == static void categorize(struct parse *p, register struct re_guts *g);
1305  */
1306 static void
1307 categorize(p, g)
1308 struct parse *p;
1309 register struct re_guts *g;
1310 {
1311  register cat_t *cats = g->categories;
1312  register int c;
1313  register int c2;
1314  register cat_t cat;
1315 
1316  /* avoid making error situations worse */
1317  if (p->error != 0)
1318  return;
1319 
1320  for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1321  if (cats[c] == 0 && isinsets(g, c)) {
1322  cat = g->ncategories++;
1323  cats[c] = cat;
1324  for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1325  if (cats[c2] == 0 && samesets(g, c, c2))
1326  cats[c2] = cat;
1327  }
1328 }
1329 
1330 /*
1331  - dupl - emit a duplicate of a bunch of sops
1332  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1333  */
1334 static sopno /* start of duplicate */
1335 dupl(p, start, finish)
1336 register struct parse *p;
1337 sopno start; /* from here */
1338 sopno finish; /* to this less one */
1339 {
1340  register sopno ret = HERE();
1341  register sopno len = finish - start;
1342 
1343  assert(finish >= start);
1344  if (len == 0)
1345  return(ret);
1346  enlarge(p, p->ssize + len); /* this many unexpected additions */
1347  assert(p->ssize >= p->slen + len);
1348  (void) memcpy((char *)(p->strip + p->slen),
1349  (char *)(p->strip + start), (size_t)len*sizeof(sop));
1350  p->slen += len;
1351  return(ret);
1352 }
1353 
1354 /*
1355  - doemit - emit a strip operator
1356  == static void doemit(register struct parse *p, sop op, size_t opnd);
1357  *
1358  * It might seem better to implement this as a macro with a function as
1359  * hard-case backup, but it's just too big and messy unless there are
1360  * some changes to the data structures. Maybe later.
1361  */
1362 static void
1363 doemit(p, op, opnd)
1364 register struct parse *p;
1365 sop op;
1366 size_t opnd;
1367 {
1368  /* avoid making error situations worse */
1369  if (p->error != 0)
1370  return;
1371 
1372  /* deal with oversize operands ("can't happen", more or less) */
1373  assert(opnd < 1<<OPSHIFT);
1374 
1375  /* deal with undersized strip */
1376  if (p->slen >= p->ssize)
1377  enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1378  assert(p->slen < p->ssize);
1379 
1380  /* finally, it's all reduced to the easy case */
1381  p->strip[p->slen++] = SOP(op, opnd);
1382 }
1383 
1384 /*
1385  - doinsert - insert a sop into the strip
1386  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1387  */
1388 static void
1389 doinsert(p, op, opnd, pos)
1390 register struct parse *p;
1391 sop op;
1392 size_t opnd;
1393 sopno pos;
1394 {
1395  register sopno sn;
1396  register sop s;
1397  register int i;
1398 
1399  /* avoid making error situations worse */
1400  if (p->error != 0)
1401  return;
1402 
1403  sn = HERE();
1404  EMIT(op, opnd); /* do checks, ensure space */
1405  assert(HERE() == sn+1);
1406  s = p->strip[sn];
1407 
1408  /* adjust paren pointers */
1409  assert(pos > 0);
1410  for (i = 1; i < NPAREN; i++) {
1411  if (p->pbegin[i] >= pos) {
1412  p->pbegin[i]++;
1413  }
1414  if (p->pend[i] >= pos) {
1415  p->pend[i]++;
1416  }
1417  }
1418  {
1419  int length=(HERE()-pos-1)*sizeof(sop);
1420  bmove_upp((uchar *) &p->strip[pos+1]+length,
1421  (uchar *) &p->strip[pos]+length,
1422  length);
1423  }
1424 #ifdef OLD_CODE
1425  memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1426  (HERE()-pos-1)*sizeof(sop));
1427 #endif
1428  p->strip[pos] = s;
1429 }
1430 
1431 /*
1432  - dofwd - complete a forward reference
1433  == static void dofwd(register struct parse *p, sopno pos, sop value);
1434  */
1435 static void
1436 dofwd(p, pos, value)
1437 register struct parse *p;
1438 register sopno pos;
1439 sop value;
1440 {
1441  /* avoid making error situations worse */
1442  if (p->error != 0)
1443  return;
1444 
1445  assert(value < 1<<OPSHIFT);
1446  p->strip[pos] = OP(p->strip[pos]) | value;
1447 }
1448 
1449 /*
1450  - enlarge - enlarge the strip
1451  == static void enlarge(register struct parse *p, sopno size);
1452  */
1453 static void
1454 enlarge(p, size)
1455 register struct parse *p;
1456 register sopno size;
1457 {
1458  register sop *sp;
1459 
1460  if (p->ssize >= size)
1461  return;
1462 
1463  sp = (sop *)realloc(p->strip, size*sizeof(sop));
1464  if (sp == NULL) {
1465  SETERROR(MY_REG_ESPACE);
1466  return;
1467  }
1468  p->strip = sp;
1469  p->ssize = size;
1470 }
1471 
1472 /*
1473  - stripsnug - compact the strip
1474  == static void stripsnug(register struct parse *p, register struct re_guts *g);
1475  */
1476 static void
1477 stripsnug(p, g)
1478 register struct parse *p;
1479 register struct re_guts *g;
1480 {
1481  g->nstates = p->slen;
1482  g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1483  if (g->strip == NULL) {
1484  SETERROR(MY_REG_ESPACE);
1485  g->strip = p->strip;
1486  }
1487 }
1488 
1489 /*
1490  - findmust - fill in must and mlen with longest mandatory literal string
1491  == static void findmust(register struct parse *p, register struct re_guts *g);
1492  *
1493  * This algorithm could do fancy things like analyzing the operands of |
1494  * for common subsequences. Someday. This code is simple and finds most
1495  * of the interesting cases.
1496  *
1497  * Note that must and mlen got initialized during setup.
1498  */
1499 static void
1500 findmust(p, g)
1501 struct parse *p;
1502 register struct re_guts *g;
1503 {
1504  register sop *scan;
1505  sop *UNINIT_VAR(start);
1506  register sop *UNINIT_VAR(newstart);
1507  register sopno newlen;
1508  register sop s;
1509  register char *cp;
1510  register sopno i;
1511 
1512  /* avoid making error situations worse */
1513  if (p->error != 0)
1514  return;
1515 
1516  /* find the longest OCHAR sequence in strip */
1517  newlen = 0;
1518  scan = g->strip + 1;
1519  do {
1520  s = *scan++;
1521  switch (OP(s)) {
1522  case OCHAR: /* sequence member */
1523  if (newlen == 0) /* new sequence */
1524  newstart = scan - 1;
1525  newlen++;
1526  break;
1527  case OPLUS_: /* things that don't break one */
1528  case OLPAREN:
1529  case ORPAREN:
1530  break;
1531  case OQUEST_: /* things that must be skipped */
1532  case OCH_:
1533  scan--;
1534  do {
1535  scan += OPND(s);
1536  s = *scan;
1537  /* assert() interferes w debug printouts */
1538  if (OP(s) != O_QUEST && OP(s) != O_CH &&
1539  OP(s) != OOR2) {
1540  g->iflags |= BAD;
1541  return;
1542  }
1543  } while (OP(s) != O_QUEST && OP(s) != O_CH);
1544  /* fallthrough */
1545  default: /* things that break a sequence */
1546  if (newlen > g->mlen) { /* ends one */
1547  start = newstart;
1548  g->mlen = newlen;
1549  }
1550  newlen = 0;
1551  break;
1552  }
1553  } while (OP(s) != OEND);
1554 
1555  if (g->mlen == 0) /* there isn't one */
1556  return;
1557 
1558  /* turn it into a character string */
1559  g->must = malloc((size_t)g->mlen + 1);
1560  if (g->must == NULL) { /* argh; just forget it */
1561  g->mlen = 0;
1562  return;
1563  }
1564  cp = g->must;
1565  scan = start;
1566  for (i = g->mlen; i > 0; i--) {
1567  while (OP(s = *scan++) != OCHAR)
1568  continue;
1569  assert(cp < g->must + g->mlen);
1570  *cp++ = (char)OPND(s);
1571  }
1572  assert(cp == g->must + g->mlen);
1573  *cp++ = '\0'; /* just on general principles */
1574 }
1575 
1576 /*
1577  - pluscount - count + nesting
1578  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1579  */
1580 static sopno /* nesting depth */
1581 pluscount(p, g)
1582 struct parse *p;
1583 register struct re_guts *g;
1584 {
1585  register sop *scan;
1586  register sop s;
1587  register sopno plusnest = 0;
1588  register sopno maxnest = 0;
1589 
1590  if (p->error != 0)
1591  return(0); /* there may not be an OEND */
1592 
1593  scan = g->strip + 1;
1594  do {
1595  s = *scan++;
1596  switch (OP(s)) {
1597  case OPLUS_:
1598  plusnest++;
1599  break;
1600  case O_PLUS:
1601  if (plusnest > maxnest)
1602  maxnest = plusnest;
1603  plusnest--;
1604  break;
1605  }
1606  } while (OP(s) != OEND);
1607  if (plusnest != 0)
1608  g->iflags |= BAD;
1609  return(maxnest);
1610 }