MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
replace.c
1 /*
2  Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or
5  modify it under the terms of the GNU General Public License
6  as published by the Free Software Foundation; version 2 of
7  the License.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17  02110-1301 USA */
18 
19 /*
20  Replace strings in textfile
21 
22  This program replaces strings in files or from stdin to stdout.
23  It accepts a list of from-string/to-string pairs and replaces
24  each occurrence of a from-string with the corresponding to-string.
25  The first occurrence of a found string is matched. If there is more
26  than one possibility for the string to replace, longer matches
27  are preferred before shorter matches.
28 
29  Special characters in from string:
30  \^ Match start of line.
31  \$ Match end of line.
32  \b Match space-character, start of line or end of line.
33  For an end \b the next replace starts looking at the end space-character.
34  An \b alone or in a string matches only a space-character.
35  \r, \t, \v as in C.
36  The program builds a DFA state machine from the strings, so the speed does
37  not depend on the number of replace-strings (only on the number of replaces).
38  A line is assumed ending with \n or \0.
39  There is no limit except memory on the length of strings.
40 
41  Written by Monty.
42  fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44 
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50 
/* Allocation chunk sizes used by insert_pointer_name(). */
enum
{
  PC_MALLOC= 256,                       /* Bytes for pointers */
  PS_MALLOC= 512                        /* Bytes for data */
};
53 
54 typedef struct st_pointer_array { /* when using array-strings */
55  TYPELIB typelib; /* Pointer to strings */
56  uchar *str; /* Strings is here */
57  uint8 *flag; /* Flag about each var. */
58  uint array_allocs,max_count,length,max_length;
60 
/*
  Pseudo character codes stored in FOLLOWS.chr for the special from-string
  escapes.  They continue directly after the 256 real byte values.
*/
enum
{
  SPACE_CHAR= 256,                      /* \b */
  START_OF_LINE= 257,                   /* \^ */
  END_OF_LINE= 258,                     /* \$ */
  LAST_CHAR_CODE= 259                   /* Number of codes (array size) */
};
65 
66 typedef struct st_replace {
67  my_bool found;
68  struct st_replace *next[256];
69 } REPLACE;
70 
71 typedef struct st_replace_found {
72  my_bool found;
73  char *replace_string;
74  uint to_offset;
75  int from_offset;
77 
/* Number of bits in one uint word of a REP_SET bit vector. */
#if !defined(WORD_BIT)
#define WORD_BIT (sizeof(uint) * 8)
#endif
81 
82  /* functions defined in this file */
83 
84 static int static_get_options(int *argc,char * * *argv);
85 static int get_replace_strings(int *argc,char * * *argv,
86  POINTER_ARRAY *from_array,
87  POINTER_ARRAY *to_array);
88 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
89 static void free_pointer_array(POINTER_ARRAY *pa);
90 static int convert_pipe(REPLACE *,FILE *,FILE *);
91 static int convert_file(REPLACE *, char *);
92 static REPLACE *init_replace(char * *from, char * *to,uint count,
93  char * word_end_chars);
94 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
95  char * from);
96 static int initialize_buffer(void);
97 static void reset_buffer(void);
98 static void free_buffer(void);
99 
static int silent= 0;                   /* Set by option -s */
static int verbose= 0;                  /* Set by option -v */
static int updated= 0;                  /* Set when a string was replaced */
101 
102  /* The main program */
103 
104 int main(int argc, char *argv[])
105 {
106  int i,error;
107  char word_end_chars[256],*pos;
108  POINTER_ARRAY from,to;
109  REPLACE *replace;
110  MY_INIT(argv[0]);
111 
112  if (static_get_options(&argc,&argv))
113  exit(1);
114  if (get_replace_strings(&argc,&argv,&from,&to))
115  exit(1);
116 
117  for (i=1,pos=word_end_chars ; i < 256 ; i++)
118  if (my_isspace(&my_charset_latin1,i))
119  *pos++= (char) i;
120  *pos=0;
121  if (!(replace=init_replace((char**) from.typelib.type_names,
122  (char**) to.typelib.type_names,
123  (uint) from.typelib.count,word_end_chars)))
124  exit(1);
125  free_pointer_array(&from);
126  free_pointer_array(&to);
127  if (initialize_buffer())
128  return 1;
129 
130  error=0;
131  if (argc == 0)
132  error=convert_pipe(replace,stdin,stdout);
133  else
134  {
135  while (argc--)
136  {
137  error=convert_file(replace,*(argv++));
138  }
139  }
140  free_buffer();
141  my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
142  exit(error ? 2 : 0);
143  return 0; /* No compiler warning */
144 } /* main */
145 
146 
147  /* reads options */
148  /* Initiates DEBUG - but no debugging here ! */
149 
150 static int static_get_options(argc,argv)
151 register int *argc;
152 register char **argv[];
153 {
154  int help,version;
155  char *pos;
156 
157  silent=verbose=help=0;
158 
159  while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
160  while (*++pos)
161  {
162  version=0;
163  switch((*pos)) {
164  case 's':
165  silent=1;
166  break;
167  case 'v':
168  verbose=1;
169  break;
170  case '#':
171  DBUG_PUSH (++pos);
172  pos= (char*) " "; /* Skip rest of arguments */
173  break;
174  case 'V':
175  version=1;
176  case 'I':
177  case '?':
178  help=1; /* Help text written */
179  printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
180  MACHINE_TYPE);
181  if (version)
182  break;
183  puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
184  puts("This program replaces strings in files or from stdin to stdout.\n"
185  "It accepts a list of from-string/to-string pairs and replaces\n"
186  "each occurrence of a from-string with the corresponding to-string.\n"
187  "The first occurrence of a found string is matched. If there is\n"
188  "more than one possibility for the string to replace, longer\n"
189  "matches are preferred before shorter matches.\n\n"
190  "A from-string can contain these special characters:\n"
191  " \\^ Match start of line.\n"
192  " \\$ Match end of line.\n"
193  " \\b Match space-character, start of line or end of line.\n"
194  " For a end \\b the next replace starts locking at the end\n"
195  " space-character. A \\b alone in a string matches only a\n"
196  " space-character.\n");
197  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
198  puts("or");
199  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
200  puts("");
201  puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
202  break;
203  default:
204  fprintf(stderr,"illegal option: -%c\n",*pos);
205  break;
206  }
207  }
208  }
209  if (*argc == 0)
210  {
211  if (!help)
212  my_message(0,"No replace options given",MYF(ME_BELL));
213  exit(0); /* Don't use as pipe */
214  }
215  return(0);
216 } /* static_get_options */
217 
218 
219 static int get_replace_strings(argc,argv,from_array,to_array)
220 register int *argc;
221 register char **argv[];
222 POINTER_ARRAY *from_array,*to_array;
223 {
224  char *pos;
225 
226  memset(from_array, 0, sizeof(from_array[0]));
227  memset(to_array, 0, sizeof(to_array[0]));
228  while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
229  {
230  insert_pointer_name(from_array,pos);
231  (*argc)--;
232  (*argv)++;
233  if (!*argc || !strcmp(**argv,"--"))
234  {
235  my_message(0,"No to-string for last from-string",MYF(ME_BELL));
236  return 1;
237  }
238  insert_pointer_name(to_array,**argv);
239  (*argc)--;
240  (*argv)++;
241  }
242  if (*argc)
243  { /* Skip "--" argument */
244  (*argc)--;
245  (*argv)++;
246  }
247  return 0;
248 }
249 
250 static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
251 {
252  uint i,length,old_count;
253  uchar *new_pos;
254  const char **new_array;
255  DBUG_ENTER("insert_pointer_name");
256 
257  if (! pa->typelib.count)
258  {
259  if (!(pa->typelib.type_names=(const char **)
260  my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
261  (sizeof(char *)+sizeof(*pa->flag))*
262  (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
263  DBUG_RETURN(-1);
264  if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
265  MYF(MY_WME))))
266  {
267  my_free(pa->typelib.type_names);
268  DBUG_RETURN (-1);
269  }
270  pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
271  sizeof(*pa->flag));
272  pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
273  pa->length=0;
274  pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
275  pa->array_allocs=1;
276  }
277  length=(uint) strlen(name)+1;
278  if (pa->length+length >= pa->max_length)
279  {
280  pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
281  pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
282  if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
283  (uint) pa->max_length,
284  MYF(MY_WME))))
285  DBUG_RETURN(1);
286  if (new_pos != pa->str)
287  {
288  my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
289  for (i=0 ; i < pa->typelib.count ; i++)
290  pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
291  char*);
292  pa->str=new_pos;
293  }
294  }
295  if (pa->typelib.count >= pa->max_count-1)
296  {
297  int len;
298  pa->array_allocs++;
299  len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
300  if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
301  (uint) len/
302  (sizeof(uchar*)+sizeof(*pa->flag))*
303  (sizeof(uchar*)+sizeof(*pa->flag)),
304  MYF(MY_WME))))
305  DBUG_RETURN(1);
306  pa->typelib.type_names=new_array;
307  old_count=pa->max_count;
308  pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
309  pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
310  memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
311  old_count*sizeof(*pa->flag));
312  }
313  pa->flag[pa->typelib.count]=0; /* Reset flag */
314  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
315  pa->typelib.type_names[pa->typelib.count]= NullS; /* Put end-mark */
316  (void) strmov((char*) pa->str + pa->length, name);
317  pa->length+=length;
318  DBUG_RETURN(0);
319 } /* insert_pointer_name */
320 
321 
322  /* free pointer array */
323 
324 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
325 {
326  if (pa->typelib.count)
327  {
328  pa->typelib.count=0;
329  my_free(pa->typelib.type_names);
330  pa->typelib.type_names=0;
331  my_free(pa->str);
332  }
333  return;
334 } /* free_pointer_array */
335 
336 
337  /* Code for replace routines */
338 
/* Number of REP_SET entries added each time the set buffer is grown. */
enum { SET_MALLOC_HUNC= 64 };
340 
341 typedef struct st_rep_set {
342  uint *bits; /* Pointer to used sets */
343  short next[LAST_CHAR_CODE]; /* Pointer to next sets */
344  uint found_len; /* Best match to date */
345  int found_offset;
346  uint table_offset;
347  uint size_of_bits; /* For convinience */
348 } REP_SET;
349 
350 typedef struct st_rep_sets {
351  uint count; /* Number of sets */
352  uint extra; /* Extra sets in buffer */
353  uint invisible; /* Sets not chown */
354  uint size_of_bits;
355  REP_SET *set,*set_buffer;
356  uint *bit_buffer;
357 } REP_SETS;
358 
359 typedef struct st_found_set {
360  uint table_offset;
361  int found_offset;
362 } FOUND_SET;
363 
364 typedef struct st_follow {
365  int chr;
366  uint table_offset;
367  uint len;
368 } FOLLOWS;
369 
370 
371 static int init_sets(REP_SETS *sets,uint states);
372 static REP_SET *make_new_set(REP_SETS *sets);
373 static void make_sets_invisible(REP_SETS *sets);
374 static void free_last_set(REP_SETS *sets);
375 static void free_sets(REP_SETS *sets);
376 static void internal_set_bit(REP_SET *set, uint bit);
377 static void internal_clear_bit(REP_SET *set, uint bit);
378 static void or_bits(REP_SET *to,REP_SET *from);
379 static void copy_bits(REP_SET *to,REP_SET *from);
380 static int cmp_bits(REP_SET *set1,REP_SET *set2);
381 static int get_next_bit(REP_SET *set,uint lastpos);
382 static short find_set(REP_SETS *sets,REP_SET *find);
383 static short find_found(FOUND_SET *found_set,uint table_offset,
384  int found_offset);
385 static uint start_at_word(char * pos);
386 static uint end_of_word(char * pos);
387 static uint replace_len(char * pos);
388 
389 static uint found_sets=0;
390 
391 
392  /* Init a replace structure for further calls */
393 
394 static REPLACE *init_replace(char * *from, char * *to,uint count,
395  char * word_end_chars)
396 {
397  uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
398  int used_sets,chr;
399  short default_state;
400  char used_chars[LAST_CHAR_CODE],is_word_end[256];
401  char * pos, *to_pos, **to_array;
402  REP_SETS sets;
403  REP_SET *set,*start_states,*word_states,*new_set;
404  FOLLOWS *follow,*follow_ptr;
405  REPLACE *replace;
406  FOUND_SET *found_set;
407  REPLACE_STRING *rep_str;
408  DBUG_ENTER("init_replace");
409 
410  /* Count number of states */
411  for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
412  {
413  len=replace_len(from[i]);
414  if (!len)
415  {
416  errno=EINVAL;
417  my_message(0,"No to-string for last from-string",MYF(ME_BELL));
418  DBUG_RETURN(0);
419  }
420  states+=len+1;
421  result_len+=(uint) strlen(to[i])+1;
422  if (len > max_length)
423  max_length=len;
424  }
425  memset(is_word_end, 0, sizeof(is_word_end));
426  for (i=0 ; word_end_chars[i] ; i++)
427  is_word_end[(uchar) word_end_chars[i]]=1;
428 
429  if (init_sets(&sets,states))
430  DBUG_RETURN(0);
431  found_sets=0;
432  if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
433  MYF(MY_WME))))
434  {
435  free_sets(&sets);
436  DBUG_RETURN(0);
437  }
438  (void) make_new_set(&sets); /* Set starting set */
439  make_sets_invisible(&sets); /* Hide previus sets */
440  used_sets=-1;
441  word_states=make_new_set(&sets); /* Start of new word */
442  start_states=make_new_set(&sets); /* This is first state */
443  if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
444  {
445  free_sets(&sets);
446  my_free(found_set);
447  DBUG_RETURN(0);
448  }
449 
450  /* Init follow_ptr[] */
451  for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
452  {
453  if (from[i][0] == '\\' && from[i][1] == '^')
454  {
455  internal_set_bit(start_states,states+1);
456  if (!from[i][2])
457  {
458  start_states->table_offset=i;
459  start_states->found_offset=1;
460  }
461  }
462  else if (from[i][0] == '\\' && from[i][1] == '$')
463  {
464  internal_set_bit(start_states,states);
465  internal_set_bit(word_states,states);
466  if (!from[i][2] && start_states->table_offset == (uint) ~0)
467  {
468  start_states->table_offset=i;
469  start_states->found_offset=0;
470  }
471  }
472  else
473  {
474  internal_set_bit(word_states,states);
475  if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
476  internal_set_bit(start_states,states+1);
477  else
478  internal_set_bit(start_states,states);
479  }
480  for (pos=from[i], len=0; *pos ; pos++)
481  {
482  if (*pos == '\\' && *(pos+1))
483  {
484  pos++;
485  switch (*pos) {
486  case 'b':
487  follow_ptr->chr = SPACE_CHAR;
488  break;
489  case '^':
490  follow_ptr->chr = START_OF_LINE;
491  break;
492  case '$':
493  follow_ptr->chr = END_OF_LINE;
494  break;
495  case 'r':
496  follow_ptr->chr = '\r';
497  break;
498  case 't':
499  follow_ptr->chr = '\t';
500  break;
501  case 'v':
502  follow_ptr->chr = '\v';
503  break;
504  default:
505  follow_ptr->chr = (uchar) *pos;
506  break;
507  }
508  }
509  else
510  follow_ptr->chr= (uchar) *pos;
511  follow_ptr->table_offset=i;
512  follow_ptr->len= ++len;
513  follow_ptr++;
514  }
515  follow_ptr->chr=0;
516  follow_ptr->table_offset=i;
517  follow_ptr->len=len;
518  follow_ptr++;
519  states+=(uint) len+1;
520  }
521 
522 
523  for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
524  {
525  set=sets.set+set_nr;
526  default_state= 0; /* Start from beginning */
527 
528  /* If end of found-string not found or start-set with current set */
529 
530  for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
531  {
532  if (!follow[i].chr)
533  {
534  if (! default_state)
535  default_state= find_found(found_set,set->table_offset,
536  set->found_offset+1);
537  }
538  }
539  copy_bits(sets.set+used_sets,set); /* Save set for changes */
540  if (!default_state)
541  or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
542 
543  /* Find all chars that follows current sets */
544  memset(used_chars, 0, sizeof(used_chars));
545  for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
546  {
547  used_chars[follow[i].chr]=1;
548  if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
549  follow[i].len > 1) || follow[i].chr == END_OF_LINE)
550  used_chars[0]=1;
551  }
552 
553  /* Mark word_chars used if \b is in state */
554  if (used_chars[SPACE_CHAR])
555  for (pos= word_end_chars ; *pos ; pos++)
556  used_chars[(int) (uchar) *pos] = 1;
557 
558  /* Handle other used characters */
559  for (chr= 0 ; chr < 256 ; chr++)
560  {
561  if (! used_chars[chr])
562  set->next[chr]= (short) (chr ? default_state : -1);
563  else
564  {
565  new_set=make_new_set(&sets);
566  set=sets.set+set_nr; /* if realloc */
567  new_set->table_offset=set->table_offset;
568  new_set->found_len=set->found_len;
569  new_set->found_offset=set->found_offset+1;
570  found_end=0;
571 
572  for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
573  {
574  if (!follow[i].chr || follow[i].chr == chr ||
575  (follow[i].chr == SPACE_CHAR &&
576  (is_word_end[chr] ||
577  (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
578  (follow[i].chr == END_OF_LINE && ! chr))
579  {
580  if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
581  follow[i].len > found_end)
582  found_end=follow[i].len;
583  if (chr && follow[i].chr)
584  internal_set_bit(new_set,i+1); /* To next set */
585  else
586  internal_set_bit(new_set,i);
587  }
588  }
589  if (found_end)
590  {
591  new_set->found_len=0; /* Set for testing if first */
592  bits_set=0;
593  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
594  {
595  if ((follow[i].chr == SPACE_CHAR ||
596  follow[i].chr == END_OF_LINE) && ! chr)
597  bit_nr=i+1;
598  else
599  bit_nr=i;
600  if (follow[bit_nr-1].len < found_end ||
601  (new_set->found_len &&
602  (chr == 0 || !follow[bit_nr].chr)))
603  internal_clear_bit(new_set,i);
604  else
605  {
606  if (chr == 0 || !follow[bit_nr].chr)
607  { /* best match */
608  new_set->table_offset=follow[bit_nr].table_offset;
609  if (chr || (follow[i].chr == SPACE_CHAR ||
610  follow[i].chr == END_OF_LINE))
611  new_set->found_offset=found_end; /* New match */
612  new_set->found_len=found_end;
613  }
614  bits_set++;
615  }
616  }
617  if (bits_set == 1)
618  {
619  set->next[chr] = find_found(found_set,
620  new_set->table_offset,
621  new_set->found_offset);
622  free_last_set(&sets);
623  }
624  else
625  set->next[chr] = find_set(&sets,new_set);
626  }
627  else
628  set->next[chr] = find_set(&sets,new_set);
629  }
630  }
631  }
632 
633  /* Alloc replace structure for the replace-state-machine */
634 
635  if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
636  sizeof(REPLACE_STRING)*(found_sets+1)+
637  sizeof(char *)*count+result_len,
638  MYF(MY_WME | MY_ZEROFILL))))
639  {
640  rep_str=(REPLACE_STRING*) (replace+sets.count);
641  to_array=(char **) (rep_str+found_sets+1);
642  to_pos=(char *) (to_array+count);
643  for (i=0 ; i < count ; i++)
644  {
645  to_array[i]=to_pos;
646  to_pos=strmov(to_pos,to[i])+1;
647  }
648  rep_str[0].found=1;
649  rep_str[0].replace_string=0;
650  for (i=1 ; i <= found_sets ; i++)
651  {
652  pos=from[found_set[i-1].table_offset];
653  rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
654  rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
655  rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
656  rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
657  end_of_word(pos);
658  }
659  for (i=0 ; i < sets.count ; i++)
660  {
661  for (j=0 ; j < 256 ; j++)
662  if (sets.set[i].next[j] >= 0)
663  replace[i].next[j]=replace+sets.set[i].next[j];
664  else
665  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
666  }
667  }
668  my_free(follow);
669  free_sets(&sets);
670  my_free(found_set);
671  DBUG_PRINT("exit",("Replace table has %d states",sets.count));
672  DBUG_RETURN(replace);
673 }
674 
675 
676 static int init_sets(REP_SETS *sets,uint states)
677 {
678  memset(sets, 0, sizeof(*sets));
679  sets->size_of_bits=((states+7)/8);
680  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
681  MYF(MY_WME))))
682  return 1;
683  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
684  SET_MALLOC_HUNC,MYF(MY_WME))))
685  {
686  my_free(sets->set);
687  return 1;
688  }
689  return 0;
690 }
691 
692  /* Make help sets invisible for nicer codeing */
693 
694 static void make_sets_invisible(REP_SETS *sets)
695 {
696  sets->invisible=sets->count;
697  sets->set+=sets->count;
698  sets->count=0;
699 }
700 
701 static REP_SET *make_new_set(REP_SETS *sets)
702 {
703  uint i,count,*bit_buffer;
704  REP_SET *set;
705  if (sets->extra)
706  {
707  sets->extra--;
708  set=sets->set+ sets->count++;
709  memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
710  memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
711  set->found_offset=0;
712  set->found_len=0;
713  set->table_offset= (uint) ~0;
714  set->size_of_bits=sets->size_of_bits;
715  return set;
716  }
717  count=sets->count+sets->invisible+SET_MALLOC_HUNC;
718  if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
719  sizeof(REP_SET)*count,
720  MYF(MY_WME))))
721  return 0;
722  sets->set_buffer=set;
723  sets->set=set+sets->invisible;
724  if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
725  (sizeof(uint)*sets->size_of_bits)*count,
726  MYF(MY_WME))))
727  return 0;
728  sets->bit_buffer=bit_buffer;
729  for (i=0 ; i < count ; i++)
730  {
731  sets->set_buffer[i].bits=bit_buffer;
732  bit_buffer+=sets->size_of_bits;
733  }
734  sets->extra=SET_MALLOC_HUNC;
735  return make_new_set(sets);
736 }
737 
738 static void free_last_set(REP_SETS *sets)
739 {
740  sets->count--;
741  sets->extra++;
742  return;
743 }
744 
745 static void free_sets(REP_SETS *sets)
746 {
747  my_free(sets->set_buffer);
748  my_free(sets->bit_buffer);
749  return;
750 }
751 
752 static void internal_set_bit(REP_SET *set, uint bit)
753 {
754  set->bits[bit / WORD_BIT] |= 1 << (bit % WORD_BIT);
755  return;
756 }
757 
758 static void internal_clear_bit(REP_SET *set, uint bit)
759 {
760  set->bits[bit / WORD_BIT] &= ~ (1 << (bit % WORD_BIT));
761  return;
762 }
763 
764 
765 static void or_bits(REP_SET *to,REP_SET *from)
766 {
767  reg1 uint i;
768  for (i=0 ; i < to->size_of_bits ; i++)
769  to->bits[i]|=from->bits[i];
770  return;
771 }
772 
773 static void copy_bits(REP_SET *to,REP_SET *from)
774 {
775  memcpy((uchar*) to->bits,(uchar*) from->bits,
776  (size_t) (sizeof(uint) * to->size_of_bits));
777 }
778 
779 static int cmp_bits(REP_SET *set1,REP_SET *set2)
780 {
781  return memcmp(set1->bits, set2->bits,
782  sizeof(uint) * set1->size_of_bits);
783 }
784 
785 
786  /* Get next set bit from set. */
787 
788 static int get_next_bit(REP_SET *set,uint lastpos)
789 {
790  uint pos,*start,*end,bits;
791 
792  start=set->bits+ ((lastpos+1) / WORD_BIT);
793  end=set->bits + set->size_of_bits;
794  bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
795 
796  while (! bits && ++start < end)
797  bits=start[0];
798  if (!bits)
799  return 0;
800  pos=(uint) (start-set->bits)*WORD_BIT;
801  while (! (bits & 1))
802  {
803  bits>>=1;
804  pos++;
805  }
806  return pos;
807 }
808 
809  /* find if there is a same set in sets. If there is, use it and
810  free given set, else put in given set in sets and return it's
811  position */
812 
813 static short find_set(REP_SETS *sets,REP_SET *find)
814 {
815  uint i;
816  for (i=0 ; i < sets->count-1 ; i++)
817  {
818  if (!cmp_bits(sets->set+i,find))
819  {
820  free_last_set(sets);
821  return (short) i;
822  }
823  }
824  return (short) i; /* return new position */
825 }
826 
827 
828 /*
829  find if there is a found_set with same table_offset & found_offset
830  If there is return offset to it, else add new offset and return pos.
831  Pos returned is -offset-2 in found_set_structure because it's is
832  saved in set->next and set->next[] >= 0 points to next set and
833  set->next[] == -1 is reserved for end without replaces.
834 */
835 
836 static short find_found(FOUND_SET *found_set,uint table_offset,
837  int found_offset)
838 {
839  int i;
840  for (i=0 ; (uint) i < found_sets ; i++)
841  if (found_set[i].table_offset == table_offset &&
842  found_set[i].found_offset == found_offset)
843  return (short) (-i-2);
844  found_set[i].table_offset=table_offset;
845  found_set[i].found_offset=found_offset;
846  found_sets++;
847  return (short) (-i-2); /* return new position */
848 }
849 
850  /* Return 1 if regexp starts with \b or ends with \b*/
851 
852 static uint start_at_word(char * pos)
853 {
854  return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
855 }
856 
857 static uint end_of_word(char * pos)
858 {
859  char * end=strend(pos);
860  return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
861  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
862  1 : 0;
863 }
864 
865 
866 static uint replace_len(char * str)
867 {
868  uint len=0;
869  while (*str)
870  {
871  if (str[0] == '\\' && str[1])
872  str++;
873  str++;
874  len++;
875  }
876  return len;
877 }
878 
879 
880  /* The actual loop */
881 
882 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
883  char *from)
884 {
885  reg1 REPLACE *rep_pos;
886  reg2 REPLACE_STRING *rep_str;
887  char *to, *end, *pos, *new;
888 
889  end=(to= *start) + *max_length-1;
890  rep_pos=rep+1;
891  for(;;)
892  {
893  while (!rep_pos->found)
894  {
895  rep_pos= rep_pos->next[(uchar) *from];
896  if (to == end)
897  {
898  (*max_length)+=8192;
899  if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
900  return (uint) -1;
901  to=new+(to - *start);
902  end=(*start=new)+ *max_length-1;
903  }
904  *to++= *from++;
905  }
906  if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
907  return (uint) (to - *start)-1;
908  updated=1; /* Some char * is replaced */
909  to-=rep_str->to_offset;
910  for (pos=rep_str->replace_string; *pos ; pos++)
911  {
912  if (to == end)
913  {
914  (*max_length)*=2;
915  if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
916  return (uint) -1;
917  to=new+(to - *start);
918  end=(*start=new)+ *max_length-1;
919  }
920  *to++= *pos;
921  }
922  if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
923  return (uint) (to - *start);
924  rep_pos=rep;
925  }
926 }
927 
928 static char *buffer; /* The buffer itself, grown as needed. */
929 static int bufbytes; /* Number of bytes in the buffer. */
930 static int bufread,my_eof; /* Number of bytes to get with each read(). */
931 static uint bufalloc;
932 static char *out_buff;
933 static uint out_length;
934 
935 static int initialize_buffer()
936 {
937  bufread = 8192;
938  bufalloc = bufread + bufread / 2;
939  if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
940  return 1;
941  bufbytes=my_eof=0;
942  out_length=bufread;
943  if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
944  return(1);
945  return 0;
946 }
947 
948 static void reset_buffer()
949 {
950  bufbytes=my_eof=0;
951 }
952 
953 static void free_buffer()
954 {
955  my_free(buffer);
956  my_free(out_buff);
957 }
958 
959 
960 /*
961  Fill the buffer retaining the last n bytes at the beginning of the
962  newly filled buffer (for backward context). Returns the number of new
963  bytes read from disk.
964 */
965 
966 static int fill_buffer_retaining(fd,n)
967 File fd;
968 int n;
969 {
970  int i;
971 
972  /* See if we need to grow the buffer. */
973  if ((int) bufalloc - n <= bufread)
974  {
975  while ((int) bufalloc - n <= bufread)
976  {
977  bufalloc *= 2;
978  bufread *= 2;
979  }
980  buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
981  if (! buffer)
982  return(-1);
983  }
984 
985  /* Shift stuff down. */
986  bmove(buffer,buffer+bufbytes-n,(uint) n);
987  bufbytes = n;
988 
989  if (my_eof)
990  return 0;
991 
992  /* Read in new stuff. */
993  if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
994  (size_t) bufread, MYF(MY_WME))) < 0)
995  return -1;
996 
997  /* Kludge to pretend every nonempty file ends with a newline. */
998  if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
999  {
1000  my_eof = i = 1;
1001  buffer[bufbytes] = '\n';
1002  }
1003 
1004  bufbytes += i;
1005  return i;
1006 }
1007 
1008  /* Return 0 if convert is ok */
1009  /* Global variable update is set if something was changed */
1010 
1011 static int convert_pipe(rep,in,out)
1012 REPLACE *rep;
1013 FILE *in,*out;
1014 {
1015  int retain,error;
1016  uint length;
1017  char save_char,*end_of_line,*start_of_line;
1018  DBUG_ENTER("convert_pipe");
1019 
1020  updated=retain=0;
1021  reset_buffer();
1022 
1023  while ((error=fill_buffer_retaining(fileno(in),retain)) > 0)
1024  {
1025  end_of_line=buffer ;
1026  buffer[bufbytes]=0; /* Sentinel */
1027  for (;;)
1028  {
1029  start_of_line=end_of_line;
1030  while (end_of_line[0] != '\n' && end_of_line[0])
1031  end_of_line++;
1032  if (end_of_line == buffer+bufbytes)
1033  {
1034  retain= (int) (end_of_line - start_of_line);
1035  break; /* No end of line, read more */
1036  }
1037  save_char=end_of_line[0];
1038  end_of_line[0]=0;
1039  end_of_line++;
1040  if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1041  (uint) -1)
1042  return 1;
1043  if (!my_eof)
1044  out_buff[length++]=save_char; /* Don't write added newline */
1045  if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1046  DBUG_RETURN(1);
1047  }
1048  }
1049  DBUG_RETURN(error);
1050 }
1051 
1052 
1053 static int convert_file(REPLACE *rep, char * name)
1054 {
1055  int error;
1056  FILE *in,*out;
1057  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
1058 #ifdef HAVE_READLINK
1059  char link_name[FN_REFLEN];
1060 #endif
1061  File temp_file;
1062  size_t dir_buff_length;
1063  DBUG_ENTER("convert_file");
1064 
1065  /* check if name is a symlink */
1066 #ifdef HAVE_READLINK
1067  org_name= (!my_disable_symlinks &&
1068  !my_readlink(link_name, name, MYF(0))) ? link_name : name;
1069 #endif
1070  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
1071  DBUG_RETURN(1);
1072  dirname_part(dir_buff, org_name, &dir_buff_length);
1073  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
1074  MYF(MY_WME))) < 0)
1075  {
1076  my_fclose(in,MYF(0));
1077  DBUG_RETURN(1);
1078  }
1079  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
1080  {
1081  my_fclose(in,MYF(0));
1082  DBUG_RETURN(1);
1083  }
1084 
1085  error=convert_pipe(rep,in,out);
1086  my_fclose(in,MYF(0)); my_fclose(out,MYF(0));
1087 
1088  if (updated && ! error)
1089  my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING));
1090  else
1091  my_delete(tempname,MYF(MY_WME));
1092  if (!silent && ! error)
1093  {
1094  if (updated)
1095  printf("%s converted\n",name);
1096  else if (verbose)
1097  printf("%s left unchanged\n",name);
1098  }
1099  DBUG_RETURN(error);
1100 }