MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ctype-simple.c
1 /* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #include <my_global.h>
17 #include "m_string.h"
18 #include "m_ctype.h"
19 #include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
20 #include <errno.h>
21 
22 #include "stdarg.h"
23 
24 /*
25  Returns the number of bytes required for strnxfrm().
26 */
27 
28 size_t my_strnxfrmlen_simple(const CHARSET_INFO *cs, size_t len)
29 {
30  return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
31 }
32 
33 
34 /*
35  Converts a string into its sort key.
36 
37  SYNOPSIS
38  my_strnxfrm_xxx()
39 
40  IMPLEMENTATION
41 
42  The my_strxfrm_xxx() function transforms a string pointed to by
43  'src' with length 'srclen' according to the charset+collation
44  pair 'cs' and copies the result key into 'dest'.
45 
46  Comparing two strings using memcmp() after my_strnxfrm_xxx()
47  is equal to comparing two original strings with my_strnncollsp_xxx().
48 
49  Not more than 'dstlen' bytes are written into 'dst'.
50  To garantee that the whole string is transformed, 'dstlen' must be
51  at least srclen*cs->strnxfrm_multiply bytes long. Otherwise,
52  consequent memcmp() may return a non-accurate result.
53 
54  If the source string is too short to fill whole 'dstlen' bytes,
55  then the 'dest' string is padded up to 'dstlen', ensuring that:
56 
57  "a" == "a "
58  "a\0" < "a"
59  "a\0" < "a "
60 
61  my_strnxfrm_simple() is implemented for 8bit charsets and
62  simple collations with one-to-one string->key transformation.
63 
64  See also implementations for various charsets/collations in
65  other ctype-xxx.c files.
66 
67  RETURN
68 
69  Target len 'dstlen'.
70 
71 */
72 
73 
74 size_t
75 my_strnxfrm_simple(const CHARSET_INFO *cs,
76  uchar *dst, size_t dstlen, uint nweights,
77  const uchar *src, size_t srclen, uint flags)
78 {
79  uchar *map= cs->sort_order;
80  uchar *d0= dst;
81  uint frmlen;
82  if ((frmlen= MY_MIN(dstlen, nweights)) > srclen)
83  frmlen= srclen;
84  if (dst != src)
85  {
86  const uchar *end;
87  for (end= src + frmlen; src < end;)
88  *dst++= map[*src++];
89  }
90  else
91  {
92  const uchar *end;
93  for (end= dst + frmlen; dst < end; dst++)
94  *dst= map[(uchar) *dst];
95  }
96  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
97  nweights - frmlen, flags, 0);
98 }
99 
100 
101 int my_strnncoll_simple(const CHARSET_INFO * cs, const uchar *s, size_t slen,
102  const uchar *t, size_t tlen,
103  my_bool t_is_prefix)
104 {
105  size_t len = ( slen > tlen ) ? tlen : slen;
106  uchar *map= cs->sort_order;
107  if (t_is_prefix && slen > tlen)
108  slen=tlen;
109  while (len--)
110  {
111  if (map[*s++] != map[*t++])
112  return ((int) map[s[-1]] - (int) map[t[-1]]);
113  }
114  /*
115  We can't use (slen - tlen) here as the result may be outside of the
116  precision of a signed int
117  */
118  return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
119 }
120 
121 
122 /*
123  Compare strings, discarding end space
124 
125  SYNOPSIS
126  my_strnncollsp_simple()
127  cs character set handler
128  a First string to compare
129  a_length Length of 'a'
130  b Second string to compare
131  b_length Length of 'b'
132  diff_if_only_endspace_difference
133  Set to 1 if the strings should be regarded as different
134  if they only difference in end space
135 
136  IMPLEMENTATION
137  If one string is shorter as the other, then we space extend the other
138  so that the strings have equal length.
139 
140  This will ensure that the following things hold:
141 
142  "a" == "a "
143  "a\0" < "a"
144  "a\0" < "a "
145 
146  RETURN
147  < 0 a < b
148  = 0 a == b
149  > 0 a > b
150 */
151 
152 int my_strnncollsp_simple(const CHARSET_INFO *cs, const uchar *a,
153  size_t a_length, const uchar *b, size_t b_length,
154  my_bool diff_if_only_endspace_difference)
155 {
156  const uchar *map= cs->sort_order, *end;
157  size_t length;
158  int res;
159 
160 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
161  diff_if_only_endspace_difference= 0;
162 #endif
163 
164  end= a + (length= MY_MIN(a_length, b_length));
165  while (a < end)
166  {
167  if (map[*a++] != map[*b++])
168  return ((int) map[a[-1]] - (int) map[b[-1]]);
169  }
170  res= 0;
171  if (a_length != b_length)
172  {
173  int swap= 1;
174  if (diff_if_only_endspace_difference)
175  res= 1; /* Assume 'a' is bigger */
176  /*
177  Check the next not space character of the longer key. If it's < ' ',
178  then it's smaller than the other key.
179  */
180  if (a_length < b_length)
181  {
182  /* put shorter key in s */
183  a_length= b_length;
184  a= b;
185  swap= -1; /* swap sign of result */
186  res= -res;
187  }
188  for (end= a + a_length-length; a < end ; a++)
189  {
190  if (map[*a] != map[' '])
191  return (map[*a] < map[' ']) ? -swap : swap;
192  }
193  }
194  return res;
195 }
196 
197 
198 size_t my_caseup_str_8bit(const CHARSET_INFO *cs,char *str)
199 {
200  register uchar *map= cs->to_upper;
201  char *str_orig= str;
202  while ((*str= (char) map[(uchar) *str]) != 0)
203  str++;
204  return (size_t) (str - str_orig);
205 }
206 
207 
208 size_t my_casedn_str_8bit(const CHARSET_INFO *cs,char *str)
209 {
210  register uchar *map= cs->to_lower;
211  char *str_orig= str;
212  while ((*str= (char) map[(uchar) *str]) != 0)
213  str++;
214  return (size_t) (str - str_orig);
215 }
216 
217 
218 size_t my_caseup_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
219  char *dst __attribute__((unused)),
220  size_t dstlen __attribute__((unused)))
221 {
222  char *end= src + srclen;
223  register uchar *map= cs->to_upper;
224  DBUG_ASSERT(src == dst && srclen == dstlen);
225  for ( ; src != end ; src++)
226  *src= (char) map[(uchar) *src];
227  return srclen;
228 }
229 
230 
231 size_t my_casedn_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
232  char *dst __attribute__((unused)),
233  size_t dstlen __attribute__((unused)))
234 {
235  char *end= src + srclen;
236  register uchar *map=cs->to_lower;
237  DBUG_ASSERT(src == dst && srclen == dstlen);
238  for ( ; src != end ; src++)
239  *src= (char) map[(uchar) *src];
240  return srclen;
241 }
242 
243 int my_strcasecmp_8bit(const CHARSET_INFO *cs,const char *s, const char *t)
244 {
245  register uchar *map=cs->to_upper;
246  while (map[(uchar) *s] == map[(uchar) *t++])
247  if (!*s++) return 0;
248  return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
249 }
250 
251 
252 int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc,
253  const uchar *str,
254  const uchar *end __attribute__((unused)))
255 {
256  if (str >= end)
257  return MY_CS_TOOSMALL;
258 
259  *wc=cs->tab_to_uni[*str];
260  return (!wc[0] && str[0]) ? -1 : 1;
261 }
262 
263 int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc,
264  uchar *str,
265  uchar *end)
266 {
267  MY_UNI_IDX *idx;
268 
269  if (str >= end)
270  return MY_CS_TOOSMALL;
271 
272  for (idx=cs->tab_from_uni; idx->tab ; idx++)
273  {
274  if (idx->from <= wc && idx->to >= wc)
275  {
276  str[0]= idx->tab[wc - idx->from];
277  return (!str[0] && wc) ? MY_CS_ILUNI : 1;
278  }
279  }
280  return MY_CS_ILUNI;
281 }
282 
283 
284 /*
285  We can't use vsprintf here as it's not guaranteed to return
286  the length on all operating systems.
287  This function is also not called in a safe environment, so the
288  end buffer must be checked.
289 */
290 
291 size_t my_snprintf_8bit(const CHARSET_INFO *cs __attribute__((unused)),
292  char* to, size_t n __attribute__((unused)),
293  const char* fmt, ...)
294 {
295  va_list args;
296  int result;
297  va_start(args,fmt);
298  result= my_vsnprintf(to, n, fmt, args);
299  va_end(args);
300  return result;
301 }
302 
303 
304 void my_hash_sort_simple(const CHARSET_INFO *cs,
305  const uchar *key, size_t len,
306  ulong *nr1, ulong *nr2)
307 {
308  register uchar *sort_order=cs->sort_order;
309  const uchar *end;
310 
311  /*
312  Remove end space. We have to do this to be able to compare
313  'A ' and 'A' as identical
314  */
315  end= skip_trailing_space(key, len);
316 
317  for (; key < (uchar*) end ; key++)
318  {
319  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
320  ((uint) sort_order[(uint) *key])) + (nr1[0] << 8);
321  nr2[0]+=3;
322  }
323 }
324 
325 
326 long my_strntol_8bit(const CHARSET_INFO *cs,
327  const char *nptr, size_t l, int base,
328  char **endptr, int *err)
329 {
330  int negative;
331  register uint32 cutoff;
332  register uint cutlim;
333  register uint32 i;
334  register const char *s;
335  register uchar c;
336  const char *save, *e;
337  int overflow;
338 
339  *err= 0; /* Initialize error indicator */
340 
341  s = nptr;
342  e = nptr+l;
343 
344  for ( ; s<e && my_isspace(cs, *s) ; s++);
345 
346  if (s == e)
347  {
348  goto noconv;
349  }
350 
351  /* Check for a sign. */
352  if (*s == '-')
353  {
354  negative = 1;
355  ++s;
356  }
357  else if (*s == '+')
358  {
359  negative = 0;
360  ++s;
361  }
362  else
363  negative = 0;
364 
365  save = s;
366  cutoff = ((uint32)~0L) / (uint32) base;
367  cutlim = (uint) (((uint32)~0L) % (uint32) base);
368 
369  overflow = 0;
370  i = 0;
371  for (c = *s; s != e; c = *++s)
372  {
373  if (c>='0' && c<='9')
374  c -= '0';
375  else if (c>='A' && c<='Z')
376  c = c - 'A' + 10;
377  else if (c>='a' && c<='z')
378  c = c - 'a' + 10;
379  else
380  break;
381  if (c >= base)
382  break;
383  if (i > cutoff || (i == cutoff && c > cutlim))
384  overflow = 1;
385  else
386  {
387  i *= (uint32) base;
388  i += c;
389  }
390  }
391 
392  if (s == save)
393  goto noconv;
394 
395  if (endptr != NULL)
396  *endptr = (char *) s;
397 
398  if (negative)
399  {
400  if (i > (uint32) INT_MIN32)
401  overflow = 1;
402  }
403  else if (i > INT_MAX32)
404  overflow = 1;
405 
406  if (overflow)
407  {
408  err[0]= ERANGE;
409  return negative ? INT_MIN32 : INT_MAX32;
410  }
411 
412  return (negative ? -((long) i) : (long) i);
413 
414 noconv:
415  err[0]= EDOM;
416  if (endptr != NULL)
417  *endptr = (char *) nptr;
418  return 0L;
419 }
420 
421 
422 ulong my_strntoul_8bit(const CHARSET_INFO *cs,
423  const char *nptr, size_t l, int base,
424  char **endptr, int *err)
425 {
426  int negative;
427  register uint32 cutoff;
428  register uint cutlim;
429  register uint32 i;
430  register const char *s;
431  register uchar c;
432  const char *save, *e;
433  int overflow;
434 
435  *err= 0; /* Initialize error indicator */
436 
437  s = nptr;
438  e = nptr+l;
439 
440  for( ; s<e && my_isspace(cs, *s); s++);
441 
442  if (s==e)
443  {
444  goto noconv;
445  }
446 
447  if (*s == '-')
448  {
449  negative = 1;
450  ++s;
451  }
452  else if (*s == '+')
453  {
454  negative = 0;
455  ++s;
456  }
457  else
458  negative = 0;
459 
460  save = s;
461  cutoff = ((uint32)~0L) / (uint32) base;
462  cutlim = (uint) (((uint32)~0L) % (uint32) base);
463  overflow = 0;
464  i = 0;
465 
466  for (c = *s; s != e; c = *++s)
467  {
468  if (c>='0' && c<='9')
469  c -= '0';
470  else if (c>='A' && c<='Z')
471  c = c - 'A' + 10;
472  else if (c>='a' && c<='z')
473  c = c - 'a' + 10;
474  else
475  break;
476  if (c >= base)
477  break;
478  if (i > cutoff || (i == cutoff && c > cutlim))
479  overflow = 1;
480  else
481  {
482  i *= (uint32) base;
483  i += c;
484  }
485  }
486 
487  if (s == save)
488  goto noconv;
489 
490  if (endptr != NULL)
491  *endptr = (char *) s;
492 
493  if (overflow)
494  {
495  err[0]= ERANGE;
496  return (~(uint32) 0);
497  }
498 
499  return (negative ? -((long) i) : (long) i);
500 
501 noconv:
502  err[0]= EDOM;
503  if (endptr != NULL)
504  *endptr = (char *) nptr;
505  return 0L;
506 }
507 
508 
509 longlong my_strntoll_8bit(const CHARSET_INFO *cs __attribute__((unused)),
510  const char *nptr, size_t l, int base,
511  char **endptr,int *err)
512 {
513  int negative;
514  register ulonglong cutoff;
515  register uint cutlim;
516  register ulonglong i;
517  register const char *s, *e;
518  const char *save;
519  int overflow;
520 
521  *err= 0; /* Initialize error indicator */
522 
523  s = nptr;
524  e = nptr+l;
525 
526  for(; s<e && my_isspace(cs,*s); s++);
527 
528  if (s == e)
529  {
530  goto noconv;
531  }
532 
533  if (*s == '-')
534  {
535  negative = 1;
536  ++s;
537  }
538  else if (*s == '+')
539  {
540  negative = 0;
541  ++s;
542  }
543  else
544  negative = 0;
545 
546  save = s;
547 
548  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
549  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
550 
551  overflow = 0;
552  i = 0;
553  for ( ; s != e; s++)
554  {
555  register uchar c= *s;
556  if (c>='0' && c<='9')
557  c -= '0';
558  else if (c>='A' && c<='Z')
559  c = c - 'A' + 10;
560  else if (c>='a' && c<='z')
561  c = c - 'a' + 10;
562  else
563  break;
564  if (c >= base)
565  break;
566  if (i > cutoff || (i == cutoff && c > cutlim))
567  overflow = 1;
568  else
569  {
570  i *= (ulonglong) base;
571  i += c;
572  }
573  }
574 
575  if (s == save)
576  goto noconv;
577 
578  if (endptr != NULL)
579  *endptr = (char *) s;
580 
581  if (negative)
582  {
583  if (i > (ulonglong) LONGLONG_MIN)
584  overflow = 1;
585  }
586  else if (i > (ulonglong) LONGLONG_MAX)
587  overflow = 1;
588 
589  if (overflow)
590  {
591  err[0]= ERANGE;
592  return negative ? LONGLONG_MIN : LONGLONG_MAX;
593  }
594 
595  return (negative ? -((longlong) i) : (longlong) i);
596 
597 noconv:
598  err[0]= EDOM;
599  if (endptr != NULL)
600  *endptr = (char *) nptr;
601  return 0L;
602 }
603 
604 
605 ulonglong my_strntoull_8bit(const CHARSET_INFO *cs,
606  const char *nptr, size_t l, int base,
607  char **endptr, int *err)
608 {
609  int negative;
610  register ulonglong cutoff;
611  register uint cutlim;
612  register ulonglong i;
613  register const char *s, *e;
614  const char *save;
615  int overflow;
616 
617  *err= 0; /* Initialize error indicator */
618 
619  s = nptr;
620  e = nptr+l;
621 
622  for(; s<e && my_isspace(cs,*s); s++);
623 
624  if (s == e)
625  {
626  goto noconv;
627  }
628 
629  if (*s == '-')
630  {
631  negative = 1;
632  ++s;
633  }
634  else if (*s == '+')
635  {
636  negative = 0;
637  ++s;
638  }
639  else
640  negative = 0;
641 
642  save = s;
643 
644  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
645  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
646 
647  overflow = 0;
648  i = 0;
649  for ( ; s != e; s++)
650  {
651  register uchar c= *s;
652 
653  if (c>='0' && c<='9')
654  c -= '0';
655  else if (c>='A' && c<='Z')
656  c = c - 'A' + 10;
657  else if (c>='a' && c<='z')
658  c = c - 'a' + 10;
659  else
660  break;
661  if (c >= base)
662  break;
663  if (i > cutoff || (i == cutoff && c > cutlim))
664  overflow = 1;
665  else
666  {
667  i *= (ulonglong) base;
668  i += c;
669  }
670  }
671 
672  if (s == save)
673  goto noconv;
674 
675  if (endptr != NULL)
676  *endptr = (char *) s;
677 
678  if (overflow)
679  {
680  err[0]= ERANGE;
681  return (~(ulonglong) 0);
682  }
683 
684  return (negative ? -((longlong) i) : (longlong) i);
685 
686 noconv:
687  err[0]= EDOM;
688  if (endptr != NULL)
689  *endptr = (char *) nptr;
690  return 0L;
691 }
692 
693 
694 /*
695  Read double from string
696 
697  SYNOPSIS:
698  my_strntod_8bit()
699  cs Character set information
700  str String to convert to double
701  length Optional length for string.
702  end result pointer to end of converted string
703  err Error number if failed conversion
704 
705  NOTES:
706  If length is not INT_MAX32 or str[length] != 0 then the given str must
707  be writeable
708  If length == INT_MAX32 the str must be \0 terminated.
709 
710  It's implemented this way to save a buffer allocation and a memory copy.
711 
712  RETURN
713  Value of number in string
714 */
715 
716 
717 double my_strntod_8bit(const CHARSET_INFO *cs __attribute__((unused)),
718  char *str, size_t length,
719  char **end, int *err)
720 {
721  if (length == INT_MAX32)
722  length= 65535; /* Should be big enough */
723  *end= str + length;
724  return my_strtod(str, end, err);
725 }
726 
727 
728 /*
729  This is a fast version optimized for the case of radix 10 / -10
730 
731  Assume len >= 1
732 */
733 
734 size_t my_long10_to_str_8bit(const CHARSET_INFO *cs __attribute__((unused)),
735  char *dst, size_t len, int radix, long int val)
736 {
737  char buffer[66];
738  register char *p, *e;
739  long int new_val;
740  uint sign=0;
741  unsigned long int uval = (unsigned long int) val;
742 
743  e = p = &buffer[sizeof(buffer)-1];
744  *p= 0;
745 
746  if (radix < 0)
747  {
748  if (val < 0)
749  {
750  /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
751  uval= (unsigned long int)0 - uval;
752  *dst++= '-';
753  len--;
754  sign= 1;
755  }
756  }
757 
758  new_val = (long) (uval / 10);
759  *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
760  val = new_val;
761 
762  while (val != 0)
763  {
764  new_val=val/10;
765  *--p = '0' + (char) (val-new_val*10);
766  val= new_val;
767  }
768 
769  len= MY_MIN(len, (size_t) (e-p));
770  memcpy(dst, p, len);
771  return len+sign;
772 }
773 
774 
775 size_t my_longlong10_to_str_8bit(const CHARSET_INFO *cs
776  __attribute__((unused)),
777  char *dst, size_t len, int radix,
778  longlong val)
779 {
780  char buffer[65];
781  register char *p, *e;
782  long long_val;
783  uint sign= 0;
784  ulonglong uval = (ulonglong)val;
785 
786  if (radix < 0)
787  {
788  if (val < 0)
789  {
790  /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
791  uval = (ulonglong)0 - uval;
792  *dst++= '-';
793  len--;
794  sign= 1;
795  }
796  }
797 
798  e = p = &buffer[sizeof(buffer)-1];
799  *p= 0;
800 
801  if (uval == 0)
802  {
803  *--p= '0';
804  len= 1;
805  goto cnv;
806  }
807 
808  while (uval > (ulonglong) LONG_MAX)
809  {
810  ulonglong quo= uval/(uint) 10;
811  uint rem= (uint) (uval- quo* (uint) 10);
812  *--p = '0' + rem;
813  uval= quo;
814  }
815 
816  long_val= (long) uval;
817  while (long_val != 0)
818  {
819  long quo= long_val/10;
820  *--p = (char) ('0' + (long_val - quo*10));
821  long_val= quo;
822  }
823 
824  len= MY_MIN(len, (size_t) (e-p));
825 cnv:
826  memcpy(dst, p, len);
827  return len+sign;
828 }
829 
830 
/*
** Compare string against string with wildcard
**	0 if matched
**	-1 if not matched with wildcard
**	1 if not matched (mismatch on a literal character)
*/
837 
/*
  likeconv(s,A): the value used to compare character A in LIKE matching
  for charset 's' -- my_toupper(s,A) when LIKE_CMP_TOUPPER is defined,
  otherwise the weight of A in (s)->sort_order.  The result is a uchar.

  The expansions are fully parenthesized so that they behave as a single
  expression wherever they are used.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(s,A) ((uchar) my_toupper(s,A))
#else
#define likeconv(s,A) ((uchar) (s)->sort_order[(uchar) (A)])
#endif

/*
  INC_PTR(cs,A,B): advance pointer A by one character.  For 8-bit
  character sets that is one byte; 'cs' and 'B' are not used.
*/
#define INC_PTR(cs,A,B) ((A)++)
845 
/*
  Recursive worker for my_wildcmp_8bit(): match [str, str_end) against
  the pattern [wildstr, wildend).

  cs             character set; characters are compared via likeconv()
  escape         pattern character that makes the following one literal
  w_one          pattern character that matches exactly one character
  w_many         pattern character that matches any run of characters
  recurse_level  current recursion depth, passed to
                 my_string_stack_guard() when that hook is set

  RETURN
    0   the string matches the pattern
    1   no match (mismatch on a literal character, string longer than
        the pattern, or the stack guard reported a problem)
    -1  no match, found while processing wildcards
*/
static
int my_wildcmp_8bit_impl(const CHARSET_INFO *cs,
                         const char *str,const char *str_end,
                         const char *wildstr,const char *wildend,
                         int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;			/* Not found, using wildcards */

  /* A non-zero answer from the stack guard is reported as "no match" */
  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Compare the run of literal pattern characters */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      /* An escape that is not the last pattern char quotes the next one */
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;

      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return(1);				/* No match */
      if (wildstr == wildend)
        return(str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      /* Each consecutive w_one consumes exactly one string character */
      do
      {
        if (str == str_end)			/* Skip one char if possible */
          return(result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      uchar cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return(-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;					/* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);				/* Ok if w_many is last */
      if (str == str_end)
        return(-1);

      /* 'cmp' is the first literal after w_many (unescaped if needed) */
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      INC_PTR(cs,wildstr,wildend);	/* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      /*
        Try every position in the string where 'cmp' occurs: match the
        rest of the pattern against the rest of the string recursively.
      */
      do
      {
        while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
          str++;
        if (str++ == str_end) return(-1);
        {
          int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
                                       wildstr,wildend,escape,w_one,
                                       w_many, recurse_level + 1);
          /* 0 is a match, -1 a definite failure; 1 means try further on */
          if (tmp <= 0)
            return(tmp);
        }
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
  /* Pattern exhausted: a match only if the string is exhausted too */
  return(str != str_end ? 1 : 0);
}
927 
928 int my_wildcmp_8bit(const CHARSET_INFO *cs,
929  const char *str,const char *str_end,
930  const char *wildstr,const char *wildend,
931  int escape, int w_one, int w_many)
932 {
933  return my_wildcmp_8bit_impl(cs, str, str_end,
934  wildstr, wildend,
935  escape, w_one, w_many, 1);
936 }
937 
938 
939 /*
940 ** Calculate min_str and max_str that ranges a LIKE string.
941 ** Arguments:
942 ** ptr Pointer to LIKE string.
943 ** ptr_length Length of LIKE string.
944 ** escape Escape character in LIKE. (Normally '\').
945 ** All escape characters should be removed from min_str and max_str
946 ** res_length Length of min_str and max_str.
947 ** min_str Smallest case sensitive string that ranges LIKE.
948 ** Should be space padded to res_length.
949 ** max_str Largest case sensitive string that ranges LIKE.
950 ** Normally padded with the biggest character sort value.
951 **
952 ** The function should return 0 if ok and 1 if the LIKE string can't be
953 ** optimized !
954 */
955 
956 my_bool my_like_range_simple(const CHARSET_INFO *cs,
957  const char *ptr, size_t ptr_length,
958  pbool escape, pbool w_one, pbool w_many,
959  size_t res_length,
960  char *min_str,char *max_str,
961  size_t *min_length, size_t *max_length)
962 {
963  const char *end= ptr + ptr_length;
964  char *min_org=min_str;
965  char *min_end=min_str+res_length;
966  size_t charlen= res_length / cs->mbmaxlen;
967 
968  for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
969  {
970  if (*ptr == escape && ptr+1 != end)
971  {
972  ptr++; /* Skip escape */
973  *min_str++= *max_str++ = *ptr;
974  continue;
975  }
976  if (*ptr == w_one) /* '_' in SQL */
977  {
978  *min_str++='\0'; /* This should be min char */
979  *max_str++= (char) cs->max_sort_char;
980  continue;
981  }
982  if (*ptr == w_many) /* '%' in SQL */
983  {
984  /* Calculate length of keys */
985  *min_length= ((cs->state & MY_CS_BINSORT) ?
986  (size_t) (min_str - min_org) :
987  res_length);
988  *max_length= res_length;
989  do
990  {
991  *min_str++= 0;
992  *max_str++= (char) cs->max_sort_char;
993  } while (min_str != min_end);
994  return 0;
995  }
996  *min_str++= *max_str++ = *ptr;
997  }
998 
999  *min_length= *max_length = (size_t) (min_str - min_org);
1000  while (min_str != min_end)
1001  *min_str++= *max_str++ = ' '; /* Because if key compression */
1002  return 0;
1003 }
1004 
1005 
1006 size_t my_scan_8bit(const CHARSET_INFO *cs, const char *str, const char *end,
1007  int sq)
1008 {
1009  const char *str0= str;
1010  switch (sq)
1011  {
1012  case MY_SEQ_INTTAIL:
1013  if (*str == '.')
1014  {
1015  for(str++ ; str != end && *str == '0' ; str++);
1016  return (size_t) (str - str0);
1017  }
1018  return 0;
1019 
1020  case MY_SEQ_SPACES:
1021  for ( ; str < end ; str++)
1022  {
1023  if (!my_isspace(cs,*str))
1024  break;
1025  }
1026  return (size_t) (str - str0);
1027  default:
1028  return 0;
1029  }
1030 }
1031 
1032 
1033 void my_fill_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1034  char *s, size_t l, int fill)
1035 {
1036  memset(s, fill, l);
1037 }
1038 
1039 
1040 size_t my_numchars_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1041  const char *b, const char *e)
1042 {
1043  return (size_t) (e - b);
1044 }
1045 
1046 
1047 size_t my_numcells_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1048  const char *b, const char *e)
1049 {
1050  return (size_t) (e - b);
1051 }
1052 
1053 
1054 size_t my_charpos_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1055  const char *b __attribute__((unused)),
1056  const char *e __attribute__((unused)),
1057  size_t pos)
1058 {
1059  return pos;
1060 }
1061 
1062 
1063 size_t my_well_formed_len_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1064  const char *start, const char *end,
1065  size_t nchars, int *error)
1066 {
1067  size_t nbytes= (size_t) (end-start);
1068  *error= 0;
1069  return MY_MIN(nbytes, nchars);
1070 }
1071 
1072 
1073 size_t my_lengthsp_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1074  const char *ptr, size_t length)
1075 {
1076  const char *end;
1077  end= (const char *) skip_trailing_space((const uchar *)ptr, length);
1078  return (size_t) (end-ptr);
1079 }
1080 
1081 
1082 uint my_instr_simple(const CHARSET_INFO *cs,
1083  const char *b, size_t b_length,
1084  const char *s, size_t s_length,
1085  my_match_t *match, uint nmatch)
1086 {
1087  register const uchar *str, *search, *end, *search_end;
1088 
1089  if (s_length <= b_length)
1090  {
1091  if (!s_length)
1092  {
1093  if (nmatch)
1094  {
1095  match->beg= 0;
1096  match->end= 0;
1097  match->mb_len= 0;
1098  }
1099  return 1; /* Empty string is always found */
1100  }
1101 
1102  str= (const uchar*) b;
1103  search= (const uchar*) s;
1104  end= (const uchar*) b+b_length-s_length+1;
1105  search_end= (const uchar*) s + s_length;
1106 
1107 skip:
1108  while (str != end)
1109  {
1110  if (cs->sort_order[*str++] == cs->sort_order[*search])
1111  {
1112  register const uchar *i,*j;
1113 
1114  i= str;
1115  j= search+1;
1116 
1117  while (j != search_end)
1118  if (cs->sort_order[*i++] != cs->sort_order[*j++])
1119  goto skip;
1120 
1121  if (nmatch > 0)
1122  {
1123  match[0].beg= 0;
1124  match[0].end= (size_t) (str- (const uchar*)b-1);
1125  match[0].mb_len= match[0].end;
1126 
1127  if (nmatch > 1)
1128  {
1129  match[1].beg= match[0].end;
1130  match[1].end= match[0].end+s_length;
1131  match[1].mb_len= match[1].end-match[1].beg;
1132  }
1133  }
1134  return 2;
1135  }
1136  }
1137  }
1138  return 0;
1139 }
1140 
1141 
/*
  Per-plane work record used by create_fromuni() while the
  Unicode -> 8-bit conversion tables are built.
*/
typedef struct
{
  int		nchars;		/* Number of characters counted in the plane */
  MY_UNI_IDX	uidx;		/* Code range (from, to) and table 'tab' */
} uni_idx;

#define PLANE_SIZE	0x100	/* Upper bound of the 8-bit code loop */
#define PLANE_NUM	0x100	/* Number of uni_idx slots / planes */
/* Plane of a wide character: bits 8 and up, reduced modulo PLANE_NUM */
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
1151 
1152 static int pcmp(const void * f, const void * s)
1153 {
1154  const uni_idx *F= (const uni_idx*) f;
1155  const uni_idx *S= (const uni_idx*) s;
1156  int res;
1157 
1158  if (!(res=((S->nchars)-(F->nchars))))
1159  res=((F->uidx.from)-(S->uidx.to));
1160  return res;
1161 }
1162 
1163 static my_bool
1164 create_fromuni(CHARSET_INFO *cs,
1165  MY_CHARSET_LOADER *loader)
1166 {
1167  uni_idx idx[PLANE_NUM];
1168  int i,n;
1169 
1170  /*
1171  Check that Unicode map is loaded.
1172  It can be not loaded when the collation is
1173  listed in Index.xml but not specified
1174  in the character set specific XML file.
1175  */
1176  if (!cs->tab_to_uni)
1177  return TRUE;
1178 
1179  /* Clear plane statistics */
1180  memset(idx, 0, sizeof(idx));
1181 
1182  /* Count number of characters in each plane */
1183  for (i=0; i< 0x100; i++)
1184  {
1185  uint16 wc=cs->tab_to_uni[i];
1186  int pl= PLANE_NUMBER(wc);
1187 
1188  if (wc || !i)
1189  {
1190  if (!idx[pl].nchars)
1191  {
1192  idx[pl].uidx.from=wc;
1193  idx[pl].uidx.to=wc;
1194  }else
1195  {
1196  idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1197  idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1198  }
1199  idx[pl].nchars++;
1200  }
1201  }
1202 
1203  /* Sort planes in descending order */
1204  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1205 
1206  for (i=0; i < PLANE_NUM; i++)
1207  {
1208  int ch,numchars;
1209 
1210  /* Skip empty plane */
1211  if (!idx[i].nchars)
1212  break;
1213 
1214  numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1215  if (!(idx[i].uidx.tab= (uchar *)
1216  (loader->once_alloc) (numchars *
1217  sizeof(*idx[i].uidx.tab))))
1218  return TRUE;
1219 
1220  memset(idx[i].uidx.tab, 0, numchars*sizeof(*idx[i].uidx.tab));
1221 
1222  for (ch=1; ch < PLANE_SIZE; ch++)
1223  {
1224  uint16 wc=cs->tab_to_uni[ch];
1225  if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1226  {
1227  int ofs= wc - idx[i].uidx.from;
1228  idx[i].uidx.tab[ofs]= ch;
1229  }
1230  }
1231  }
1232 
1233  /* Allocate and fill reverse table for each plane */
1234  n=i;
1235  if (!(cs->tab_from_uni= (MY_UNI_IDX *)
1236  (loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1))))
1237  return TRUE;
1238 
1239  for (i=0; i< n; i++)
1240  cs->tab_from_uni[i]= idx[i].uidx;
1241 
1242  /* Set end-of-list marker */
1243  memset(&cs->tab_from_uni[i], 0, sizeof(MY_UNI_IDX));
1244  return FALSE;
1245 }
1246 
1247 static my_bool
1248 my_cset_init_8bit(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
1249 {
1250  cs->caseup_multiply= 1;
1251  cs->casedn_multiply= 1;
1252  cs->pad_char= ' ';
1253  return create_fromuni(cs, loader);
1254 }
1255 
1256 static void set_max_sort_char(CHARSET_INFO *cs)
1257 {
1258  uchar max_char;
1259  uint i;
1260 
1261  if (!cs->sort_order)
1262  return;
1263 
1264  max_char=cs->sort_order[(uchar) cs->max_sort_char];
1265  for (i= 0; i < 256; i++)
1266  {
1267  if ((uchar) cs->sort_order[i] > max_char)
1268  {
1269  max_char=(uchar) cs->sort_order[i];
1270  cs->max_sort_char= i;
1271  }
1272  }
1273 }
1274 
1275 static my_bool
1276 my_coll_init_simple(CHARSET_INFO *cs,
1277  MY_CHARSET_LOADER *loader __attribute__((unused)))
1278 {
1279  set_max_sort_char(cs);
1280  return FALSE;
1281 }
1282 
1283 
1284 longlong my_strtoll10_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1285  const char *nptr, char **endptr, int *error)
1286 {
1287  return my_strtoll10(nptr, endptr, error);
1288 }
1289 
1290 
1291 int my_mb_ctype_8bit(const CHARSET_INFO *cs, int *ctype,
1292  const uchar *s, const uchar *e)
1293 {
1294  if (s >= e)
1295  {
1296  *ctype= 0;
1297  return MY_CS_TOOSMALL;
1298  }
1299  *ctype= cs->ctype[*s + 1];
1300  return 1;
1301 }
1302 
1303 
/* ULONGLONG_MAX split into its leading part and its last decimal digit */
#define CUTOFF  (ULONGLONG_MAX / 10)
#define CUTLIM  (ULONGLONG_MAX % 10)
/* Number of entries in d10[]: the powers of ten 10^0 .. 10^19 */
#define DIGITS_IN_ULONGLONG 20

/* Powers of ten: d10[k] == 10^k */
static ulonglong d10[DIGITS_IN_ULONGLONG]=
{
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
  10000000000ULL,
  100000000000ULL,
  1000000000000ULL,
  10000000000000ULL,
  100000000000000ULL,
  1000000000000000ULL,
  10000000000000000ULL,
  100000000000000000ULL,
  1000000000000000000ULL,
  10000000000000000000ULL
};
1331 
1332 
1333 /*
1334 
1335  Convert a string to unsigned long long integer value
1336  with rounding.
1337 
  SYNOPSIS
    my_strntoull10rnd_8bit()
1340  cs in pointer to character set
1341  str in pointer to the string to be converted
1342  length in string length
1343  unsigned_flag in whether the number is unsigned
1344  endptr out pointer to the stop character
1345  error out returned error code
1346 
1347  DESCRIPTION
1348  This function takes the decimal representation of integer number
1349  from string str and converts it to an signed or unsigned
1350  long long integer value.
1351  Space characters and tab are ignored.
1352  A sign character might precede the digit characters.
1353  The number may have any number of pre-zero digits.
1354  The number may have decimal point and exponent.
1355  Rounding is always done in "away from zero" style:
1356  0.5 -> 1
1357  -0.5 -> -1
1358 
1359  The function stops reading the string str after "length" bytes
1360  or at the first character that is not a part of correct number syntax:
1361 
1362  <signed numeric literal> ::=
1363  [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1364 
1365  <exact numeric literal> ::=
1366  <unsigned integer> [ <period> [ <unsigned integer> ] ]
1367  | <period> <unsigned integer>
1368  <unsigned integer> ::= <digit>...
1369 
1370  RETURN VALUES
1371  Value of string as a signed/unsigned longlong integer
1372 
1373  endptr cannot be NULL. The function will store the end pointer
1374  to the stop character here.
1375 
1376  The error parameter contains information how things went:
1377  0 ok
1378  ERANGE If the the value of the converted number is out of range
1379  In this case the return value is:
1380  - ULONGLONG_MAX if unsigned_flag and the number was too big
1381  - 0 if unsigned_flag and the number was negative
1382  - LONGLONG_MAX if no unsigned_flag and the number is too big
1383  - LONGLONG_MIN if no unsigned_flag and the number it too big negative
1384 
1385  EDOM If the string didn't contain any digits.
1386  In this case the return value is 0.
1387 */
1388 
1389 ulonglong
1390 my_strntoull10rnd_8bit(const CHARSET_INFO *cs __attribute__((unused)),
1391  const char *str, size_t length, int unsigned_flag,
1392  char **endptr, int *error)
1393 {
1394  const char *dot, *end9, *beg, *end= str + length;
1395  ulonglong ull;
1396  ulong ul;
1397  uchar ch;
1398  int shift= 0, digits= 0, negative, addon;
1399 
1400  /* Skip leading spaces and tabs */
1401  for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1402 
1403  if (str >= end)
1404  goto ret_edom;
1405 
1406  if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1407  {
1408  if (++str == end)
1409  goto ret_edom;
1410  }
1411 
1412  beg= str;
1413  end9= (str + 9) > end ? end : (str + 9);
1414  /* Accumulate small number into ulong, for performance purposes */
1415  for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1416  {
1417  ul= ul * 10 + ch;
1418  }
1419 
1420  if (str >= end) /* Small number without dots and expanents */
1421  {
1422  *endptr= (char*) str;
1423  if (negative)
1424  {
1425  if (unsigned_flag)
1426  {
1427  *error= ul ? MY_ERRNO_ERANGE : 0;
1428  return 0;
1429  }
1430  else
1431  {
1432  *error= 0;
1433  return (ulonglong) (longlong) -(long) ul;
1434  }
1435  }
1436  else
1437  {
1438  *error=0;
1439  return (ulonglong) ul;
1440  }
1441  }
1442 
1443  digits= str - beg;
1444 
1445  /* Continue to accumulate into ulonglong */
1446  for (dot= NULL, ull= ul; str < end; str++)
1447  {
1448  if ((ch= (uchar) (*str - '0')) < 10)
1449  {
1450  if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1451  {
1452  ull= ull * 10 + ch;
1453  digits++;
1454  continue;
1455  }
1456  /*
1457  Adding the next digit would overflow.
1458  Remember the next digit in "addon", for rounding.
1459  Scan all digits with an optional single dot.
1460  */
1461  if (ull == CUTOFF)
1462  {
1463  ull= ULONGLONG_MAX;
1464  addon= 1;
1465  str++;
1466  }
1467  else
1468  addon= (*str >= '5');
1469  if (!dot)
1470  {
1471  for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1472  if (str < end && *str == '.')
1473  {
1474  str++;
1475  for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1476  }
1477  }
1478  else
1479  {
1480  shift= dot - str;
1481  for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1482  }
1483  goto exp;
1484  }
1485 
1486  if (*str == '.')
1487  {
1488  if (dot)
1489  {
1490  /* The second dot character */
1491  addon= 0;
1492  goto exp;
1493  }
1494  else
1495  {
1496  dot= str + 1;
1497  }
1498  continue;
1499  }
1500 
1501  /* Unknown character, exit the loop */
1502  break;
1503  }
1504  shift= dot ? dot - str : 0; /* Right shift */
1505  addon= 0;
1506 
1507 exp: /* [ E [ <sign> ] <unsigned integer> ] */
1508 
1509  if (!digits)
1510  {
1511  str= beg;
1512  goto ret_edom;
1513  }
1514 
1515  if (str < end && (*str == 'e' || *str == 'E'))
1516  {
1517  str++;
1518  if (str < end)
1519  {
1520  int negative_exp, exponent;
1521  if ((negative_exp= (*str == '-')) || *str=='+')
1522  {
1523  if (++str == end)
1524  goto ret_sign;
1525  }
1526  for (exponent= 0 ;
1527  str < end && (ch= (uchar) (*str - '0')) < 10;
1528  str++)
1529  {
1530  exponent= exponent * 10 + ch;
1531  }
1532  shift+= negative_exp ? -exponent : exponent;
1533  }
1534  }
1535 
1536  if (shift == 0) /* No shift, check addon digit */
1537  {
1538  if (addon)
1539  {
1540  if (ull == ULONGLONG_MAX)
1541  goto ret_too_big;
1542  ull++;
1543  }
1544  goto ret_sign;
1545  }
1546 
1547  if (shift < 0) /* Right shift */
1548  {
1549  ulonglong d, r;
1550 
1551  if (-shift >= DIGITS_IN_ULONGLONG)
1552  goto ret_zero; /* Exponent is a big negative number, return 0 */
1553 
1554  d= d10[-shift];
1555  r= (ull % d) * 2;
1556  ull /= d;
1557  if (r >= d)
1558  ull++;
1559  goto ret_sign;
1560  }
1561 
1562  if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1563  {
1564  if (!ull)
1565  goto ret_sign;
1566  goto ret_too_big;
1567  }
1568 
1569  for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1570  {
1571  if (ull > CUTOFF)
1572  goto ret_too_big; /* Overflow, number too big */
1573  }
1574 
1575 ret_sign:
1576  *endptr= (char*) str;
1577 
1578  if (!unsigned_flag)
1579  {
1580  if (negative)
1581  {
1582  if (ull > (ulonglong) LONGLONG_MIN)
1583  {
1584  *error= MY_ERRNO_ERANGE;
1585  return (ulonglong) LONGLONG_MIN;
1586  }
1587  *error= 0;
1588  return (ulonglong) -(longlong) ull;
1589  }
1590  else
1591  {
1592  if (ull > (ulonglong) LONGLONG_MAX)
1593  {
1594  *error= MY_ERRNO_ERANGE;
1595  return (ulonglong) LONGLONG_MAX;
1596  }
1597  *error= 0;
1598  return ull;
1599  }
1600  }
1601 
1602  /* Unsigned number */
1603  if (negative && ull)
1604  {
1605  *error= MY_ERRNO_ERANGE;
1606  return 0;
1607  }
1608  *error= 0;
1609  return ull;
1610 
1611 ret_zero:
1612  *endptr= (char*) str;
1613  *error= 0;
1614  return 0;
1615 
1616 ret_edom:
1617  *endptr= (char*) str;
1618  *error= MY_ERRNO_EDOM;
1619  return 0;
1620 
1621 ret_too_big:
1622  *endptr= (char*) str;
1623  *error= MY_ERRNO_ERANGE;
1624  return unsigned_flag ?
1625  ULONGLONG_MAX :
1626  negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
1627 }
1628 
1629 
1630 /*
1631  Check if a constant can be propagated
1632 
1633  SYNOPSIS:
1634  my_propagate_simple()
1635  cs Character set information
1636  str String to convert to double
1637  length Optional length for string.
1638 
1639  NOTES:
1640  Takes the string in the given charset and check
1641  if it can be safely propagated in the optimizer.
1642 
1643  create table t1 (
1644  s char(5) character set latin1 collate latin1_german2_ci);
1645  insert into t1 values (0xf6); -- o-umlaut
1646  select * from t1 where length(s)=1 and s='oe';
1647 
1648  The above query should return one row.
1649  We cannot convert this query into:
1650  select * from t1 where length('oe')=1 and s='oe';
1651 
1652  Currently we don't check the constant itself,
1653  and decide not to propagate a constant
1654  just if the collation itself allows tricky things
1655  like expansions and contractions. In the future
1656  we can write a more sophisticated functions to
1657  check the constants. For example, 'oa' can always
1658  be safety propagated in German2 because unlike
1659  'oe' it does not have any special meaning.
1660 
1661  RETURN
1662  1 if constant can be safely propagated
1663  0 if it is not safe to propagate the constant
1664 */
1665 
1666 
1667 
1668 my_bool my_propagate_simple(const CHARSET_INFO *cs __attribute__((unused)),
1669  const uchar *str __attribute__((unused)),
1670  size_t length __attribute__((unused)))
1671 {
1672  return 1;
1673 }
1674 
1675 
1676 my_bool my_propagate_complex(const CHARSET_INFO *cs __attribute__((unused)),
1677  const uchar *str __attribute__((unused)),
1678  size_t length __attribute__((unused)))
1679 {
1680  return 0;
1681 }
1682 
1683 
1684 /*
1685  Normalize strxfrm flags
1686 
1687  SYNOPSIS:
1688  my_strxfrm_flag_normalize()
1689  flags - non-normalized flags
1690  nlevels - number of levels
1691 
1692  NOTES:
1693  If levels are omitted, then 1-maximum is assumed.
1694  If any level number is greater than the maximum,
1695  it is treated as the maximum.
1696 
1697  RETURN
1698  normalized flags
1699 */
1700 
1701 uint my_strxfrm_flag_normalize(uint flags, uint maximum)
1702 {
1703  DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
1704 
1705  /* If levels are omitted, then 1-maximum is assumed*/
1706  if (!(flags & MY_STRXFRM_LEVEL_ALL))
1707  {
1708  static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
1709  uint flag_pad= flags &
1710  (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1711  flags= def_level_flags[maximum] | flag_pad;
1712  }
1713  else
1714  {
1715  uint i;
1716  uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
1717  uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1718  uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1719  uint flag_pad= flags &
1720  (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1721 
1722  /*
1723  If any level number is greater than the maximum,
1724  it is treated as the maximum.
1725  */
1726  for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
1727  {
1728  uint src_bit= 1 << i;
1729  if (flag_lev & src_bit)
1730  {
1731  uint dst_bit= 1 << MY_MIN(i, maximum);
1732  flags|= dst_bit;
1733  flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
1734  flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
1735  }
1736  }
1737  flags|= flag_pad;
1738  }
1739 
1740  return flags;
1741 }
1742 
1743 
1744 /*
1745  Apply DESC and REVERSE collation rules.
1746 
1747  SYNOPSIS:
1748  my_strxfrm_desc_and_reverse()
1749  str - pointer to string
1750  strend - end of string
1751  flags - flags
1752  level - which level, starting from 0.
1753 
1754  NOTES:
1755  Apply DESC or REVERSE or both flags.
1756 
1757  If DESC flag is given, then the weights
1758  come out NOTed or negated for that level.
1759 
1760  If REVERSE flags is given, then the weights come out in
1761  reverse order for that level, that is, starting with
1762  the last character and ending with the first character.
1763 
1764  If nether DESC nor REVERSE flags are give,
1765  the string is not changed.
1766 
1767 */
1768 void
1769 my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
1770  uint flags, uint level)
1771 {
1772  if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1773  {
1774  if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1775  {
1776  for (strend--; str <= strend;)
1777  {
1778  uchar tmp= *str;
1779  *str++= ~*strend;
1780  *strend--= ~tmp;
1781  }
1782  }
1783  else
1784  {
1785  for (; str < strend; str++)
1786  *str= ~*str;
1787  }
1788  }
1789  else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1790  {
1791  for (strend--; str < strend;)
1792  {
1793  uchar tmp= *str;
1794  *str++= *strend;
1795  *strend--= tmp;
1796  }
1797  }
1798 }
1799 
1800 
1801 size_t
1802 my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs,
1803  uchar *str, uchar *frmend, uchar *strend,
1804  uint nweights, uint flags, uint level)
1805 {
1806  if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1807  {
1808  uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
1809  cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1810  frmend+= fill_length;
1811  }
1812  my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1813  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
1814  {
1815  uint fill_length= strend - frmend;
1816  cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1817  frmend= strend;
1818  }
1819  return frmend - str;
1820 }
1821 
1822 
1823 MY_CHARSET_HANDLER my_charset_8bit_handler=
1824 {
1825  my_cset_init_8bit,
1826  NULL, /* ismbchar */
1827  my_mbcharlen_8bit, /* mbcharlen */
1828  my_numchars_8bit,
1829  my_charpos_8bit,
1830  my_well_formed_len_8bit,
1831  my_lengthsp_8bit,
1832  my_numcells_8bit,
1833  my_mb_wc_8bit,
1834  my_wc_mb_8bit,
1835  my_mb_ctype_8bit,
1836  my_caseup_str_8bit,
1837  my_casedn_str_8bit,
1838  my_caseup_8bit,
1839  my_casedn_8bit,
1840  my_snprintf_8bit,
1841  my_long10_to_str_8bit,
1842  my_longlong10_to_str_8bit,
1843  my_fill_8bit,
1844  my_strntol_8bit,
1845  my_strntoul_8bit,
1846  my_strntoll_8bit,
1847  my_strntoull_8bit,
1848  my_strntod_8bit,
1849  my_strtoll10_8bit,
1850  my_strntoull10rnd_8bit,
1851  my_scan_8bit
1852 };
1853 
1854 MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
1855 {
1856  my_coll_init_simple, /* init */
1857  my_strnncoll_simple,
1858  my_strnncollsp_simple,
1859  my_strnxfrm_simple,
1860  my_strnxfrmlen_simple,
1861  my_like_range_simple,
1862  my_wildcmp_8bit,
1863  my_strcasecmp_8bit,
1864  my_instr_simple,
1865  my_hash_sort_simple,
1866  my_propagate_simple
1867 };