MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ctype-mb.c
1 /* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #include <my_global.h>
17 #include "m_ctype.h"
18 #include "m_string.h"
19 
20 #ifdef USE_MB
21 
22 
23 size_t my_caseup_str_mb(const CHARSET_INFO *cs, char *str)
24 {
25  register uint32 l;
26  register uchar *map= cs->to_upper;
27  char *str_orig= str;
28 
29  while (*str)
30  {
31  /* Pointing after the '\0' is safe here. */
32  if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
33  str+= l;
34  else
35  {
36  *str= (char) map[(uchar)*str];
37  str++;
38  }
39  }
40  return (size_t) (str - str_orig);
41 }
42 
43 
44 size_t my_casedn_str_mb(const CHARSET_INFO *cs, char *str)
45 {
46  register uint32 l;
47  register uchar *map= cs->to_lower;
48  char *str_orig= str;
49 
50  while (*str)
51  {
52  /* Pointing after the '\0' is safe here. */
53  if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
54  str+= l;
55  else
56  {
57  *str= (char) map[(uchar)*str];
58  str++;
59  }
60  }
61  return (size_t) (str - str_orig);
62 }
63 
64 
65 static inline MY_UNICASE_CHARACTER*
66 get_case_info_for_ch(const CHARSET_INFO *cs, uint page, uint offs)
67 {
69  return cs->caseinfo ? ((p= cs->caseinfo->page[page]) ? &p[offs] : NULL) : NULL;
70 }
71 
72 
73 /*
74  For character sets which don't change octet length in case conversion.
75 */
76 size_t my_caseup_mb(const CHARSET_INFO *cs, char *src, size_t srclen,
77  char *dst __attribute__((unused)),
78  size_t dstlen __attribute__((unused)))
79 {
80  register uint32 l;
81  register char *srcend= src + srclen;
82  register uchar *map= cs->to_upper;
83 
84  DBUG_ASSERT(cs->caseup_multiply == 1);
85  DBUG_ASSERT(src == dst && srclen == dstlen);
86  DBUG_ASSERT(cs->mbmaxlen == 2);
87 
88  while (src < srcend)
89  {
90  if ((l=my_ismbchar(cs, src, srcend)))
91  {
93  if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
94  {
95  *src++= ch->toupper >> 8;
96  *src++= ch->toupper & 0xFF;
97  }
98  else
99  src+= l;
100  }
101  else
102  {
103  *src=(char) map[(uchar) *src];
104  src++;
105  }
106  }
107  return srclen;
108 }
109 
110 
111 size_t my_casedn_mb(const CHARSET_INFO *cs, char *src, size_t srclen,
112  char *dst __attribute__((unused)),
113  size_t dstlen __attribute__((unused)))
114 {
115  register uint32 l;
116  register char *srcend= src + srclen;
117  register uchar *map=cs->to_lower;
118 
119  DBUG_ASSERT(cs->casedn_multiply == 1);
120  DBUG_ASSERT(src == dst && srclen == dstlen);
121  DBUG_ASSERT(cs->mbmaxlen == 2);
122 
123  while (src < srcend)
124  {
125  if ((l= my_ismbchar(cs, src, srcend)))
126  {
128  if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
129  {
130  *src++= ch->tolower >> 8;
131  *src++= ch->tolower & 0xFF;
132  }
133  else
134  src+= l;
135  }
136  else
137  {
138  *src= (char) map[(uchar)*src];
139  src++;
140  }
141  }
142  return srclen;
143 }
144 
145 
146 /*
147  Case folding functions for character set
148  where case conversion can change string octet length.
149  For example, in EUCKR,
150  _euckr 0xA9A5 == "LATIN LETTER DOTLESS I" (Turkish letter)
151  is upper-cased to to
152  _euckr 0x49 "LATIN CAPITAL LETTER I" ('usual' letter I)
153  Length is reduced in this example from two bytes to one byte.
154 */
155 static size_t
156 my_casefold_mb_varlen(const CHARSET_INFO *cs,
157  char *src, size_t srclen,
158  char *dst, size_t dstlen __attribute__((unused)),
159  uchar *map,
160  size_t is_upper)
161 {
162  char *srcend= src + srclen, *dst0= dst;
163 
164  DBUG_ASSERT(cs->mbmaxlen == 2);
165 
166  while (src < srcend)
167  {
168  size_t mblen= my_ismbchar(cs, src, srcend);
169  if (mblen)
170  {
172  if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
173  {
174  int code= is_upper ? ch->toupper : ch->tolower;
175  src+= 2;
176  if (code > 0xFF)
177  *dst++= code >> 8;
178  *dst++= code & 0xFF;
179  }
180  else
181  {
182  *dst++= *src++;
183  *dst++= *src++;
184  }
185  }
186  else
187  {
188  *dst++= (char) map[(uchar) *src++];
189  }
190  }
191  return (size_t) (dst - dst0);
192 }
193 
194 
195 size_t
196 my_casedn_mb_varlen(const CHARSET_INFO *cs, char *src, size_t srclen,
197  char *dst, size_t dstlen)
198 {
199  DBUG_ASSERT(dstlen >= srclen * cs->casedn_multiply);
200  DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
201  return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_lower, 0);
202 }
203 
204 
205 size_t
206 my_caseup_mb_varlen(const CHARSET_INFO *cs, char *src, size_t srclen,
207  char *dst, size_t dstlen)
208 {
209  DBUG_ASSERT(dstlen >= srclen * cs->caseup_multiply);
210  DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
211  return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_upper, 1);
212 }
213 
214 
215 /*
216  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
217  */
218 
219 int my_strcasecmp_mb(const CHARSET_INFO *cs,const char *s, const char *t)
220 {
221  register uint32 l;
222  register uchar *map=cs->to_upper;
223 
224  while (*s && *t)
225  {
226  /* Pointing after the '\0' is safe here. */
227  if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
228  {
229  while (l--)
230  if (*s++ != *t++)
231  return 1;
232  }
233  else if (my_mbcharlen(cs, *t) > 1)
234  return 1;
235  else if (map[(uchar) *s++] != map[(uchar) *t++])
236  return 1;
237  }
238  /* At least one of '*s' and '*t' is zero here. */
239  return (*t != *s);
240 }
241 
242 
/*
** Compare a string against a pattern with wildcards.
** Returns:
**    0  if matched
**    1  if not matched
**   -1  if not matched because the string ran out while wildcards were
**       being handled (a w_many search stops retrying on this value)
*/
249 
/*
  Advance pointer A by one character: by the length my_ismbchar()
  reports for the bytes in [A, B), or by one byte when it reports 0.
  Arguments and the whole expansion are parenthesized so the macro is
  safe inside larger expressions. Note that A and B are still evaluated
  more than once, so they must be free of side effects.
*/
#define INC_PTR(cs,A,B) \
  ((A)+= (my_ismbchar((cs), (A), (B)) ? my_ismbchar((cs), (A), (B)) : 1))

/*
  Sort weight of the single byte A according to (s)->sort_order[].
  A is converted to uchar before indexing, so a negative char value
  cannot index before the start of the table.
*/
#define likeconv(s,A) ((uchar) (s)->sort_order[(uchar) (A)])
253 
254 static
255 int my_wildcmp_mb_impl(const CHARSET_INFO *cs,
256  const char *str,const char *str_end,
257  const char *wildstr,const char *wildend,
258  int escape, int w_one, int w_many, int recurse_level)
259 {
260  int result= -1; /* Not found, using wildcards */
261 
262  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
263  return 1;
264  while (wildstr != wildend)
265  {
266  while (*wildstr != w_many && *wildstr != w_one)
267  {
268  int l;
269  if (*wildstr == escape && wildstr+1 != wildend)
270  wildstr++;
271  if ((l = my_ismbchar(cs, wildstr, wildend)))
272  {
273  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
274  return 1;
275  str += l;
276  wildstr += l;
277  }
278  else
279  if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
280  return(1); /* No match */
281  if (wildstr == wildend)
282  return (str != str_end); /* Match if both are at end */
283  result=1; /* Found an anchor char */
284  }
285  if (*wildstr == w_one)
286  {
287  do
288  {
289  if (str == str_end) /* Skip one char if possible */
290  return (result);
291  INC_PTR(cs,str,str_end);
292  } while (++wildstr < wildend && *wildstr == w_one);
293  if (wildstr == wildend)
294  break;
295  }
296  if (*wildstr == w_many)
297  { /* Found w_many */
298  uchar cmp;
299  const char* mb = wildstr;
300  int mb_len=0;
301 
302  wildstr++;
303  /* Remove any '%' and '_' from the wild search string */
304  for (; wildstr != wildend ; wildstr++)
305  {
306  if (*wildstr == w_many)
307  continue;
308  if (*wildstr == w_one)
309  {
310  if (str == str_end)
311  return (-1);
312  INC_PTR(cs,str,str_end);
313  continue;
314  }
315  break; /* Not a wild character */
316  }
317  if (wildstr == wildend)
318  return(0); /* Ok if w_many is last */
319  if (str == str_end)
320  return -1;
321 
322  if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
323  cmp= *++wildstr;
324 
325  mb=wildstr;
326  mb_len= my_ismbchar(cs, wildstr, wildend);
327  INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
328  cmp=likeconv(cs,cmp);
329  do
330  {
331  for (;;)
332  {
333  if (str >= str_end)
334  return -1;
335  if (mb_len)
336  {
337  if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
338  {
339  str += mb_len;
340  break;
341  }
342  }
343  else if (!my_ismbchar(cs, str, str_end) &&
344  likeconv(cs,*str) == cmp)
345  {
346  str++;
347  break;
348  }
349  INC_PTR(cs,str, str_end);
350  }
351  {
352  int tmp=my_wildcmp_mb_impl(cs,str,str_end,
353  wildstr,wildend,escape,w_one,
354  w_many, recurse_level + 1);
355  if (tmp <= 0)
356  return (tmp);
357  }
358  } while (str != str_end && wildstr[0] != w_many);
359  return(-1);
360  }
361  }
362  return (str != str_end ? 1 : 0);
363 }
364 
365 int my_wildcmp_mb(const CHARSET_INFO *cs,
366  const char *str,const char *str_end,
367  const char *wildstr,const char *wildend,
368  int escape, int w_one, int w_many)
369 {
370  return my_wildcmp_mb_impl(cs, str, str_end,
371  wildstr, wildend,
372  escape, w_one, w_many, 1);
373 }
374 
375 
376 size_t my_numchars_mb(const CHARSET_INFO *cs __attribute__((unused)),
377  const char *pos, const char *end)
378 {
379  register size_t count= 0;
380  while (pos < end)
381  {
382  uint mb_len;
383  pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
384  count++;
385  }
386  return count;
387 }
388 
389 
390 size_t my_charpos_mb(const CHARSET_INFO *cs __attribute__((unused)),
391  const char *pos, const char *end, size_t length)
392 {
393  const char *start= pos;
394 
395  while (length && pos < end)
396  {
397  uint mb_len;
398  pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
399  length--;
400  }
401  return (size_t) (length ? end+2-start : pos-start);
402 }
403 
404 
405 size_t my_well_formed_len_mb(const CHARSET_INFO *cs, const char *b,
406  const char *e, size_t pos, int *error)
407 {
408  const char *b_start= b;
409  *error= 0;
410  while (pos)
411  {
412  my_wc_t wc;
413  int mb_len;
414 
415  if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0)
416  {
417  *error= b < e ? 1 : 0;
418  break;
419  }
420  b+= mb_len;
421  pos--;
422  }
423  return (size_t) (b - b_start);
424 }
425 
426 
427 uint my_instr_mb(const CHARSET_INFO *cs,
428  const char *b, size_t b_length,
429  const char *s, size_t s_length,
430  my_match_t *match, uint nmatch)
431 {
432  register const char *end, *b0;
433  int res= 0;
434 
435  if (s_length <= b_length)
436  {
437  if (!s_length)
438  {
439  if (nmatch)
440  {
441  match->beg= 0;
442  match->end= 0;
443  match->mb_len= 0;
444  }
445  return 1; /* Empty string is always found */
446  }
447 
448  b0= b;
449  end= b+b_length-s_length+1;
450 
451  while (b < end)
452  {
453  int mb_len;
454 
455  if (!cs->coll->strnncoll(cs, (uchar*) b, s_length,
456  (uchar*) s, s_length, 0))
457  {
458  if (nmatch)
459  {
460  match[0].beg= 0;
461  match[0].end= (size_t) (b-b0);
462  match[0].mb_len= res;
463  if (nmatch > 1)
464  {
465  match[1].beg= match[0].end;
466  match[1].end= match[0].end+s_length;
467  match[1].mb_len= 0; /* Not computed */
468  }
469  }
470  return 2;
471  }
472  mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
473  b+= mb_len;
474  b_length-= mb_len;
475  res++;
476  }
477  }
478  return 0;
479 }
480 
481 
482 /* BINARY collations handlers for MB charsets */
483 
484 int
485 my_strnncoll_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
486  const uchar *s, size_t slen,
487  const uchar *t, size_t tlen,
488  my_bool t_is_prefix)
489 {
490  size_t len= MY_MIN(slen,tlen);
491  int cmp= memcmp(s,t,len);
492  return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
493 }
494 
495 
496 /*
497  Compare two strings.
498 
499  SYNOPSIS
500  my_strnncollsp_mb_bin()
501  cs Chararacter set
502  s String to compare
503  slen Length of 's'
504  t String to compare
505  tlen Length of 't'
506  diff_if_only_endspace_difference
507  Set to 1 if the strings should be regarded as different
508  if they only difference in end space
509 
510  NOTE
511  This function is used for character strings with binary collations.
512  The shorter string is extended with end space to be as long as the longer
513  one.
514 
515  RETURN
516  A negative number if s < t
517  A positive number if s > t
518  0 if strings are equal
519 */
520 
521 int
522 my_strnncollsp_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
523  const uchar *a, size_t a_length,
524  const uchar *b, size_t b_length,
525  my_bool diff_if_only_endspace_difference)
526 {
527  const uchar *end;
528  size_t length;
529  int res;
530 
531 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
532  diff_if_only_endspace_difference= 0;
533 #endif
534 
535  end= a + (length= MY_MIN(a_length, b_length));
536  while (a < end)
537  {
538  if (*a++ != *b++)
539  return ((int) a[-1] - (int) b[-1]);
540  }
541  res= 0;
542  if (a_length != b_length)
543  {
544  int swap= 1;
545  if (diff_if_only_endspace_difference)
546  res= 1; /* Assume 'a' is bigger */
547  /*
548  Check the next not space character of the longer key. If it's < ' ',
549  then it's smaller than the other key.
550  */
551  if (a_length < b_length)
552  {
553  /* put shorter key in s */
554  a_length= b_length;
555  a= b;
556  swap= -1; /* swap sign of result */
557  res= -res;
558  }
559  for (end= a + a_length-length; a < end ; a++)
560  {
561  if (*a != ' ')
562  return (*a < ' ') ? -swap : swap;
563  }
564  }
565  return res;
566 }
567 
568 
/*
  Copy one non-ASCII character from "src" to "dst", advancing both.
  "dst" must have enough room for the character.

  The number of bytes is what cs->cset->ismbchar() reports for
  [src, se): 2, 3 or 4 bytes, or a single byte when it reports 0
  (a byte in range 0x80..0xFF which is not a multi-byte head).
  Any other reported value copies nothing.

  Note, sort_order[] is not used in this macro.
  This is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                 \
{                                                                       \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),               \
                               (const char*) (se)))                     \
  {                                                                     \
  case 4:                                                               \
    *(dst)++= *(src)++;                                                 \
    /* fall through */                                                  \
  case 3:                                                               \
    *(dst)++= *(src)++;                                                 \
    /* fall through */                                                  \
  case 2:                                                               \
    *(dst)++= *(src)++;                                                 \
    /* fall through */                                                  \
  case 0:                                                               \
    *(dst)++= *(src)++; /* last byte, or a lone non-head byte */        \
  }                                                                     \
}
593 
594 
595 /*
596  For character sets with two or three byte multi-byte
597  characters having multibyte weights *equal* to their codes:
598  cp932, euckr, gb2312, sjis, eucjpms, ujis.
599 */
600 size_t
601 my_strnxfrm_mb(const CHARSET_INFO *cs,
602  uchar *dst, size_t dstlen, uint nweights,
603  const uchar *src, size_t srclen, uint flags)
604 {
605  uchar *d0= dst;
606  uchar *de= dst + dstlen;
607  const uchar *se= src + srclen;
608  const uchar *sort_order= cs->sort_order;
609 
610  DBUG_ASSERT(cs->mbmaxlen <= 4);
611 
612  /*
613  If "srclen" is smaller than both "dstlen" and "nweights"
614  then we can run a simplified loop -
615  without checking "nweights" and "de".
616  */
617  if (dstlen >= srclen && nweights >= srclen)
618  {
619  if (sort_order)
620  {
621  /* Optimized version for a case insensitive collation */
622  for (; src < se; nweights--)
623  {
624  if (*src < 128) /* quickly catch ASCII characters */
625  *dst++= sort_order[*src++];
626  else
627  my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
628  }
629  }
630  else
631  {
632  /* Optimized version for a case sensitive collation (no sort_order) */
633  for (; src < se; nweights--)
634  {
635  if (*src < 128) /* quickly catch ASCII characters */
636  *dst++= *src++;
637  else
638  my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
639  }
640  }
641  goto pad;
642  }
643 
644  /*
645  A thourough loop, checking all possible limits:
646  "se", "nweights" and "de".
647  */
648  for (; src < se && nweights && dst < de; nweights--)
649  {
650  int chlen;
651  if (*src < 128 ||
652  !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
653  {
654  /* Single byte character */
655  *dst++= sort_order ? sort_order[*src++] : *src++;
656  }
657  else
658  {
659  /* Multi-byte character */
660  int len= (dst + chlen <= de) ? chlen : de - dst;
661  memcpy(dst, src, len);
662  dst+= len;
663  src+= len;
664  }
665  }
666 
667 pad:
668  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
669 }
670 
671 
672 int
673 my_strcasecmp_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
674  const char *s, const char *t)
675 {
676  return strcmp(s,t);
677 }
678 
679 
680 void
681 my_hash_sort_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
682  const uchar *key, size_t len,ulong *nr1, ulong *nr2)
683 {
684  const uchar *pos = key;
685 
686  /*
687  Remove trailing spaces. We have to do this to be able to compare
688  'A ' and 'A' as identical
689  */
690  key= skip_trailing_space(key, len);
691 
692  for (; pos < (uchar*) key ; pos++)
693  {
694  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
695  ((uint)*pos)) + (nr1[0] << 8);
696  nr2[0]+=3;
697  }
698 }
699 
700 
701 /*
702  Fill the given buffer with 'maximum character' for given charset
703  SYNOPSIS
704  pad_max_char()
705  cs Character set
706  str Start of buffer to fill
707  end End of buffer to fill
708 
709  DESCRIPTION
710  Write max key:
711  - for non-Unicode character sets:
712  just memset using max_sort_char if max_sort_char is one byte.
713  In case when max_sort_char is two bytes, fill with double-byte pairs
714  and optionally pad with a single space character.
715  - for Unicode character set (utf-8):
716  create a buffer with multibyte representation of the max_sort_char
717  character, and copy it into max_str in a loop.
718 */
719 static void pad_max_char(const CHARSET_INFO *cs, char *str, char *end)
720 {
721  char buf[10];
722  char buflen;
723 
724  if (!(cs->state & MY_CS_UNICODE))
725  {
726  if (cs->max_sort_char <= 255)
727  {
728  memset(str, cs->max_sort_char, end - str);
729  return;
730  }
731  buf[0]= cs->max_sort_char >> 8;
732  buf[1]= cs->max_sort_char & 0xFF;
733  buflen= 2;
734  }
735  else
736  {
737  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
738  (uchar*) buf + sizeof(buf));
739  }
740 
741  DBUG_ASSERT(buflen > 0);
742  do
743  {
744  if ((str + buflen) <= end)
745  {
746  /* Enough space for the characer */
747  memcpy(str, buf, buflen);
748  str+= buflen;
749  }
750  else
751  {
752  /*
753  There is no space for whole multibyte
754  character, then add trailing spaces.
755  */
756  *str++= ' ';
757  }
758  } while (str < end);
759 }
760 
761 /*
762 ** Calculate min_str and max_str that ranges a LIKE string.
763 ** Arguments:
764 ** ptr Pointer to LIKE string.
765 ** ptr_length Length of LIKE string.
766 ** escape Escape character in LIKE. (Normally '\').
767 ** All escape characters should be removed from min_str and max_str
768 ** res_length Length of min_str and max_str.
769 ** min_str Smallest case sensitive string that ranges LIKE.
770 ** Should be space padded to res_length.
771 ** max_str Largest case sensitive string that ranges LIKE.
772 ** Normally padded with the biggest character sort value.
773 **
774 ** The function should return 0 if ok and 1 if the LIKE string can't be
775 ** optimized !
776 */
777 
778 my_bool my_like_range_mb(const CHARSET_INFO *cs,
779  const char *ptr,size_t ptr_length,
780  pbool escape, pbool w_one, pbool w_many,
781  size_t res_length,
782  char *min_str,char *max_str,
783  size_t *min_length,size_t *max_length)
784 {
785  uint mb_len;
786  const char *end= ptr + ptr_length;
787  char *min_org= min_str;
788  char *min_end= min_str + res_length;
789  char *max_end= max_str + res_length;
790  size_t maxcharlen= res_length / cs->mbmaxlen;
791  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
792 
793  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
794  {
795  /* We assume here that escape, w_any, w_namy are one-byte characters */
796  if (*ptr == escape && ptr+1 != end)
797  ptr++; /* Skip escape */
798  else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
799  {
800 fill_max_and_min:
801  /*
802  Calculate length of keys:
803  'a\0\0... is the smallest possible string when we have space expand
804  a\ff\ff... is the biggest possible string
805  */
806  *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
807  res_length);
808  *max_length= res_length;
809  /* Create min key */
810  do
811  {
812  *min_str++= (char) cs->min_sort_char;
813  } while (min_str != min_end);
814 
815  /*
816  Write max key: create a buffer with multibyte
817  representation of the max_sort_char character,
818  and copy it into max_str in a loop.
819  */
820  *max_length= res_length;
821  pad_max_char(cs, max_str, max_end);
822  return 0;
823  }
824  if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
825  {
826  if (ptr+mb_len > end || min_str+mb_len > min_end)
827  break;
828  while (mb_len--)
829  *min_str++= *max_str++= *ptr++;
830  }
831  else
832  {
833  /*
834  Special case for collations with contractions.
835  For example, in Chezh, 'ch' is a separate letter
836  which is sorted between 'h' and 'i'.
837  If the pattern 'abc%', 'c' at the end can mean:
838  - letter 'c' itself,
839  - beginning of the contraction 'ch'.
840 
841  If we simply return this LIKE range:
842 
843  'abc\min\min\min' and 'abc\max\max\max'
844 
845  then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
846  will only find values starting from 'abc[^h]',
847  but won't find values starting from 'abch'.
848 
849  We must ignore contraction heads followed by w_one or w_many.
850  ('Contraction head' means any letter which can be the first
851  letter in a contraction)
852 
853  For example, for Czech 'abc%', we will return LIKE range,
854  which is equal to LIKE range for 'ab%':
855 
856  'ab\min\min\min\min' and 'ab\max\max\max\max'.
857 
858  */
859  if (contractions && ptr + 1 < end &&
860  my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
861  {
862  /* Ptr[0] is a contraction head. */
863 
864  if (ptr[1] == w_one || ptr[1] == w_many)
865  {
866  /* Contraction head followed by a wildcard, quit. */
867  goto fill_max_and_min;
868  }
869 
870  /*
871  Some letters can be both contraction heads and contraction tails.
872  For example, in Danish 'aa' is a separate single letter which
873  is sorted after 'z'. So 'a' can be both head and tail.
874 
875  If ptr[0]+ptr[1] is a contraction,
876  then put both letters together.
877 
878  If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
879  is not a contraction, then we put only ptr[0],
880  and continue with ptr[1] on the next loop.
881  */
882  if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
883  my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
884  {
885  /* Contraction found */
886  if (maxcharlen == 1 || min_str + 1 >= min_end)
887  {
888  /* Both contraction parts don't fit, quit */
889  goto fill_max_and_min;
890  }
891 
892  /* Put contraction head */
893  *min_str++= *max_str++= *ptr++;
894  maxcharlen--;
895  }
896  }
897  /* Put contraction tail, or a single character */
898  *min_str++= *max_str++= *ptr++;
899  }
900  }
901 
902  *min_length= *max_length = (size_t) (min_str - min_org);
903  while (min_str != min_end)
904  *min_str++= *max_str++= ' '; /* Because if key compression */
905  return 0;
906 }
907 
908 
933 my_bool
934 my_like_range_generic(const CHARSET_INFO *cs,
935  const char *ptr, size_t ptr_length,
936  pbool escape, pbool w_one, pbool w_many,
937  size_t res_length,
938  char *min_str,char *max_str,
939  size_t *min_length,size_t *max_length)
940 {
941  const char *end= ptr + ptr_length;
942  const char *min_org= min_str;
943  const char *max_org= max_str;
944  char *min_end= min_str + res_length;
945  char *max_end= max_str + res_length;
946  size_t charlen= res_length / cs->mbmaxlen;
947  size_t res_length_diff;
948  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
949 
950  for ( ; charlen > 0; charlen--)
951  {
952  my_wc_t wc, wc2;
953  int res;
954  if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
955  {
956  if (res == MY_CS_ILSEQ) /* Bad sequence */
957  return TRUE; /* min_length and max_length are not important */
958  break; /* End of the string */
959  }
960  ptr+= res;
961 
962  if (wc == (my_wc_t) escape)
963  {
964  if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
965  {
966  if (res == MY_CS_ILSEQ)
967  return TRUE; /* min_length and max_length are not important */
968  /*
969  End of the string: Escape is the last character.
970  Put escape as a normal character.
971  We'll will leave the loop on the next iteration.
972  */
973  }
974  else
975  ptr+= res;
976 
977  /* Put escape character to min_str and max_str */
978  if ((res= cs->cset->wc_mb(cs, wc,
979  (uchar*) min_str, (uchar*) min_end)) <= 0)
980  goto pad_set_lengths; /* No space */
981  min_str+= res;
982 
983  if ((res= cs->cset->wc_mb(cs, wc,
984  (uchar*) max_str, (uchar*) max_end)) <= 0)
985  goto pad_set_lengths; /* No space */
986  max_str+= res;
987  continue;
988  }
989  else if (wc == (my_wc_t) w_one)
990  {
991  if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
992  (uchar*) min_str, (uchar*) min_end)) <= 0)
993  goto pad_set_lengths;
994  min_str+= res;
995 
996  if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
997  (uchar*) max_str, (uchar*) max_end)) <= 0)
998  goto pad_set_lengths;
999  max_str+= res;
1000  continue;
1001  }
1002  else if (wc == (my_wc_t) w_many)
1003  {
1004  /*
1005  Calculate length of keys:
1006  a\min\min... is the smallest possible string
1007  a\max\max... is the biggest possible string
1008  */
1009  *min_length= ((cs->state & MY_CS_BINSORT) ?
1010  (size_t) (min_str - min_org) :
1011  res_length);
1012  *max_length= res_length;
1013  goto pad_min_max;
1014  }
1015 
1016  if (contractions &&
1017  my_uca_can_be_contraction_head(contractions, wc) &&
1018  (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
1019  {
1020  uint16 *weight;
1021  if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
1022  {
1023  /* Contraction head followed by a wildcard */
1024  *min_length= *max_length= res_length;
1025  goto pad_min_max;
1026  }
1027 
1028  if (my_uca_can_be_contraction_tail(contractions, wc2) &&
1029  (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
1030  {
1031  /* Contraction found */
1032  if (charlen == 1)
1033  {
1034  /* contraction does not fit to result */
1035  *min_length= *max_length= res_length;
1036  goto pad_min_max;
1037  }
1038 
1039  ptr+= res;
1040  charlen--;
1041 
1042  /* Put contraction head */
1043  if ((res= cs->cset->wc_mb(cs, wc,
1044  (uchar*) min_str, (uchar*) min_end)) <= 0)
1045  goto pad_set_lengths;
1046  min_str+= res;
1047 
1048  if ((res= cs->cset->wc_mb(cs, wc,
1049  (uchar*) max_str, (uchar*) max_end)) <= 0)
1050  goto pad_set_lengths;
1051  max_str+= res;
1052  wc= wc2; /* Prepare to put contraction tail */
1053  }
1054  }
1055 
1056  /* Normal character, or contraction tail */
1057  if ((res= cs->cset->wc_mb(cs, wc,
1058  (uchar*) min_str, (uchar*) min_end)) <= 0)
1059  goto pad_set_lengths;
1060  min_str+= res;
1061  if ((res= cs->cset->wc_mb(cs, wc,
1062  (uchar*) max_str, (uchar*) max_end)) <= 0)
1063  goto pad_set_lengths;
1064  max_str+= res;
1065  }
1066 
1067 pad_set_lengths:
1068  *min_length= (size_t) (min_str - min_org);
1069  *max_length= (size_t) (max_str - max_org);
1070 
1071 pad_min_max:
1072  /*
1073  Fill up max_str and min_str to res_length.
1074  fill() cannot set incomplete characters and
1075  requires that "length" argument is divisible to mbminlen.
1076  Make sure to call fill() with proper "length" argument.
1077  */
1078  res_length_diff= res_length % cs->mbminlen;
1079  cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
1080  cs->min_sort_char);
1081  cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
1082  cs->max_sort_char);
1083 
1084  /* In case of incomplete characters set the remainder to 0x00's */
1085  if (res_length_diff)
1086  {
1087  /* Example: odd res_length for ucs2 */
1088  memset(min_end - res_length_diff, 0, res_length_diff);
1089  memset(max_end - res_length_diff, 0, res_length_diff);
1090  }
1091  return FALSE;
1092 }
1093 
1094 
1095 static
1096 int
1097 my_wildcmp_mb_bin_impl(const CHARSET_INFO *cs,
1098  const char *str,const char *str_end,
1099  const char *wildstr,const char *wildend,
1100  int escape, int w_one, int w_many, int recurse_level)
1101 {
1102  int result= -1; /* Not found, using wildcards */
1103 
1104  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
1105  return 1;
1106  while (wildstr != wildend)
1107  {
1108  while (*wildstr != w_many && *wildstr != w_one)
1109  {
1110  int l;
1111  if (*wildstr == escape && wildstr+1 != wildend)
1112  wildstr++;
1113  if ((l = my_ismbchar(cs, wildstr, wildend)))
1114  {
1115  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
1116  return 1;
1117  str += l;
1118  wildstr += l;
1119  }
1120  else
1121  if (str == str_end || *wildstr++ != *str++)
1122  return(1); /* No match */
1123  if (wildstr == wildend)
1124  return (str != str_end); /* Match if both are at end */
1125  result=1; /* Found an anchor char */
1126  }
1127  if (*wildstr == w_one)
1128  {
1129  do
1130  {
1131  if (str == str_end) /* Skip one char if possible */
1132  return (result);
1133  INC_PTR(cs,str,str_end);
1134  } while (++wildstr < wildend && *wildstr == w_one);
1135  if (wildstr == wildend)
1136  break;
1137  }
1138  if (*wildstr == w_many)
1139  { /* Found w_many */
1140  uchar cmp;
1141  const char* mb = wildstr;
1142  int mb_len=0;
1143 
1144  wildstr++;
1145  /* Remove any '%' and '_' from the wild search string */
1146  for (; wildstr != wildend ; wildstr++)
1147  {
1148  if (*wildstr == w_many)
1149  continue;
1150  if (*wildstr == w_one)
1151  {
1152  if (str == str_end)
1153  return (-1);
1154  INC_PTR(cs,str,str_end);
1155  continue;
1156  }
1157  break; /* Not a wild character */
1158  }
1159  if (wildstr == wildend)
1160  return(0); /* Ok if w_many is last */
1161  if (str == str_end)
1162  return -1;
1163 
1164  if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
1165  cmp= *++wildstr;
1166 
1167  mb=wildstr;
1168  mb_len= my_ismbchar(cs, wildstr, wildend);
1169  INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
1170  do
1171  {
1172  for (;;)
1173  {
1174  if (str >= str_end)
1175  return -1;
1176  if (mb_len)
1177  {
1178  if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
1179  {
1180  str += mb_len;
1181  break;
1182  }
1183  }
1184  else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
1185  {
1186  str++;
1187  break;
1188  }
1189  INC_PTR(cs,str, str_end);
1190  }
1191  {
1192  int tmp=my_wildcmp_mb_bin_impl(cs,str,str_end,
1193  wildstr,wildend,escape,
1194  w_one,w_many, recurse_level + 1);
1195  if (tmp <= 0)
1196  return (tmp);
1197  }
1198  } while (str != str_end && wildstr[0] != w_many);
1199  return(-1);
1200  }
1201  }
1202  return (str != str_end ? 1 : 0);
1203 }
1204 
1205 int
1206 my_wildcmp_mb_bin(const CHARSET_INFO *cs,
1207  const char *str,const char *str_end,
1208  const char *wildstr,const char *wildend,
1209  int escape, int w_one, int w_many)
1210 {
1211  return my_wildcmp_mb_bin_impl(cs, str, str_end,
1212  wildstr, wildend,
1213  escape, w_one, w_many, 1);
1214 }
1215 
1216 
/*
  Data was produced from EastAsianWidth.txt
  using utt11-dump utility.

  One flag per entry, 256 entries per table. The leading comment on
  each row is the index of its first entry.
  NOTE(review): judging by the names, pgXX presumably describes code
  points U+XX00..U+XXFF - confirm against the code that uses the tables.
*/
static char pg11[256]=
{
/* 00 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 50 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
/* 60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* A0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* B0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* C0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* D0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* E0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* F0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1232 
1233 static char pg23[256]=
1234 {
1235 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1236 0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1237 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1238 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1239 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1240 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1241 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1242 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1243 };
1244 
1245 static char pg2E[256]=
1246 {
1247 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1248 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1249 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1250 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1251 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
1252 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1253 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1254 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
1255 };
1256 
1257 static char pg2F[256]=
1258 {
1259 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1260 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1261 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1262 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1263 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1264 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1265 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
1266 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
1267 };
1268 
1269 static char pg30[256]=
1270 {
1271 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1272 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1273 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1274 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1275 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
1276 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1277 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1278 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1279 };
1280 
1281 static char pg31[256]=
1282 {
1283 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1284 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1285 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1286 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1287 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1288 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
1289 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1290 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1291 };
1292 
1293 static char pg32[256]=
1294 {
1295 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1296 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1297 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1298 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
1299 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1300 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1301 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1302 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
1303 };
1304 
1305 static char pg4D[256]=
1306 {
1307 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1308 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1309 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1310 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1311 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1312 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
1313 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1314 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1315 };
1316 
1317 static char pg9F[256]=
1318 {
1319 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1320 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1321 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1322 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1323 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1324 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1325 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1326 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1327 };
1328 
1329 static char pgA4[256]=
1330 {
1331 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1332 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1333 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1334 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1335 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1336 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1337 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1338 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1339 };
1340 
1341 static char pgD7[256]=
1342 {
1343 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1344 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1345 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1346 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1347 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1348 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1349 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1350 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1351 };
1352 
1353 static char pgFA[256]=
1354 {
1355 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1356 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1357 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1358 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1359 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1360 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1361 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1362 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1363 };
1364 
1365 static char pgFE[256]=
1366 {
1367 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1368 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1369 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1370 1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1371 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1372 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1373 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1374 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1375 };
1376 
1377 static char pgFF[256]=
1378 {
1379 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1380 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1381 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1382 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1383 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1384 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1385 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1386 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1387 };
1388 
1389 static struct {int page; char *p;} utr11_data[256]=
1390 {
1391 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1392 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1393 {0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1394 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1395 {0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1396 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
1397 {0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1398 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1399 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1400 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1401 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1402 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1403 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1404 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1405 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1406 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1407 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1408 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1409 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1410 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1411 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1412 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1413 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1414 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1415 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1416 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1417 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1418 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1419 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1420 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1421 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1422 {0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1423 };
1424 
1425 
1426 size_t my_numcells_mb(const CHARSET_INFO *cs, const char *b, const char *e)
1427 {
1428  my_wc_t wc;
1429  size_t clen= 0;
1430 
1431  while (b < e)
1432  {
1433  int mb_len;
1434  uint pg;
1435  if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0 ||
1436  wc > 0xFFFF)
1437  {
1438  /*
1439  Let's think a wrong sequence takes 1 dysplay cell.
1440  Also, consider supplementary characters as taking one cell.
1441  */
1442  mb_len= 1;
1443  b++;
1444  continue;
1445  }
1446  b+= mb_len;
1447  if (wc > 0xFFFF)
1448  {
1449  if (wc >= 0x20000 && wc <= 0x3FFFD) /* CJK Ideograph Extension B, C */
1450  clen+= 1;
1451  }
1452  else
1453  {
1454  pg= (wc >> 8) & 0xFF;
1455  clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1456  }
1457  clen++;
1458  }
1459  return clen;
1460 }
1461 
1462 
1463 int my_mb_ctype_mb(const CHARSET_INFO *cs, int *ctype,
1464  const uchar *s, const uchar *e)
1465 {
1466  my_wc_t wc;
1467  int res= cs->cset->mb_wc(cs, &wc, s, e);
1468  if (res <= 0 || wc > 0xFFFF)
1469  *ctype= 0;
1470  else
1471  *ctype= my_uni_ctype[wc>>8].ctype ?
1472  my_uni_ctype[wc>>8].ctype[wc&0xFF] :
1473  my_uni_ctype[wc>>8].pctype;
1474  return res;
1475 }
1476 
1477 
/*
  Collation handler table for the multi-byte binary routines in this file.

  The initializer is positional: each entry fills the next member of
  MY_COLLATION_HANDLER in declaration order, so the order of the lines
  below must not be changed.  The first member (init) is left NULL.
  NOTE(review): MY_COLLATION_HANDLER is declared outside this chunk
  (presumably in m_ctype.h) -- check the member order there before
  adding or removing an entry.
*/
1478 MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1479 {
1480  NULL, /* init */
1481  my_strnncoll_mb_bin,
1482  my_strnncollsp_mb_bin,
1483  my_strnxfrm_mb,
1484  my_strnxfrmlen_simple,
1485  my_like_range_mb,
1486  my_wildcmp_mb_bin,
1487  my_strcasecmp_mb_bin,
1488  my_instr_mb,
1489  my_hash_sort_mb_bin,
1490  my_propagate_simple
1491 };
1492 
1493 
1494 #endif