MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ctype-ucs2.c
1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This library is free software; you can redistribute it and/or
4  modify it under the terms of the GNU Library General Public
5  License as published by the Free Software Foundation; version 2
6  of the License.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Library General Public License for more details.
12 
13  You should have received a copy of the GNU Library General Public
14  License along with this library; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16 
17 /* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
18 
19 #include <my_global.h>
20 #include <my_sys.h>
21 #include "m_string.h"
22 #include "m_ctype.h"
23 #include <errno.h>
24 #include <stdarg.h>
25 
26 
27 #if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
28 #define HAVE_CHARSET_mb2
29 #endif
30 
31 
32 #if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
33 #define HAVE_CHARSET_mb2_or_mb4
34 #endif
35 
36 
37 #ifndef EILSEQ
38 #define EILSEQ ENOENT
39 #endif
40 
/*
  Limits and scaling factors used by the decimal string-to-integer
  converters in this file.
*/
#undef  ULONGLONG_MAX
#define ULONGLONG_MAX           (~(ulonglong) 0)
#define MAX_NEGATIVE_NUMBER     ((ulonglong) LL(0x8000000000000000))
#define INIT_CNT                9            /* Digits gathered per chunk */
#define LFACTOR                 ULL(1000000000)      /* 10^9  */
#define LFACTOR1                ULL(10000000000)     /* 10^10 */
#define LFACTOR2                ULL(100000000000)    /* 10^11 */

/* lfactor[n] == 10^n for n in 0..8; read-only lookup table. */
static const unsigned long lfactor[9]=
{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
51 
52 
53 
54 #ifdef HAVE_CHARSET_mb2_or_mb4
55 static inline int
56 my_bincmp(const uchar *s, const uchar *se,
57  const uchar *t, const uchar *te)
58 {
59  int slen= (int) (se - s), tlen= (int) (te - t);
60  int len= MY_MIN(slen, tlen);
61  int cmp= memcmp(s, t, len);
62  return cmp ? cmp : slen - tlen;
63 }
64 
65 
66 static size_t
67 my_caseup_str_mb2_or_mb4(const CHARSET_INFO * cs __attribute__((unused)),
68  char * s __attribute__((unused)))
69 {
70  DBUG_ASSERT(0);
71  return 0;
72 }
73 
74 
75 static size_t
76 my_casedn_str_mb2_or_mb4(const CHARSET_INFO *cs __attribute__((unused)),
77  char * s __attribute__((unused)))
78 {
79  DBUG_ASSERT(0);
80  return 0;
81 }
82 
83 
84 static int
85 my_strcasecmp_mb2_or_mb4(const CHARSET_INFO *cs __attribute__((unused)),
86  const char *s __attribute__((unused)),
87  const char *t __attribute__((unused)))
88 {
89  DBUG_ASSERT(0);
90  return 0;
91 }
92 
93 
94 static long
95 my_strntol_mb2_or_mb4(const CHARSET_INFO *cs,
96  const char *nptr, size_t l, int base,
97  char **endptr, int *err)
98 {
99  int negative= 0;
100  int overflow;
101  int cnv;
102  my_wc_t wc;
103  register unsigned int cutlim;
104  register uint32 cutoff;
105  register uint32 res;
106  register const uchar *s= (const uchar*) nptr;
107  register const uchar *e= (const uchar*) nptr+l;
108  const uchar *save;
109 
110  *err= 0;
111  do
112  {
113  if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
114  {
115  switch (wc)
116  {
117  case ' ' : break;
118  case '\t': break;
119  case '-' : negative= !negative; break;
120  case '+' : break;
121  default : goto bs;
122  }
123  }
124  else /* No more characters or bad multibyte sequence */
125  {
126  if (endptr != NULL )
127  *endptr= (char*) s;
128  err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
129  return 0;
130  }
131  s+= cnv;
132  } while (1);
133 
134 bs:
135 
136  overflow= 0;
137  res= 0;
138  save= s;
139  cutoff= ((uint32)~0L) / (uint32) base;
140  cutlim= (uint) (((uint32)~0L) % (uint32) base);
141 
142  do {
143  if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
144  {
145  s+= cnv;
146  if (wc >= '0' && wc <= '9')
147  wc-= '0';
148  else if (wc >= 'A' && wc <= 'Z')
149  wc= wc - 'A' + 10;
150  else if (wc >= 'a' && wc <= 'z')
151  wc= wc - 'a' + 10;
152  else
153  break;
154  if ((int)wc >= base)
155  break;
156  if (res > cutoff || (res == cutoff && wc > cutlim))
157  overflow= 1;
158  else
159  {
160  res*= (uint32) base;
161  res+= wc;
162  }
163  }
164  else if (cnv == MY_CS_ILSEQ)
165  {
166  if (endptr !=NULL )
167  *endptr = (char*) s;
168  err[0]= EILSEQ;
169  return 0;
170  }
171  else
172  {
173  /* No more characters */
174  break;
175  }
176  } while(1);
177 
178  if (endptr != NULL)
179  *endptr = (char *) s;
180 
181  if (s == save)
182  {
183  err[0]= EDOM;
184  return 0L;
185  }
186 
187  if (negative)
188  {
189  if (res > (uint32) INT_MIN32)
190  overflow= 1;
191  }
192  else if (res > INT_MAX32)
193  overflow= 1;
194 
195  if (overflow)
196  {
197  err[0]= ERANGE;
198  return negative ? INT_MIN32 : INT_MAX32;
199  }
200 
201  return (negative ? -((long) res) : (long) res);
202 }
203 
204 
205 static ulong
206 my_strntoul_mb2_or_mb4(const CHARSET_INFO *cs,
207  const char *nptr, size_t l, int base,
208  char **endptr, int *err)
209 {
210  int negative= 0;
211  int overflow;
212  int cnv;
213  my_wc_t wc;
214  register unsigned int cutlim;
215  register uint32 cutoff;
216  register uint32 res;
217  register const uchar *s= (const uchar*) nptr;
218  register const uchar *e= (const uchar*) nptr + l;
219  const uchar *save;
220 
221  *err= 0;
222  do
223  {
224  if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
225  {
226  switch (wc)
227  {
228  case ' ' : break;
229  case '\t': break;
230  case '-' : negative= !negative; break;
231  case '+' : break;
232  default : goto bs;
233  }
234  }
235  else /* No more characters or bad multibyte sequence */
236  {
237  if (endptr !=NULL )
238  *endptr= (char*)s;
239  err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
240  return 0;
241  }
242  s+= cnv;
243  } while (1);
244 
245 bs:
246 
247  overflow= 0;
248  res= 0;
249  save= s;
250  cutoff= ((uint32)~0L) / (uint32) base;
251  cutlim= (uint) (((uint32)~0L) % (uint32) base);
252 
253  do
254  {
255  if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
256  {
257  s+= cnv;
258  if (wc >= '0' && wc <= '9')
259  wc-= '0';
260  else if (wc >= 'A' && wc <= 'Z')
261  wc= wc - 'A' + 10;
262  else if (wc >= 'a' && wc <= 'z')
263  wc= wc - 'a' + 10;
264  else
265  break;
266  if ((int) wc >= base)
267  break;
268  if (res > cutoff || (res == cutoff && wc > cutlim))
269  overflow = 1;
270  else
271  {
272  res*= (uint32) base;
273  res+= wc;
274  }
275  }
276  else if (cnv == MY_CS_ILSEQ)
277  {
278  if (endptr != NULL )
279  *endptr= (char*)s;
280  err[0]= EILSEQ;
281  return 0;
282  }
283  else
284  {
285  /* No more characters */
286  break;
287  }
288  } while(1);
289 
290  if (endptr != NULL)
291  *endptr= (char *) s;
292 
293  if (s == save)
294  {
295  err[0]= EDOM;
296  return 0L;
297  }
298 
299  if (overflow)
300  {
301  err[0]= (ERANGE);
302  return (~(uint32) 0);
303  }
304 
305  return (negative ? -((long) res) : (long) res);
306 }
307 
308 
309 static longlong
310 my_strntoll_mb2_or_mb4(const CHARSET_INFO *cs,
311  const char *nptr, size_t l, int base,
312  char **endptr, int *err)
313 {
314  int negative=0;
315  int overflow;
316  int cnv;
317  my_wc_t wc;
318  register ulonglong cutoff;
319  register unsigned int cutlim;
320  register ulonglong res;
321  register const uchar *s= (const uchar*) nptr;
322  register const uchar *e= (const uchar*) nptr+l;
323  const uchar *save;
324 
325  *err= 0;
326  do
327  {
328  if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
329  {
330  switch (wc)
331  {
332  case ' ' : break;
333  case '\t': break;
334  case '-' : negative= !negative; break;
335  case '+' : break;
336  default : goto bs;
337  }
338  }
339  else /* No more characters or bad multibyte sequence */
340  {
341  if (endptr !=NULL )
342  *endptr = (char*)s;
343  err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
344  return 0;
345  }
346  s+=cnv;
347  } while (1);
348 
349 bs:
350 
351  overflow = 0;
352  res = 0;
353  save = s;
354  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
355  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
356 
357  do {
358  if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
359  {
360  s+=cnv;
361  if ( wc>='0' && wc<='9')
362  wc -= '0';
363  else if ( wc>='A' && wc<='Z')
364  wc = wc - 'A' + 10;
365  else if ( wc>='a' && wc<='z')
366  wc = wc - 'a' + 10;
367  else
368  break;
369  if ((int)wc >= base)
370  break;
371  if (res > cutoff || (res == cutoff && wc > cutlim))
372  overflow = 1;
373  else
374  {
375  res *= (ulonglong) base;
376  res += wc;
377  }
378  }
379  else if (cnv==MY_CS_ILSEQ)
380  {
381  if (endptr !=NULL )
382  *endptr = (char*)s;
383  err[0]=EILSEQ;
384  return 0;
385  }
386  else
387  {
388  /* No more characters */
389  break;
390  }
391  } while(1);
392 
393  if (endptr != NULL)
394  *endptr = (char *) s;
395 
396  if (s == save)
397  {
398  err[0]=EDOM;
399  return 0L;
400  }
401 
402  if (negative)
403  {
404  if (res > (ulonglong) LONGLONG_MIN)
405  overflow = 1;
406  }
407  else if (res > (ulonglong) LONGLONG_MAX)
408  overflow = 1;
409 
410  if (overflow)
411  {
412  err[0]=ERANGE;
413  return negative ? LONGLONG_MIN : LONGLONG_MAX;
414  }
415 
416  return (negative ? -((longlong)res) : (longlong)res);
417 }
418 
419 
420 static ulonglong
421 my_strntoull_mb2_or_mb4(const CHARSET_INFO *cs,
422  const char *nptr, size_t l, int base,
423  char **endptr, int *err)
424 {
425  int negative= 0;
426  int overflow;
427  int cnv;
428  my_wc_t wc;
429  register ulonglong cutoff;
430  register unsigned int cutlim;
431  register ulonglong res;
432  register const uchar *s= (const uchar*) nptr;
433  register const uchar *e= (const uchar*) nptr + l;
434  const uchar *save;
435 
436  *err= 0;
437  do
438  {
439  if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
440  {
441  switch (wc)
442  {
443  case ' ' : break;
444  case '\t': break;
445  case '-' : negative= !negative; break;
446  case '+' : break;
447  default : goto bs;
448  }
449  }
450  else /* No more characters or bad multibyte sequence */
451  {
452  if (endptr !=NULL )
453  *endptr = (char*)s;
454  err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
455  return 0;
456  }
457  s+=cnv;
458  } while (1);
459 
460 bs:
461 
462  overflow = 0;
463  res = 0;
464  save = s;
465  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
466  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
467 
468  do
469  {
470  if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
471  {
472  s+=cnv;
473  if ( wc>='0' && wc<='9')
474  wc -= '0';
475  else if ( wc>='A' && wc<='Z')
476  wc = wc - 'A' + 10;
477  else if ( wc>='a' && wc<='z')
478  wc = wc - 'a' + 10;
479  else
480  break;
481  if ((int)wc >= base)
482  break;
483  if (res > cutoff || (res == cutoff && wc > cutlim))
484  overflow = 1;
485  else
486  {
487  res *= (ulonglong) base;
488  res += wc;
489  }
490  }
491  else if (cnv==MY_CS_ILSEQ)
492  {
493  if (endptr !=NULL )
494  *endptr = (char*)s;
495  err[0]= EILSEQ;
496  return 0;
497  }
498  else
499  {
500  /* No more characters */
501  break;
502  }
503  } while(1);
504 
505  if (endptr != NULL)
506  *endptr = (char *) s;
507 
508  if (s == save)
509  {
510  err[0]= EDOM;
511  return 0L;
512  }
513 
514  if (overflow)
515  {
516  err[0]= ERANGE;
517  return (~(ulonglong) 0);
518  }
519 
520  return (negative ? -((longlong) res) : (longlong) res);
521 }
522 
523 
524 static double
525 my_strntod_mb2_or_mb4(const CHARSET_INFO *cs,
526  char *nptr, size_t length,
527  char **endptr, int *err)
528 {
529  char buf[256];
530  double res;
531  register char *b= buf;
532  register const uchar *s= (const uchar*) nptr;
533  const uchar *end;
534  my_wc_t wc;
535  int cnv;
536 
537  *err= 0;
538  /* Cut too long strings */
539  if (length >= sizeof(buf))
540  length= sizeof(buf) - 1;
541  end= s + length;
542 
543  while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
544  {
545  s+= cnv;
546  if (wc > (int) (uchar) 'e' || !wc)
547  break; /* Can't be part of double */
548  *b++= (char) wc;
549  }
550 
551  *endptr= b;
552  res= my_strtod(buf, endptr, err);
553  *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
554  return res;
555 }
556 
557 
558 static ulonglong
559 my_strntoull10rnd_mb2_or_mb4(const CHARSET_INFO *cs,
560  const char *nptr, size_t length,
561  int unsign_fl,
562  char **endptr, int *err)
563 {
564  char buf[256], *b= buf;
565  ulonglong res;
566  const uchar *end, *s= (const uchar*) nptr;
567  my_wc_t wc;
568  int cnv;
569 
570  /* Cut too long strings */
571  if (length >= sizeof(buf))
572  length= sizeof(buf)-1;
573  end= s + length;
574 
575  while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
576  {
577  s+= cnv;
578  if (wc > (int) (uchar) 'e' || !wc)
579  break; /* Can't be a number part */
580  *b++= (char) wc;
581  }
582 
583  res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
584  *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
585  return res;
586 }
587 
588 
589 /*
590  This is a fast version optimized for the case of radix 10 / -10
591 */
592 
593 static size_t
594 my_l10tostr_mb2_or_mb4(const CHARSET_INFO *cs,
595  char *dst, size_t len, int radix, long int val)
596 {
597  char buffer[66];
598  register char *p, *db, *de;
599  long int new_val;
600  int sl= 0;
601  unsigned long int uval = (unsigned long int) val;
602 
603  p= &buffer[sizeof(buffer) - 1];
604  *p= '\0';
605 
606  if (radix < 0)
607  {
608  if (val < 0)
609  {
610  sl= 1;
611  /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
612  uval = (unsigned long int)0 - uval;
613  }
614  }
615 
616  new_val = (long) (uval / 10);
617  *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
618  val= new_val;
619 
620  while (val != 0)
621  {
622  new_val= val / 10;
623  *--p= '0' + (char) (val - new_val * 10);
624  val= new_val;
625  }
626 
627  if (sl)
628  {
629  *--p= '-';
630  }
631 
632  for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
633  {
634  int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
635  if (cnvres > 0)
636  dst+= cnvres;
637  else
638  break;
639  }
640  return (int) (dst - db);
641 }
642 
643 
644 static size_t
645 my_ll10tostr_mb2_or_mb4(const CHARSET_INFO *cs,
646  char *dst, size_t len, int radix, longlong val)
647 {
648  char buffer[65];
649  register char *p, *db, *de;
650  long long_val;
651  int sl= 0;
652  ulonglong uval= (ulonglong) val;
653 
654  if (radix < 0)
655  {
656  if (val < 0)
657  {
658  sl= 1;
659  /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
660  uval = (ulonglong)0 - uval;
661  }
662  }
663 
664  p= &buffer[sizeof(buffer)-1];
665  *p='\0';
666 
667  if (uval == 0)
668  {
669  *--p= '0';
670  goto cnv;
671  }
672 
673  while (uval > (ulonglong) LONG_MAX)
674  {
675  ulonglong quo= uval/(uint) 10;
676  uint rem= (uint) (uval- quo* (uint) 10);
677  *--p= '0' + rem;
678  uval= quo;
679  }
680 
681  long_val= (long) uval;
682  while (long_val != 0)
683  {
684  long quo= long_val/10;
685  *--p= (char) ('0' + (long_val - quo*10));
686  long_val= quo;
687  }
688 
689 cnv:
690  if (sl)
691  {
692  *--p= '-';
693  }
694 
695  for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
696  {
697  int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
698  if (cnvres > 0)
699  dst+= cnvres;
700  else
701  break;
702  }
703  return (int) (dst -db);
704 }
705 
706 #endif /* HAVE_CHARSET_mb2_or_mb4 */
707 
708 
709 #ifdef HAVE_CHARSET_mb2
710 static longlong
711 my_strtoll10_mb2(const CHARSET_INFO *cs,
712  const char *nptr, char **endptr, int *error)
713 {
714  const char *s, *end, *start, *n_end, *true_end;
715  uchar c;
716  unsigned long i, j, k;
717  ulonglong li;
718  int negative;
719  ulong cutoff, cutoff2, cutoff3;
720  my_wc_t wc;
721  int res;
722 
723  s= nptr;
724  /* If fixed length string */
725  if (endptr)
726  {
727  /*
728  Make sure string length is even.
729  Odd length indicates a bug in the caller.
730  Assert in debug, round in production.
731  */
732  DBUG_ASSERT((*endptr - s) % 2 == 0);
733  end= s + ((*endptr - s) / 2) * 2;
734 
735  for ( ; ; ) /* Skip leading spaces and tabs */
736  {
737  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
738  if (res <= 0)
739  goto no_conv;
740  s+= res;
741  if (wc != ' ' && wc != '\t')
742  break;
743  }
744  }
745  else
746  {
747  /* We don't support null terminated strings in UCS2 */
748  goto no_conv;
749  }
750 
751  /* Check for a sign. */
752  negative= 0;
753  if (wc == '-')
754  {
755  *error= -1; /* Mark as negative number */
756  negative= 1;
757  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
758  if (res < 0)
759  goto no_conv;
760  s+= res;
761  cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
762  cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
763  cutoff3= MAX_NEGATIVE_NUMBER % 100;
764  }
765  else
766  {
767  *error= 0;
768  if (wc == '+')
769  {
770  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
771  if (res < 0)
772  goto no_conv;
773  s+= res;
774  }
775  cutoff= ULONGLONG_MAX / LFACTOR2;
776  cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
777  cutoff3= ULONGLONG_MAX % 100;
778  }
779 
780 
781  /* Handle case where we have a lot of pre-zero */
782  if (wc == '0')
783  {
784  i= 0;
785  for ( ; ; s+= res)
786  {
787  if (s == end)
788  goto end_i; /* Return 0 */
789  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
790  if (res < 0)
791  goto no_conv;
792  if (wc != '0')
793  break;
794  }
795  while (wc == '0');
796  n_end= s + 2 * INIT_CNT;
797  }
798  else
799  {
800  /* Read first digit to check that it's a valid number */
801  if ((c= (wc - '0')) > 9)
802  goto no_conv;
803  i= c;
804  n_end= s + 2 * (INIT_CNT-1);
805  }
806 
807  /* Handle first 9 digits and store them in i */
808  if (n_end > end)
809  n_end= end;
810  for ( ; ; )
811  {
812  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) n_end);
813  if (res < 0)
814  break;
815  s+= res;
816  if ((c= (wc - '0')) > 9)
817  goto end_i;
818  i= i*10+c;
819  }
820  if (s == end)
821  goto end_i;
822 
823  /* Handle next 9 digits and store them in j */
824  j= 0;
825  start= s; /* Used to know how much to shift i */
826  n_end= true_end= s + 2 * INIT_CNT;
827  if (n_end > end)
828  n_end= end;
829  do
830  {
831  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
832  if (res < 0)
833  goto no_conv;
834  s+= res;
835  if ((c= (wc - '0')) > 9)
836  goto end_i_and_j;
837  j= j*10+c;
838  } while (s != n_end);
839  if (s == end)
840  {
841  if (s != true_end)
842  goto end_i_and_j;
843  goto end3;
844  }
845  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
846  if (res < 0)
847  goto no_conv;
848  s+= res;
849  if ((c= (wc - '0')) > 9)
850  goto end3;
851 
852  /* Handle the next 1 or 2 digits and store them in k */
853  k=c;
854  if (s == end)
855  goto end4;
856  res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
857  if (res < 0)
858  goto no_conv;
859  s+= res;
860  if ((c= (wc - '0')) > 9)
861  goto end4;
862  k= k*10+c;
863  *endptr= (char*) s;
864 
865  /* number string should have ended here */
866  if (s != end && (c= (wc - '0')) <= 9)
867  goto overflow;
868 
869  /* Check that we didn't get an overflow with the last digit */
870  if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
871  k > cutoff3)))
872  goto overflow;
873  li=i*LFACTOR2+ (ulonglong) j*100 + k;
874  return (longlong) li;
875 
876 overflow: /* *endptr is set here */
877  *error= MY_ERRNO_ERANGE;
878  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
879 
880 end_i:
881  *endptr= (char*) s;
882  return (negative ? ((longlong) -(long) i) : (longlong) i);
883 
884 end_i_and_j:
885  li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
886  *endptr= (char*) s;
887  return (negative ? -((longlong) li) : (longlong) li);
888 
889 end3:
890  li=(ulonglong) i*LFACTOR+ (ulonglong) j;
891  *endptr= (char*) s;
892  return (negative ? -((longlong) li) : (longlong) li);
893 
894 end4:
895  li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
896  *endptr= (char*) s;
897  if (negative)
898  {
899  if (li > MAX_NEGATIVE_NUMBER)
900  goto overflow;
901  return -((longlong) li);
902  }
903  return (longlong) li;
904 
905 no_conv:
906  /* There was no number to convert. */
907  *error= MY_ERRNO_EDOM;
908  *endptr= (char *) nptr;
909  return 0;
910 }
911 
912 
913 static size_t
914 my_scan_mb2(const CHARSET_INFO *cs,
915  const char *str, const char *end, int sequence_type)
916 {
917  const char *str0= str;
918  my_wc_t wc;
919  int res;
920 
921  switch (sequence_type)
922  {
923  case MY_SEQ_SPACES:
924  for (res= cs->cset->mb_wc(cs, &wc,
925  (const uchar *) str, (const uchar *) end);
926  res > 0 && wc == ' ';
927  str+= res,
928  res= cs->cset->mb_wc(cs, &wc,
929  (const uchar *) str, (const uchar *) end))
930  {
931  }
932  return (size_t) (str - str0);
933  default:
934  return 0;
935  }
936 }
937 
938 
939 static void
940 my_fill_mb2(const CHARSET_INFO *cs, char *s, size_t slen, int fill)
941 {
942  char buf[10];
943  int buflen;
944 
945  DBUG_ASSERT((slen % 2) == 0);
946 
947  buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
948  (uchar*) buf + sizeof(buf));
949 
950  DBUG_ASSERT(buflen > 0);
951 
952  while (slen >= (size_t) buflen)
953  {
954  /* Enough space for the characer */
955  memcpy(s, buf, (size_t) buflen);
956  s+= buflen;
957  slen-= buflen;
958  }
959 
960  /*
961  If there are some more space which is not enough
962  for the whole multibyte character, then add trailing zeros.
963  */
964  for ( ; slen; slen--)
965  {
966  *s++= 0x00;
967  }
968 }
969 
970 
971 static int
972 my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
973 {
974  char *start=dst, *end= dst + n - 1;
975  for (; *fmt ; fmt++)
976  {
977  if (fmt[0] != '%')
978  {
979  if (dst == end) /* End of buffer */
980  break;
981 
982  *dst++='\0';
983  *dst++= *fmt; /* Copy ordinary char */
984  continue;
985  }
986 
987  fmt++;
988 
989  /* Skip if max size is used (to be compatible with printf) */
990  while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
991  fmt++;
992 
993  if (*fmt == 'l')
994  fmt++;
995 
996  if (*fmt == 's') /* String parameter */
997  {
998  char *par= va_arg(ap, char *);
999  size_t plen;
1000  size_t left_len= (size_t)(end-dst);
1001  if (!par)
1002  par= (char*) "(null)";
1003  plen= strlen(par);
1004  if (left_len <= plen * 2)
1005  plen = left_len / 2 - 1;
1006 
1007  for ( ; plen ; plen--, dst+=2, par++)
1008  {
1009  dst[0]= '\0';
1010  dst[1]= par[0];
1011  }
1012  continue;
1013  }
1014  else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
1015  {
1016  int iarg;
1017  char nbuf[16];
1018  char *pbuf= nbuf;
1019 
1020  if ((size_t) (end - dst) < 32)
1021  break;
1022  iarg= va_arg(ap, int);
1023  if (*fmt == 'd')
1024  int10_to_str((long) iarg, nbuf, -10);
1025  else
1026  int10_to_str((long) (uint) iarg, nbuf,10);
1027 
1028  for (; pbuf[0]; pbuf++)
1029  {
1030  *dst++= '\0';
1031  *dst++= *pbuf;
1032  }
1033  continue;
1034  }
1035 
1036  /* We come here on '%%', unknown code or too long parameter */
1037  if (dst == end)
1038  break;
1039  *dst++= '\0';
1040  *dst++= '%'; /* % used as % or unknown code */
1041  }
1042 
1043  DBUG_ASSERT(dst <= end);
1044  *dst='\0'; /* End of errmessage */
1045  return (size_t) (dst - start);
1046 }
1047 
1048 
1049 static size_t
1050 my_snprintf_mb2(const CHARSET_INFO *cs __attribute__((unused)),
1051  char* to, size_t n, const char* fmt, ...)
1052 {
1053  va_list args;
1054  va_start(args,fmt);
1055  return my_vsnprintf_mb2(to, n, fmt, args);
1056 }
1057 
1058 
1059 static size_t
1060 my_lengthsp_mb2(const CHARSET_INFO *cs __attribute__((unused)),
1061  const char *ptr, size_t length)
1062 {
1063  const char *end= ptr + length;
1064  while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
1065  end-= 2;
1066  return (size_t) (end - ptr);
1067 }
1068 
1069 #endif /* HAVE_CHARSET_mb2*/
1070 
1071 
1072 
1073 
1074 #ifdef HAVE_CHARSET_utf16
1075 
/*
  D800..DB7F - Non-private surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF

/* Tests on the first byte of a 2-byte unit */
#define MY_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test on a whole code point */
#define MY_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)

/* Combine two bytes (most significant first) into a code point */
#define MY_UTF16_WC2(a, b)       (((a) << 8) + (b))

/*
  Combine the four bytes of a surrogate pair into a code point.
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)
1100 
1101 static int
1102 my_utf16_uni(const CHARSET_INFO *cs __attribute__((unused)),
1103  my_wc_t *pwc, const uchar *s, const uchar *e)
1104 {
1105  if (s + 2 > e)
1106  return MY_CS_TOOSMALL2;
1107 
1108  /*
1109  High bytes: 0xD[89AB] = B'110110??'
1110  Low bytes: 0xD[CDEF] = B'110111??'
1111  Surrogate mask: 0xFC = B'11111100'
1112  */
1113 
1114  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
1115  {
1116  if (s + 4 > e)
1117  return MY_CS_TOOSMALL4;
1118 
1119  if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
1120  return MY_CS_ILSEQ;
1121 
1122  *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
1123  return 4;
1124  }
1125 
1126  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
1127  return MY_CS_ILSEQ;
1128 
1129  *pwc= MY_UTF16_WC2(s[0], s[1]);
1130  return 2;
1131 }
1132 
1133 
1134 static int
1135 my_uni_utf16(const CHARSET_INFO *cs __attribute__((unused)),
1136  my_wc_t wc, uchar *s, uchar *e)
1137 {
1138  if (wc <= 0xFFFF)
1139  {
1140  if (s + 2 > e)
1141  return MY_CS_TOOSMALL2;
1142  if (MY_UTF16_SURROGATE(wc))
1143  return MY_CS_ILUNI;
1144  *s++= (uchar) (wc >> 8);
1145  *s= (uchar) (wc & 0xFF);
1146  return 2;
1147  }
1148 
1149  if (wc <= 0x10FFFF)
1150  {
1151  if (s + 4 > e)
1152  return MY_CS_TOOSMALL4;
1153  *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1154  *s++= (uchar) (wc >> 10) & 0xFF;
1155  *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1156  *s= (uchar) wc & 0xFF;
1157  return 4;
1158  }
1159 
1160  return MY_CS_ILUNI;
1161 }
1162 
1163 
1164 static inline void
1165 my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1166 {
1168  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1169  *wc= page[*wc & 0xFF].tolower;
1170 }
1171 
1172 
1173 static inline void
1174 my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1175 {
1177  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1178  *wc= page[*wc & 0xFF].toupper;
1179 }
1180 
1181 
1182 static inline void
1183 my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1184 {
1185  if (*wc <= uni_plane->maxchar)
1186  {
1188  if ((page= uni_plane->page[*wc >> 8]))
1189  *wc= page[*wc & 0xFF].sort;
1190  }
1191  else
1192  {
1193  *wc= MY_CS_REPLACEMENT_CHARACTER;
1194  }
1195 }
1196 
1197 
1198 
1199 static size_t
1200 my_caseup_utf16(const CHARSET_INFO *cs, char *src, size_t srclen,
1201  char *dst __attribute__((unused)),
1202  size_t dstlen __attribute__((unused)))
1203 {
1204  my_wc_t wc;
1205  int res;
1206  char *srcend= src + srclen;
1207  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1208  DBUG_ASSERT(src == dst && srclen == dstlen);
1209 
1210  while ((src < srcend) &&
1211  (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1212  {
1213  my_toupper_utf16(uni_plane, &wc);
1214  if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1215  break;
1216  src+= res;
1217  }
1218  return srclen;
1219 }
1220 
1221 
1222 static void
1223 my_hash_sort_utf16(const CHARSET_INFO *cs, const uchar *s, size_t slen,
1224  ulong *n1, ulong *n2)
1225 {
1226  my_wc_t wc;
1227  int res;
1228  const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
1229  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1230 
1231  while ((s < e) && (res= cs->cset->mb_wc(cs, &wc,
1232  (uchar *) s, (uchar *) e)) > 0)
1233  {
1234  my_tosort_utf16(uni_plane, &wc);
1235  n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
1236  n2[0]+= 3;
1237  n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
1238  n2[0]+= 3;
1239  s+= res;
1240  }
1241 }
1242 
1243 
1244 static size_t
1245 my_casedn_utf16(const CHARSET_INFO *cs, char *src, size_t srclen,
1246  char *dst __attribute__((unused)),
1247  size_t dstlen __attribute__((unused)))
1248 {
1249  my_wc_t wc;
1250  int res;
1251  char *srcend= src + srclen;
1252  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1253  DBUG_ASSERT(src == dst && srclen == dstlen);
1254 
1255  while ((src < srcend) &&
1256  (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1257  {
1258  my_tolower_utf16(uni_plane, &wc);
1259  if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1260  break;
1261  src+= res;
1262  }
1263  return srclen;
1264 }
1265 
1266 
1267 static int
1268 my_strnncoll_utf16(const CHARSET_INFO *cs,
1269  const uchar *s, size_t slen,
1270  const uchar *t, size_t tlen,
1271  my_bool t_is_prefix)
1272 {
1273  int s_res, t_res;
1274  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1275  const uchar *se= s + slen;
1276  const uchar *te= t + tlen;
1277  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1278 
1279  while (s < se && t < te)
1280  {
1281  s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1282  t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1283 
1284  if (s_res <= 0 || t_res <= 0)
1285  {
1286  /* Incorrect string, compare by char value */
1287  return my_bincmp(s, se, t, te);
1288  }
1289 
1290  my_tosort_utf16(uni_plane, &s_wc);
1291  my_tosort_utf16(uni_plane, &t_wc);
1292 
1293  if (s_wc != t_wc)
1294  {
1295  return s_wc > t_wc ? 1 : -1;
1296  }
1297 
1298  s+= s_res;
1299  t+= t_res;
1300  }
1301  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1302 }
1303 
1304 
1331 static int
1332 my_strnncollsp_utf16(const CHARSET_INFO *cs,
1333  const uchar *s, size_t slen,
1334  const uchar *t, size_t tlen,
1335  my_bool diff_if_only_endspace_difference)
1336 {
1337  int res;
1338  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1339  const uchar *se= s + slen, *te= t + tlen;
1340  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1341 
1342  DBUG_ASSERT((slen % 2) == 0);
1343  DBUG_ASSERT((tlen % 2) == 0);
1344 
1345 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1346  diff_if_only_endspace_difference= FALSE;
1347 #endif
1348 
1349  while (s < se && t < te)
1350  {
1351  int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1352  int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1353 
1354  if (s_res <= 0 || t_res <= 0)
1355  {
1356  /* Incorrect string, compare bytewise */
1357  return my_bincmp(s, se, t, te);
1358  }
1359 
1360  my_tosort_utf16(uni_plane, &s_wc);
1361  my_tosort_utf16(uni_plane, &t_wc);
1362 
1363  if (s_wc != t_wc)
1364  {
1365  return s_wc > t_wc ? 1 : -1;
1366  }
1367 
1368  s+= s_res;
1369  t+= t_res;
1370  }
1371 
1372  slen= (size_t) (se - s);
1373  tlen= (size_t) (te - t);
1374  res= 0;
1375 
1376  if (slen != tlen)
1377  {
1378  int s_res, swap= 1;
1379  if (diff_if_only_endspace_difference)
1380  res= 1; /* Assume 's' is bigger */
1381  if (slen < tlen)
1382  {
1383  slen= tlen;
1384  s= t;
1385  se= te;
1386  swap= -1;
1387  res= -res;
1388  }
1389 
1390  for ( ; s < se; s+= s_res)
1391  {
1392  if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
1393  {
1394  DBUG_ASSERT(0);
1395  return 0;
1396  }
1397  if (s_wc != ' ')
1398  return (s_wc < ' ') ? -swap : swap;
1399  }
1400  }
1401  return res;
1402 }
1403 
1404 
1405 static uint
1406 my_ismbchar_utf16(const CHARSET_INFO *cs, const char *b, const char *e)
1407 {
1408  my_wc_t wc;
1409  int res= cs->cset->mb_wc(cs, &wc, (const uchar *) b, (const uchar *) e);
1410  return (uint) (res > 0 ? res : 0);
1411 }
1412 
1413 
1414 static uint
1415 my_mbcharlen_utf16(const CHARSET_INFO *cs __attribute__((unused)),
1416  uint c __attribute__((unused)))
1417 {
1418  DBUG_ASSERT(0);
1419  return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
1420 }
1421 
1422 
1423 static size_t
1424 my_numchars_utf16(const CHARSET_INFO *cs,
1425  const char *b, const char *e)
1426 {
1427  size_t nchars= 0;
1428  for ( ; ; nchars++)
1429  {
1430  size_t charlen= my_ismbchar_utf16(cs, b, e);
1431  if (!charlen)
1432  break;
1433  b+= charlen;
1434  }
1435  return nchars;
1436 }
1437 
1438 
1439 static size_t
1440 my_charpos_utf16(const CHARSET_INFO *cs,
1441  const char *b, const char *e, size_t pos)
1442 {
1443  const char *b0= b;
1444  uint charlen;
1445 
1446  for ( ; pos; b+= charlen, pos--)
1447  {
1448  if (!(charlen= my_ismbchar(cs, b, e)))
1449  return (e + 2 - b0); /* Error, return pos outside the string */
1450  }
1451  return (size_t) (pos ? (e + 2 - b0) : (b - b0));
1452 }
1453 
1454 
1455 static size_t
1456 my_well_formed_len_utf16(const CHARSET_INFO *cs,
1457  const char *b, const char *e,
1458  size_t nchars, int *error)
1459 {
1460  const char *b0= b;
1461  uint charlen;
1462  *error= 0;
1463 
1464  for ( ; nchars; b+= charlen, nchars--)
1465  {
1466  if (!(charlen= my_ismbchar(cs, b, e)))
1467  {
1468  *error= b < e ? 1 : 0;
1469  break;
1470  }
1471  }
1472  return (size_t) (b - b0);
1473 }
1474 
1475 
1476 static int
1477 my_wildcmp_utf16_ci(const CHARSET_INFO *cs,
1478  const char *str,const char *str_end,
1479  const char *wildstr,const char *wildend,
1480  int escape, int w_one, int w_many)
1481 {
1482  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1483  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1484  escape, w_one, w_many, uni_plane);
1485 }
1486 
1487 
1488 static int
1489 my_wildcmp_utf16_bin(const CHARSET_INFO *cs,
1490  const char *str,const char *str_end,
1491  const char *wildstr,const char *wildend,
1492  int escape, int w_one, int w_many)
1493 {
1494  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1495  escape, w_one, w_many, NULL);
1496 }
1497 
1498 
1499 static int
1500 my_strnncoll_utf16_bin(const CHARSET_INFO *cs,
1501  const uchar *s, size_t slen,
1502  const uchar *t, size_t tlen,
1503  my_bool t_is_prefix)
1504 {
1505  int s_res,t_res;
1506  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1507  const uchar *se=s+slen;
1508  const uchar *te=t+tlen;
1509 
1510  while ( s < se && t < te )
1511  {
1512  s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1513  t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1514 
1515  if (s_res <= 0 || t_res <= 0)
1516  {
1517  /* Incorrect string, compare by char value */
1518  return my_bincmp(s, se, t, te);
1519  }
1520  if (s_wc != t_wc)
1521  {
1522  return s_wc > t_wc ? 1 : -1;
1523  }
1524 
1525  s+= s_res;
1526  t+= t_res;
1527  }
1528  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1529 }
1530 
1531 
1532 static int
1533 my_strnncollsp_utf16_bin(const CHARSET_INFO *cs,
1534  const uchar *s, size_t slen,
1535  const uchar *t, size_t tlen,
1536  my_bool diff_if_only_endspace_difference)
1537 {
1538  int res;
1539  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1540  const uchar *se= s + slen, *te= t + tlen;
1541 
1542  DBUG_ASSERT((slen % 2) == 0);
1543  DBUG_ASSERT((tlen % 2) == 0);
1544 
1545 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1546  diff_if_only_endspace_difference= FALSE;
1547 #endif
1548 
1549  while (s < se && t < te)
1550  {
1551  int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1552  int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1553 
1554  if (s_res <= 0 || t_res <= 0)
1555  {
1556  /* Incorrect string, compare bytewise */
1557  return my_bincmp(s, se, t, te);
1558  }
1559 
1560  if (s_wc != t_wc)
1561  {
1562  return s_wc > t_wc ? 1 : -1;
1563  }
1564 
1565  s+= s_res;
1566  t+= t_res;
1567  }
1568 
1569  slen= (size_t) (se - s);
1570  tlen= (size_t) (te - t);
1571  res= 0;
1572 
1573  if (slen != tlen)
1574  {
1575  int s_res, swap= 1;
1576  if (diff_if_only_endspace_difference)
1577  res= 1; /* Assume 's' is bigger */
1578  if (slen < tlen)
1579  {
1580  slen= tlen;
1581  s= t;
1582  se= te;
1583  swap= -1;
1584  res= -res;
1585  }
1586 
1587  for ( ; s < se; s+= s_res)
1588  {
1589  if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
1590  {
1591  DBUG_ASSERT(0);
1592  return 0;
1593  }
1594  if (s_wc != ' ')
1595  return (s_wc < ' ') ? -swap : swap;
1596  }
1597  }
1598  return res;
1599 }
1600 
1601 
1602 static void
1603 my_hash_sort_utf16_bin(const CHARSET_INFO *cs,
1604  const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
1605 {
1606  const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len);
1607  for ( ; pos < end ; pos++)
1608  {
1609  nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
1610  ((uint)*pos)) + (nr1[0] << 8);
1611  nr2[0]+= 3;
1612  }
1613 }
1614 
1615 
/*
  Collation handler table for the case-folding utf16 collations.
  It is referenced by both my_charset_utf16_general_ci and
  my_charset_utf16le_general_ci. Entries are positional.
*/
1616 static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
1617 {
1618  NULL, /* init */
1619  my_strnncoll_utf16,
1620  my_strnncollsp_utf16, /* comparison with trailing spaces as padding */
1621  my_strnxfrm_unicode,
1622  my_strnxfrmlen_simple,
1623  my_like_range_generic,
1624  my_wildcmp_utf16_ci, /* wildcard match using cs->caseinfo */
1625  my_strcasecmp_mb2_or_mb4,
1626  my_instr_mb,
1627  my_hash_sort_utf16,
1628  my_propagate_simple
1629 };
1630 
1631 
/*
  Collation handler table for the binary utf16 collations.
  It is referenced by both my_charset_utf16_bin and
  my_charset_utf16le_bin. Entries are positional.
*/
1632 static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
1633 {
1634  NULL, /* init */
1635  my_strnncoll_utf16_bin, /* comparison by code point value */
1636  my_strnncollsp_utf16_bin, /* same, trailing spaces as padding */
1637  my_strnxfrm_unicode_full_bin,
1638  my_strnxfrmlen_unicode_full_bin,
1639  my_like_range_generic,
1640  my_wildcmp_utf16_bin, /* wildcard match without a caseinfo table */
1641  my_strcasecmp_mb2_or_mb4,
1642  my_instr_mb,
1643  my_hash_sort_utf16_bin, /* bytewise hash of the lengthsp() part */
1644  my_propagate_simple
1645 };
1646 
1647 
/*
  Character set handler table for "utf16". Entries are positional.
  my_charset_utf16le_handler is identical except for three entries:
  my_lengthsp_mb2, my_utf16_uni and my_uni_utf16 are replaced there by
  their utf16le counterparts.
*/
1648 MY_CHARSET_HANDLER my_charset_utf16_handler=
1649 {
1650  NULL, /* init */
1651  my_ismbchar_utf16, /* ismbchar */
1652  my_mbcharlen_utf16, /* mbcharlen */
1653  my_numchars_utf16,
1654  my_charpos_utf16,
1655  my_well_formed_len_utf16,
1656  my_lengthsp_mb2,
1657  my_numcells_mb,
1658  my_utf16_uni, /* mb_wc */
1659  my_uni_utf16, /* wc_mb */
1660  my_mb_ctype_mb,
1661  my_caseup_str_mb2_or_mb4,
1662  my_casedn_str_mb2_or_mb4,
1663  my_caseup_utf16,
1664  my_casedn_utf16,
1665  my_snprintf_mb2,
1666  my_l10tostr_mb2_or_mb4,
1667  my_ll10tostr_mb2_or_mb4,
1668  my_fill_mb2,
1669  my_strntol_mb2_or_mb4,
1670  my_strntoul_mb2_or_mb4,
1671  my_strntoll_mb2_or_mb4,
1672  my_strntoull_mb2_or_mb4,
1673  my_strntod_mb2_or_mb4,
1674  my_strtoll10_mb2,
1675  my_strntoull10rnd_mb2_or_mb4,
1676  my_scan_mb2
1677 };
1678 
1679 
/*
  Descriptor of the "utf16_general_ci" collation (number 54) of character
  set "utf16". It is flagged MY_CS_PRIMARY, uses my_unicase_default as its
  caseinfo table, and declares characters of 2 to 4 bytes.
*/
1680 CHARSET_INFO my_charset_utf16_general_ci=
1681 {
1682  54,0,0, /* number */
1683  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1684  "utf16", /* cs name */
1685  "utf16_general_ci", /* name */
1686  "UTF-16 Unicode", /* comment */
1687  NULL, /* tailoring */
1688  NULL, /* ctype */
1689  NULL, /* to_lower */
1690  NULL, /* to_upper */
1691  NULL, /* sort_order */
1692  NULL, /* uca */
1693  NULL, /* tab_to_uni */
1694  NULL, /* tab_from_uni */
1695  &my_unicase_default, /* caseinfo */
1696  NULL, /* state_map */
1697  NULL, /* ident_map */
1698  1, /* strxfrm_multiply */
1699  1, /* caseup_multiply */
1700  1, /* casedn_multiply */
1701  2, /* mbminlen */
1702  4, /* mbmaxlen */
1703  0, /* min_sort_char */
1704  0xFFFF, /* max_sort_char */
1705  ' ', /* pad char */
1706  0, /* escape_with_backslash_is_dangerous */
1707  1, /* levels_for_compare */
1708  1, /* levels_for_order */
1709  &my_charset_utf16_handler, /* character set handler table */
1710  &my_collation_utf16_general_ci_handler /* collation handler table */
1711 };
1712 
1713 
/*
  Descriptor of the "utf16_bin" collation (number 55) of character set
  "utf16". It is flagged MY_CS_BINSORT and shares my_charset_utf16_handler
  with utf16_general_ci, but uses the binary collation handler table.
*/
1714 CHARSET_INFO my_charset_utf16_bin=
1715 {
1716  55,0,0, /* number */
1717  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1718  "utf16", /* cs name */
1719  "utf16_bin", /* name */
1720  "UTF-16 Unicode", /* comment */
1721  NULL, /* tailoring */
1722  NULL, /* ctype */
1723  NULL, /* to_lower */
1724  NULL, /* to_upper */
1725  NULL, /* sort_order */
1726  NULL, /* uca */
1727  NULL, /* tab_to_uni */
1728  NULL, /* tab_from_uni */
1729  &my_unicase_default, /* caseinfo */
1730  NULL, /* state_map */
1731  NULL, /* ident_map */
1732  1, /* strxfrm_multiply */
1733  1, /* caseup_multiply */
1734  1, /* casedn_multiply */
1735  2, /* mbminlen */
1736  4, /* mbmaxlen */
1737  0, /* min_sort_char */
1738  0xFFFF, /* max_sort_char */
1739  ' ', /* pad char */
1740  0, /* escape_with_backslash_is_dangerous */
1741  1, /* levels_for_compare */
1742  1, /* levels_for_order */
1743  &my_charset_utf16_handler, /* character set handler table */
1744  &my_collation_utf16_bin_handler /* collation handler table */
1745 };
1746 
1747 
1748 static int
1749 my_utf16le_uni(const CHARSET_INFO *cs __attribute__((unused)),
1750  my_wc_t *pwc, const uchar *s, const uchar *e)
1751 {
1752  my_wc_t lo;
1753 
1754  if (s + 2 > e)
1755  return MY_CS_TOOSMALL2;
1756 
1757  if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
1758  (*pwc > MY_UTF16_SURROGATE_LOW_LAST))
1759  return 2; /* [0000-D7FF,E000-FFFF] */
1760 
1761  if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
1762  return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
1763 
1764  if (s + 4 > e)
1765  return MY_CS_TOOSMALL4;
1766 
1767  s+= 2;
1768 
1769  if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
1770  lo > MY_UTF16_SURROGATE_LOW_LAST)
1771  return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */
1772 
1773  *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
1774  return 4;
1775 }
1776 
1777 
1778 static int
1779 my_uni_utf16le(const CHARSET_INFO *cs __attribute__((unused)),
1780  my_wc_t wc, uchar *s, uchar *e)
1781 {
1782  if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
1783  (wc > MY_UTF16_SURROGATE_LOW_LAST &&
1784  wc <= 0xFFFF))
1785  {
1786  if (s + 2 > e)
1787  return MY_CS_TOOSMALL2;
1788  int2store(s, wc);
1789  return 2; /* [0000-D7FF,E000-FFFF] */
1790  }
1791 
1792  if (wc < 0xFFFF || wc > 0x10FFFF)
1793  return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
1794 
1795  if (s + 4 > e)
1796  return MY_CS_TOOSMALL4;
1797 
1798  wc-= 0x10000;
1799  int2store(s, (0xD800 | ((wc >> 10) & 0x3FF))); s+= 2;
1800  int2store(s, (0xDC00 | (wc & 0x3FF)));
1801  return 4; /* [010000-10FFFF] */
1802 }
1803 
1804 
1805 static size_t
1806 my_lengthsp_utf16le(const CHARSET_INFO *cs __attribute__((unused)),
1807  const char *ptr, size_t length)
1808 {
1809  const char *end= ptr + length;
1810  while (end > ptr + 1 && uint2korr(end - 2) == 0x20)
1811  end-= 2;
1812  return (size_t) (end - ptr);
1813 }
1814 
1815 
/*
  Character set handler table for "utf16le". Entries are positional.
  It matches my_charset_utf16_handler except for the three entries that
  use the utf16le routines: my_lengthsp_utf16le, my_utf16le_uni and
  my_uni_utf16le.
*/
1816 static MY_CHARSET_HANDLER my_charset_utf16le_handler=
1817 {
1818  NULL, /* init */
1819  my_ismbchar_utf16,
1820  my_mbcharlen_utf16,
1821  my_numchars_utf16,
1822  my_charpos_utf16,
1823  my_well_formed_len_utf16,
1824  my_lengthsp_utf16le, /* strips trailing 0x20 units read via uint2korr */
1825  my_numcells_mb,
1826  my_utf16le_uni, /* mb_wc */
1827  my_uni_utf16le, /* wc_mb */
1828  my_mb_ctype_mb,
1829  my_caseup_str_mb2_or_mb4,
1830  my_casedn_str_mb2_or_mb4,
1831  my_caseup_utf16,
1832  my_casedn_utf16,
1833  my_snprintf_mb2,
1834  my_l10tostr_mb2_or_mb4,
1835  my_ll10tostr_mb2_or_mb4,
1836  my_fill_mb2,
1837  my_strntol_mb2_or_mb4,
1838  my_strntoul_mb2_or_mb4,
1839  my_strntoll_mb2_or_mb4,
1840  my_strntoull_mb2_or_mb4,
1841  my_strntod_mb2_or_mb4,
1842  my_strtoll10_mb2,
1843  my_strntoull10rnd_mb2_or_mb4,
1844  my_scan_mb2
1845 };
1846 
1847 
/*
  Descriptor of the "utf16le_general_ci" collation (number 56) of
  character set "utf16le". It is flagged MY_CS_PRIMARY, combines the
  utf16le character set handler with the collation handler table shared
  with utf16_general_ci, and declares characters of 2 to 4 bytes.
*/
1848 CHARSET_INFO my_charset_utf16le_general_ci=
1849 {
1850  56,0,0, /* number */
1851  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1852  "utf16le", /* cs name */
1853  "utf16le_general_ci",/* name */
1854  "UTF-16LE Unicode", /* comment */
1855  NULL, /* tailoring */
1856  NULL, /* ctype */
1857  NULL, /* to_lower */
1858  NULL, /* to_upper */
1859  NULL, /* sort_order */
1860  NULL, /* uca */
1861  NULL, /* tab_to_uni */
1862  NULL, /* tab_from_uni */
1863  &my_unicase_default, /* caseinfo */
1864  NULL, /* state_map */
1865  NULL, /* ident_map */
1866  1, /* strxfrm_multiply */
1867  1, /* caseup_multiply */
1868  1, /* casedn_multiply */
1869  2, /* mbminlen */
1870  4, /* mbmaxlen */
1871  0, /* min_sort_char */
1872  0xFFFF, /* max_sort_char */
1873  ' ', /* pad char */
1874  0, /* escape_with_backslash_is_dangerous */
1875  1, /* levels_for_compare */
1876  1, /* levels_for_order */
1877  &my_charset_utf16le_handler, /* character set handler table */
1878  &my_collation_utf16_general_ci_handler /* collation handler table */
1879 };
1880 
1881 
/*
  Descriptor of the "utf16le_bin" collation (number 62) of character set
  "utf16le". It is flagged MY_CS_BINSORT and combines the utf16le
  character set handler with the binary collation handler table shared
  with utf16_bin.
*/
1882 CHARSET_INFO my_charset_utf16le_bin=
1883 {
1884  62,0,0, /* number */
1885  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1886  "utf16le", /* cs name */
1887  "utf16le_bin", /* name */
1888  "UTF-16LE Unicode", /* comment */
1889  NULL, /* tailoring */
1890  NULL, /* ctype */
1891  NULL, /* to_lower */
1892  NULL, /* to_upper */
1893  NULL, /* sort_order */
1894  NULL, /* uca */
1895  NULL, /* tab_to_uni */
1896  NULL, /* tab_from_uni */
1897  &my_unicase_default, /* caseinfo */
1898  NULL, /* state_map */
1899  NULL, /* ident_map */
1900  1, /* strxfrm_multiply */
1901  1, /* caseup_multiply */
1902  1, /* casedn_multiply */
1903  2, /* mbminlen */
1904  4, /* mbmaxlen */
1905  0, /* min_sort_char */
1906  0xFFFF, /* max_sort_char */
1907  ' ', /* pad char */
1908  0, /* escape_with_backslash_is_dangerous */
1909  1, /* levels_for_compare */
1910  1, /* levels_for_order */
1911  &my_charset_utf16le_handler, /* character set handler table */
1912  &my_collation_utf16_bin_handler /* collation handler table */
1913 };
1914 
1915 
1916 #endif /* HAVE_CHARSET_utf16 */
1917 
1918 
1919 #ifdef HAVE_CHARSET_utf32
1920 
1921 static int
1922 my_utf32_uni(const CHARSET_INFO *cs __attribute__((unused)),
1923  my_wc_t *pwc, const uchar *s, const uchar *e)
1924 {
1925  if (s + 4 > e)
1926  return MY_CS_TOOSMALL4;
1927  *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
1928  return 4;
1929 }
1930 
1931 
1932 static int
1933 my_uni_utf32(const CHARSET_INFO *cs __attribute__((unused)),
1934  my_wc_t wc, uchar *s, uchar *e)
1935 {
1936  if (s + 4 > e)
1937  return MY_CS_TOOSMALL4;
1938 
1939  s[0]= (uchar) (wc >> 24);
1940  s[1]= (uchar) (wc >> 16) & 0xFF;
1941  s[2]= (uchar) (wc >> 8) & 0xFF;
1942  s[3]= (uchar) wc & 0xFF;
1943  return 4;
1944 }
1945 
1946 
1947 static inline void
1948 my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1949 {
1951  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1952  *wc= page[*wc & 0xFF].tolower;
1953 }
1954 
1955 
1956 static inline void
1957 my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1958 {
1960  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1961  *wc= page[*wc & 0xFF].toupper;
1962 }
1963 
1964 
1965 static inline void
1966 my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1967 {
1968  if (*wc <= uni_plane->maxchar)
1969  {
1971  if ((page= uni_plane->page[*wc >> 8]))
1972  *wc= page[*wc & 0xFF].sort;
1973  }
1974  else
1975  {
1976  *wc= MY_CS_REPLACEMENT_CHARACTER;
1977  }
1978 }
1979 
1980 
1981 static size_t
1982 my_caseup_utf32(const CHARSET_INFO *cs, char *src, size_t srclen,
1983  char *dst __attribute__((unused)),
1984  size_t dstlen __attribute__((unused)))
1985 {
1986  my_wc_t wc;
1987  int res;
1988  char *srcend= src + srclen;
1989  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1990  DBUG_ASSERT(src == dst && srclen == dstlen);
1991 
1992  while ((src < srcend) &&
1993  (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
1994  {
1995  my_toupper_utf32(uni_plane, &wc);
1996  if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
1997  break;
1998  src+= res;
1999  }
2000  return srclen;
2001 }
2002 
2003 
2004 static inline void
2005 my_hash_add(ulong *n1, ulong *n2, uint ch)
2006 {
2007  n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
2008  n2[0]+= 3;
2009 }
2010 
2011 
2012 static void
2013 my_hash_sort_utf32(const CHARSET_INFO *cs, const uchar *s, size_t slen,
2014  ulong *n1, ulong *n2)
2015 {
2016  my_wc_t wc;
2017  int res;
2018  const uchar *e= s + slen;
2019  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2020 
2021  /* Skip trailing spaces */
2022  while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
2023  e-= 4;
2024 
2025  while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
2026  {
2027  my_tosort_utf32(uni_plane, &wc);
2028  my_hash_add(n1, n2, (uint) (wc >> 24));
2029  my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
2030  my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
2031  my_hash_add(n1, n2, (uint) (wc & 0xFF));
2032  s+= res;
2033  }
2034 }
2035 
2036 
2037 static size_t
2038 my_casedn_utf32(const CHARSET_INFO *cs, char *src, size_t srclen,
2039  char *dst __attribute__((unused)),
2040  size_t dstlen __attribute__((unused)))
2041 {
2042  my_wc_t wc;
2043  int res;
2044  char *srcend= src + srclen;
2045  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2046  DBUG_ASSERT(src == dst && srclen == dstlen);
2047 
2048  while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
2049  {
2050  my_tolower_utf32(uni_plane,&wc);
2051  if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
2052  break;
2053  src+= res;
2054  }
2055  return srclen;
2056 }
2057 
2058 
2059 static int
2060 my_strnncoll_utf32(const CHARSET_INFO *cs,
2061  const uchar *s, size_t slen,
2062  const uchar *t, size_t tlen,
2063  my_bool t_is_prefix)
2064 {
2065  my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
2066  const uchar *se= s + slen;
2067  const uchar *te= t + tlen;
2068  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2069 
2070  while (s < se && t < te)
2071  {
2072  int s_res= my_utf32_uni(cs, &s_wc, s, se);
2073  int t_res= my_utf32_uni(cs, &t_wc, t, te);
2074 
2075  if ( s_res <= 0 || t_res <= 0)
2076  {
2077  /* Incorrect string, compare by char value */
2078  return my_bincmp(s, se, t, te);
2079  }
2080 
2081  my_tosort_utf32(uni_plane, &s_wc);
2082  my_tosort_utf32(uni_plane, &t_wc);
2083 
2084  if (s_wc != t_wc)
2085  {
2086  return s_wc > t_wc ? 1 : -1;
2087  }
2088 
2089  s+= s_res;
2090  t+= t_res;
2091  }
2092  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
2093 }
2094 
2095 
2123 static int
2124 my_strnncollsp_utf32(const CHARSET_INFO *cs,
2125  const uchar *s, size_t slen,
2126  const uchar *t, size_t tlen,
2127  my_bool diff_if_only_endspace_difference)
2128 {
2129  int res;
2130  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2131  const uchar *se= s + slen, *te= t + tlen;
2132  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2133 
2134  DBUG_ASSERT((slen % 4) == 0);
2135  DBUG_ASSERT((tlen % 4) == 0);
2136 
2137 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
2138  diff_if_only_endspace_difference= FALSE;
2139 #endif
2140 
2141  while ( s < se && t < te )
2142  {
2143  int s_res= my_utf32_uni(cs, &s_wc, s, se);
2144  int t_res= my_utf32_uni(cs, &t_wc, t, te);
2145 
2146  if ( s_res <= 0 || t_res <= 0 )
2147  {
2148  /* Incorrect string, compare bytewise */
2149  return my_bincmp(s, se, t, te);
2150  }
2151 
2152  my_tosort_utf32(uni_plane, &s_wc);
2153  my_tosort_utf32(uni_plane, &t_wc);
2154 
2155  if ( s_wc != t_wc )
2156  {
2157  return s_wc > t_wc ? 1 : -1;
2158  }
2159 
2160  s+= s_res;
2161  t+= t_res;
2162  }
2163 
2164  slen= (size_t) (se - s);
2165  tlen= (size_t) (te - t);
2166  res= 0;
2167 
2168  if (slen != tlen)
2169  {
2170  int s_res, swap= 1;
2171  if (diff_if_only_endspace_difference)
2172  res= 1; /* Assume 's' is bigger */
2173  if (slen < tlen)
2174  {
2175  slen= tlen;
2176  s= t;
2177  se= te;
2178  swap= -1;
2179  res= -res;
2180  }
2181 
2182  for ( ; s < se; s+= s_res)
2183  {
2184  if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0)
2185  {
2186  DBUG_ASSERT(0);
2187  return 0;
2188  }
2189  if (s_wc != ' ')
2190  return (s_wc < ' ') ? -swap : swap;
2191  }
2192  }
2193  return res;
2194 }
2195 
2196 
2197 static size_t
2198 my_strnxfrmlen_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2199  size_t len)
2200 {
2201  return len / 2;
2202 }
2203 
2204 
2205 static uint
2206 my_ismbchar_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2207  const char *b __attribute__((unused)),
2208  const char *e __attribute__((unused)))
2209 {
2210  return 4;
2211 }
2212 
2213 
2214 static uint
2215 my_mbcharlen_utf32(const CHARSET_INFO *cs __attribute__((unused)) ,
2216  uint c __attribute__((unused)))
2217 {
2218  return 4;
2219 }
2220 
2221 
2222 static int
2223 my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
2224 {
2225  char *start= dst, *end= dst + n;
2226  DBUG_ASSERT((n % 4) == 0);
2227  for (; *fmt ; fmt++)
2228  {
2229  if (fmt[0] != '%')
2230  {
2231  if (dst >= end) /* End of buffer */
2232  break;
2233 
2234  *dst++= '\0';
2235  *dst++= '\0';
2236  *dst++= '\0';
2237  *dst++= *fmt; /* Copy ordinary char */
2238  continue;
2239  }
2240 
2241  fmt++;
2242 
2243  /* Skip if max size is used (to be compatible with printf) */
2244  while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
2245  fmt++;
2246 
2247  if (*fmt == 'l')
2248  fmt++;
2249 
2250  if (*fmt == 's') /* String parameter */
2251  {
2252  reg2 char *par= va_arg(ap, char *);
2253  size_t plen;
2254  size_t left_len= (size_t)(end - dst);
2255  if (!par) par= (char*)"(null)";
2256  plen= strlen(par);
2257  if (left_len <= plen*4)
2258  plen= left_len / 4 - 1;
2259 
2260  for ( ; plen ; plen--, dst+= 4, par++)
2261  {
2262  dst[0]= '\0';
2263  dst[1]= '\0';
2264  dst[2]= '\0';
2265  dst[3]= par[0];
2266  }
2267  continue;
2268  }
2269  else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
2270  {
2271  register int iarg;
2272  char nbuf[16];
2273  char *pbuf= nbuf;
2274 
2275  if ((size_t) (end - dst) < 64)
2276  break;
2277  iarg= va_arg(ap, int);
2278  if (*fmt == 'd')
2279  int10_to_str((long) iarg, nbuf, -10);
2280  else
2281  int10_to_str((long) (uint) iarg,nbuf,10);
2282 
2283  for (; pbuf[0]; pbuf++)
2284  {
2285  *dst++= '\0';
2286  *dst++= '\0';
2287  *dst++= '\0';
2288  *dst++= *pbuf;
2289  }
2290  continue;
2291  }
2292 
2293  /* We come here on '%%', unknown code or too long parameter */
2294  if (dst == end)
2295  break;
2296  *dst++= '\0';
2297  *dst++= '\0';
2298  *dst++= '\0';
2299  *dst++= '%'; /* % used as % or unknown code */
2300  }
2301 
2302  DBUG_ASSERT(dst < end);
2303  *dst++= '\0';
2304  *dst++= '\0';
2305  *dst++= '\0';
2306  *dst++= '\0'; /* End of errmessage */
2307  return (size_t) (dst - start - 4);
2308 }
2309 
2310 
2311 static size_t
2312 my_snprintf_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2313  char* to, size_t n, const char* fmt, ...)
2314 {
2315  va_list args;
2316  va_start(args,fmt);
2317  return my_vsnprintf_utf32(to, n, fmt, args);
2318 }
2319 
2320 
2321 static longlong
2322 my_strtoll10_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2323  const char *nptr, char **endptr, int *error)
2324 {
2325  const char *s, *end, *start, *n_end, *true_end;
2326  uchar c;
2327  unsigned long i, j, k;
2328  ulonglong li;
2329  int negative;
2330  ulong cutoff, cutoff2, cutoff3;
2331 
2332  s= nptr;
2333  /* If fixed length string */
2334  if (endptr)
2335  {
2336  /* Make sure string length is even */
2337  end= s + ((*endptr - s) / 4) * 4;
2338  while (s < end && !s[0] && !s[1] && !s[2] &&
2339  (s[3] == ' ' || s[3] == '\t'))
2340  s+= 4;
2341  if (s == end)
2342  goto no_conv;
2343  }
2344  else
2345  {
2346  /* We don't support null terminated strings in UCS2 */
2347  goto no_conv;
2348  }
2349 
2350  /* Check for a sign. */
2351  negative= 0;
2352  if (!s[0] && !s[1] && !s[2] && s[3] == '-')
2353  {
2354  *error= -1; /* Mark as negative number */
2355  negative= 1;
2356  s+= 4;
2357  if (s == end)
2358  goto no_conv;
2359  cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
2360  cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2361  cutoff3= MAX_NEGATIVE_NUMBER % 100;
2362  }
2363  else
2364  {
2365  *error= 0;
2366  if (!s[0] && !s[1] && !s[2] && s[3] == '+')
2367  {
2368  s+= 4;
2369  if (s == end)
2370  goto no_conv;
2371  }
2372  cutoff= ULONGLONG_MAX / LFACTOR2;
2373  cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2374  cutoff3= ULONGLONG_MAX % 100;
2375  }
2376 
2377  /* Handle case where we have a lot of pre-zero */
2378  if (!s[0] && !s[1] && !s[2] && s[3] == '0')
2379  {
2380  i= 0;
2381  do
2382  {
2383  s+= 4;
2384  if (s == end)
2385  goto end_i; /* Return 0 */
2386  }
2387  while (!s[0] && !s[1] && !s[2] && s[3] == '0');
2388  n_end= s + 4 * INIT_CNT;
2389  }
2390  else
2391  {
2392  /* Read first digit to check that it's a valid number */
2393  if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2394  goto no_conv;
2395  i= c;
2396  s+= 4;
2397  n_end= s + 4 * (INIT_CNT-1);
2398  }
2399 
2400  /* Handle first 9 digits and store them in i */
2401  if (n_end > end)
2402  n_end= end;
2403  for (; s != n_end ; s+= 4)
2404  {
2405  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2406  goto end_i;
2407  i= i * 10 + c;
2408  }
2409  if (s == end)
2410  goto end_i;
2411 
2412  /* Handle next 9 digits and store them in j */
2413  j= 0;
2414  start= s; /* Used to know how much to shift i */
2415  n_end= true_end= s + 4 * INIT_CNT;
2416  if (n_end > end)
2417  n_end= end;
2418  do
2419  {
2420  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2421  goto end_i_and_j;
2422  j= j * 10 + c;
2423  s+= 4;
2424  } while (s != n_end);
2425  if (s == end)
2426  {
2427  if (s != true_end)
2428  goto end_i_and_j;
2429  goto end3;
2430  }
2431  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2432  goto end3;
2433 
2434  /* Handle the next 1 or 2 digits and store them in k */
2435  k=c;
2436  s+= 4;
2437  if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2438  goto end4;
2439  k= k * 10 + c;
2440  s+= 2;
2441  *endptr= (char*) s;
2442 
2443  /* number string should have ended here */
2444  if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
2445  goto overflow;
2446 
2447  /* Check that we didn't get an overflow with the last digit */
2448  if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2449  k > cutoff3)))
2450  goto overflow;
2451  li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2452  return (longlong) li;
2453 
2454 overflow: /* *endptr is set here */
2455  *error= MY_ERRNO_ERANGE;
2456  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
2457 
2458 end_i:
2459  *endptr= (char*) s;
2460  return (negative ? ((longlong) -(long) i) : (longlong) i);
2461 
2462 end_i_and_j:
2463  li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
2464  *endptr= (char*) s;
2465  return (negative ? -((longlong) li) : (longlong) li);
2466 
2467 end3:
2468  li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2469  *endptr= (char*) s;
2470  return (negative ? -((longlong) li) : (longlong) li);
2471 
2472 end4:
2473  li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2474  *endptr= (char*) s;
2475  if (negative)
2476  {
2477  if (li > MAX_NEGATIVE_NUMBER)
2478  goto overflow;
2479  return -((longlong) li);
2480  }
2481  return (longlong) li;
2482 
2483 no_conv:
2484  /* There was no number to convert. */
2485  *error= MY_ERRNO_EDOM;
2486  *endptr= (char *) nptr;
2487  return 0;
2488 }
2489 
2490 
2491 static size_t
2492 my_numchars_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2493  const char *b, const char *e)
2494 {
2495  return (size_t) (e - b) / 4;
2496 }
2497 
2498 
2499 static size_t
2500 my_charpos_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2501  const char *b, const char *e, size_t pos)
2502 {
2503  size_t string_length= (size_t) (e - b);
2504  return pos * 4 > string_length ? string_length + 4 : pos * 4;
2505 }
2506 
2507 
2508 static size_t
2509 my_well_formed_len_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2510  const char *b, const char *e,
2511  size_t nchars, int *error)
2512 {
2513  /* Ensure string length is divisible by 4 */
2514  const char *b0= b;
2515  size_t length= e - b;
2516  DBUG_ASSERT((length % 4) == 0);
2517  *error= 0;
2518  nchars*= 4;
2519  if (length > nchars)
2520  {
2521  length= nchars;
2522  e= b + nchars;
2523  }
2524  for (; b < e; b+= 4)
2525  {
2526  /* Don't accept characters greater than U+10FFFF */
2527  if (b[0] || (uchar) b[1] > 0x10)
2528  {
2529  *error= 1;
2530  return b - b0;
2531  }
2532  }
2533  return length;
2534 }
2535 
2536 
2537 static
2538 void my_fill_utf32(const CHARSET_INFO *cs,
2539  char *s, size_t slen, int fill)
2540 {
2541  char buf[10];
2542  uint buflen;
2543  char *e= s + slen;
2544 
2545  DBUG_ASSERT((slen % 4) == 0);
2546 
2547  buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
2548  (uchar*) buf + sizeof(buf));
2549  DBUG_ASSERT(buflen == 4);
2550  while (s < e)
2551  {
2552  memcpy(s, buf, 4);
2553  s+= 4;
2554  }
2555 }
2556 
2557 
2558 static size_t
2559 my_lengthsp_utf32(const CHARSET_INFO *cs __attribute__((unused)),
2560  const char *ptr, size_t length)
2561 {
2562  const char *end= ptr + length;
2563  DBUG_ASSERT((length % 4) == 0);
2564  while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
2565  end-= 4;
2566  return (size_t) (end - ptr);
2567 }
2568 
2569 
2570 static int
2571 my_wildcmp_utf32_ci(const CHARSET_INFO *cs,
2572  const char *str, const char *str_end,
2573  const char *wildstr, const char *wildend,
2574  int escape, int w_one, int w_many)
2575 {
2576  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2577  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2578  escape, w_one, w_many, uni_plane);
2579 }
2580 
2581 
2582 static int
2583 my_wildcmp_utf32_bin(const CHARSET_INFO *cs,
2584  const char *str,const char *str_end,
2585  const char *wildstr,const char *wildend,
2586  int escape, int w_one, int w_many)
2587 {
2588  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2589  escape, w_one, w_many, NULL);
2590 }
2591 
2592 
2593 static int
2594 my_strnncoll_utf32_bin(const CHARSET_INFO *cs,
2595  const uchar *s, size_t slen,
2596  const uchar *t, size_t tlen,
2597  my_bool t_is_prefix)
2598 {
2599  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2600  const uchar *se= s + slen;
2601  const uchar *te= t + tlen;
2602 
2603  while (s < se && t < te)
2604  {
2605  int s_res= my_utf32_uni(cs, &s_wc, s, se);
2606  int t_res= my_utf32_uni(cs, &t_wc, t, te);
2607 
2608  if (s_res <= 0 || t_res <= 0)
2609  {
2610  /* Incorrect string, compare by char value */
2611  return my_bincmp(s, se, t, te);
2612  }
2613  if (s_wc != t_wc)
2614  {
2615  return s_wc > t_wc ? 1 : -1;
2616  }
2617 
2618  s+= s_res;
2619  t+= t_res;
2620  }
2621  return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t)));
2622 }
2623 
2624 
2625 static inline my_wc_t
2626 my_utf32_get(const uchar *s)
2627 {
2628  return
2629  ((my_wc_t) s[0] << 24) +
2630  ((my_wc_t) s[1] << 16) +
2631  ((my_wc_t) s[2] << 8) +
2632  s[3];
2633 }
2634 
2635 
2636 static int
2637 my_strnncollsp_utf32_bin(const CHARSET_INFO *cs __attribute__((unused)),
2638  const uchar *s, size_t slen,
2639  const uchar *t, size_t tlen,
2640  my_bool diff_if_only_endspace_difference
2641  __attribute__((unused)))
2642 {
2643  const uchar *se, *te;
2644  size_t minlen;
2645 
2646  DBUG_ASSERT((slen % 4) == 0);
2647  DBUG_ASSERT((tlen % 4) == 0);
2648 
2649  se= s + slen;
2650  te= t + tlen;
2651 
2652  for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4)
2653  {
2654  my_wc_t s_wc= my_utf32_get(s);
2655  my_wc_t t_wc= my_utf32_get(t);
2656  if (s_wc != t_wc)
2657  return s_wc > t_wc ? 1 : -1;
2658 
2659  s+= 4;
2660  t+= 4;
2661  }
2662 
2663  if (slen != tlen)
2664  {
2665  int swap= 1;
2666  if (slen < tlen)
2667  {
2668  s= t;
2669  se= te;
2670  swap= -1;
2671  }
2672 
2673  for ( ; s < se ; s+= 4)
2674  {
2675  my_wc_t s_wc= my_utf32_get(s);
2676  if (s_wc != ' ')
2677  return (s_wc < ' ') ? -swap : swap;
2678  }
2679  }
2680  return 0;
2681 }
2682 
2683 
2684 static size_t
2685 my_scan_utf32(const CHARSET_INFO *cs,
2686  const char *str, const char *end, int sequence_type)
2687 {
2688  const char *str0= str;
2689 
2690  switch (sequence_type)
2691  {
2692  case MY_SEQ_SPACES:
2693  for ( ; str < end; )
2694  {
2695  my_wc_t wc;
2696  int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
2697  if (res < 0 || wc != ' ')
2698  break;
2699  str+= res;
2700  }
2701  return (size_t) (str - str0);
2702  default:
2703  return 0;
2704  }
2705 }
2706 
2707 
2708 static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
2709 {
2710  NULL, /* init */
2711  my_strnncoll_utf32,
2712  my_strnncollsp_utf32,
2713  my_strnxfrm_unicode,
2714  my_strnxfrmlen_utf32,
2715  my_like_range_generic,
2716  my_wildcmp_utf32_ci,
2717  my_strcasecmp_mb2_or_mb4,
2718  my_instr_mb,
2719  my_hash_sort_utf32,
2720  my_propagate_simple
2721 };
2722 
2723 
2724 static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
2725 {
2726  NULL, /* init */
2727  my_strnncoll_utf32_bin,
2728  my_strnncollsp_utf32_bin,
2729  my_strnxfrm_unicode_full_bin,
2730  my_strnxfrmlen_unicode_full_bin,
2731  my_like_range_generic,
2732  my_wildcmp_utf32_bin,
2733  my_strcasecmp_mb2_or_mb4,
2734  my_instr_mb,
2735  my_hash_sort_utf32,
2736  my_propagate_simple
2737 };
2738 
2739 
2740 MY_CHARSET_HANDLER my_charset_utf32_handler=
2741 {
2742  NULL, /* init */
2743  my_ismbchar_utf32,
2744  my_mbcharlen_utf32,
2745  my_numchars_utf32,
2746  my_charpos_utf32,
2747  my_well_formed_len_utf32,
2748  my_lengthsp_utf32,
2749  my_numcells_mb,
2750  my_utf32_uni,
2751  my_uni_utf32,
2752  my_mb_ctype_mb,
2753  my_caseup_str_mb2_or_mb4,
2754  my_casedn_str_mb2_or_mb4,
2755  my_caseup_utf32,
2756  my_casedn_utf32,
2757  my_snprintf_utf32,
2758  my_l10tostr_mb2_or_mb4,
2759  my_ll10tostr_mb2_or_mb4,
2760  my_fill_utf32,
2761  my_strntol_mb2_or_mb4,
2762  my_strntoul_mb2_or_mb4,
2763  my_strntoll_mb2_or_mb4,
2764  my_strntoull_mb2_or_mb4,
2765  my_strntod_mb2_or_mb4,
2766  my_strtoll10_utf32,
2767  my_strntoull10rnd_mb2_or_mb4,
2768  my_scan_utf32
2769 };
2770 
2771 
2772 CHARSET_INFO my_charset_utf32_general_ci=
2773 {
2774  60,0,0, /* number */
2775  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2776  "utf32", /* cs name */
2777  "utf32_general_ci", /* name */
2778  "UTF-32 Unicode", /* comment */
2779  NULL, /* tailoring */
2780  NULL, /* ctype */
2781  NULL, /* to_lower */
2782  NULL, /* to_upper */
2783  NULL, /* sort_order */
2784  NULL, /* uca */
2785  NULL, /* tab_to_uni */
2786  NULL, /* tab_from_uni */
2787  &my_unicase_default, /* caseinfo */
2788  NULL, /* state_map */
2789  NULL, /* ident_map */
2790  1, /* strxfrm_multiply */
2791  1, /* caseup_multiply */
2792  1, /* casedn_multiply */
2793  4, /* mbminlen */
2794  4, /* mbmaxlen */
2795  0, /* min_sort_char */
2796  0xFFFF, /* max_sort_char */
2797  ' ', /* pad char */
2798  0, /* escape_with_backslash_is_dangerous */
2799  1, /* levels_for_compare */
2800  1, /* levels_for_order */
2801  &my_charset_utf32_handler,
2802  &my_collation_utf32_general_ci_handler
2803 };
2804 
2805 
2806 CHARSET_INFO my_charset_utf32_bin=
2807 {
2808  61,0,0, /* number */
2809  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2810  "utf32", /* cs name */
2811  "utf32_bin", /* name */
2812  "UTF-32 Unicode", /* comment */
2813  NULL, /* tailoring */
2814  NULL, /* ctype */
2815  NULL, /* to_lower */
2816  NULL, /* to_upper */
2817  NULL, /* sort_order */
2818  NULL, /* uca */
2819  NULL, /* tab_to_uni */
2820  NULL, /* tab_from_uni */
2821  &my_unicase_default, /* caseinfo */
2822  NULL, /* state_map */
2823  NULL, /* ident_map */
2824  1, /* strxfrm_multiply */
2825  1, /* caseup_multiply */
2826  1, /* casedn_multiply */
2827  4, /* mbminlen */
2828  4, /* mbmaxlen */
2829  0, /* min_sort_char */
2830  0xFFFF, /* max_sort_char */
2831  ' ', /* pad char */
2832  0, /* escape_with_backslash_is_dangerous */
2833  1, /* levels_for_compare */
2834  1, /* levels_for_order */
2835  &my_charset_utf32_handler,
2836  &my_collation_utf32_bin_handler
2837 };
2838 
2839 
2840 #endif /* HAVE_CHARSET_utf32 */
2841 
2842 
2843 #ifdef HAVE_CHARSET_ucs2
2844 
/*
  Character type table used as the 'ctype' member of the ucs2
  CHARSET_INFO definitions in this file.

  Layout: one leading 0 entry followed by 16 rows of 16 values
  (257 entries in total). The last 8 rows (128 entries) are all 0.
  NOTE(review): entry i + 1 appears to describe byte value i, and the
  values look like character class bit flags (e.g. 132 for '0'..'9',
  129 or 1 for 'A'..'Z', 130 or 2 for 'a'..'z', 72 for ' ') - confirm
  against the flag definitions in m_ctype.h.
*/
2845 static uchar ctype_ucs2[] = {
2846  0,
2847  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
2848  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2849  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
2850  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
2851  16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2852  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
2853  16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2854  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
2855  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2856  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2857  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2858  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2859  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2860  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2861  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2862  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2863 };
2864 
/*
  256-entry lower-casing table used as the 'to_lower' member of the ucs2
  CHARSET_INFO definitions in this file.

  Entry i holds i for every index except 65..90 ('A'..'Z'), which hold
  97..122 ('a'..'z').
*/
2865 static uchar to_lower_ucs2[] = {
2866  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2867  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2868  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2869  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2870  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2871  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
2872  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2873  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
2874  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2875  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2876  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2877  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2878  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2879  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2880  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2881  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
2882 };
2883 
/*
  256-entry upper-casing table used as the 'to_upper' member (and, for
  the two general_ci definitions, also as 'sort_order') of the ucs2
  CHARSET_INFO definitions in this file.

  Entry i holds i for every index except 97..122 ('a'..'z'), which hold
  65..90 ('A'..'Z').
*/
2884 static uchar to_upper_ucs2[] = {
2885  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2886  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2887  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2888  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2889  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2890  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
2891  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2892  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
2893  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2894  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2895  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2896  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2897  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2898  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2899  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2900  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
2901 };
2902 
2903 
2904 static int my_ucs2_uni(const CHARSET_INFO *cs __attribute__((unused)),
2905  my_wc_t * pwc, const uchar *s, const uchar *e)
2906 {
2907  if (s+2 > e) /* Need 2 characters */
2908  return MY_CS_TOOSMALL2;
2909 
2910  *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
2911  return 2;
2912 }
2913 
2914 static int my_uni_ucs2(const CHARSET_INFO *cs __attribute__((unused)) ,
2915  my_wc_t wc, uchar *r, uchar *e)
2916 {
2917  if ( r+2 > e )
2918  return MY_CS_TOOSMALL2;
2919 
2920  if (wc > 0xFFFF) /* UCS2 does not support characters outside BMP */
2921  return MY_CS_ILUNI;
2922 
2923  r[0]= (uchar) (wc >> 8);
2924  r[1]= (uchar) (wc & 0xFF);
2925  return 2;
2926 }
2927 
2928 
2929 static inline void
2930 my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2931 {
2933  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2934  *wc= page[*wc & 0xFF].tolower;
2935 }
2936 
2937 
2938 static inline void
2939 my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2940 {
2942  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2943  *wc= page[*wc & 0xFF].toupper;
2944 }
2945 
2946 
2947 static inline void
2948 my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2949 {
2951  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2952  *wc= page[*wc & 0xFF].sort;
2953 }
2954 
2955 
2956 static size_t my_caseup_ucs2(const CHARSET_INFO *cs, char *src, size_t srclen,
2957  char *dst __attribute__((unused)),
2958  size_t dstlen __attribute__((unused)))
2959 {
2960  my_wc_t wc;
2961  int res;
2962  char *srcend= src + srclen;
2963  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2964  DBUG_ASSERT(src == dst && srclen == dstlen);
2965 
2966  while ((src < srcend) &&
2967  (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
2968  {
2969  my_toupper_ucs2(uni_plane, &wc);
2970  if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
2971  break;
2972  src+= res;
2973  }
2974  return srclen;
2975 }
2976 
2977 
2978 static void my_hash_sort_ucs2(const CHARSET_INFO *cs, const uchar *s,
2979  size_t slen, ulong *n1, ulong *n2)
2980 {
2981  my_wc_t wc;
2982  int res;
2983  const uchar *e=s+slen;
2984  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2985 
2986  while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
2987  e-= 2;
2988 
2989  while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
2990  {
2991  my_tosort_ucs2(uni_plane, &wc);
2992  n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
2993  n2[0]+=3;
2994  n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
2995  n2[0]+=3;
2996  s+=res;
2997  }
2998 }
2999 
3000 
3001 static size_t my_casedn_ucs2(const CHARSET_INFO *cs, char *src, size_t srclen,
3002  char *dst __attribute__((unused)),
3003  size_t dstlen __attribute__((unused)))
3004 {
3005  my_wc_t wc;
3006  int res;
3007  char *srcend= src + srclen;
3008  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3009  DBUG_ASSERT(src == dst && srclen == dstlen);
3010 
3011  while ((src < srcend) &&
3012  (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
3013  {
3014  my_tolower_ucs2(uni_plane, &wc);
3015  if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
3016  break;
3017  src+= res;
3018  }
3019  return srclen;
3020 }
3021 
3022 
3023 static void
3024 my_fill_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3025  char *s, size_t l, int fill)
3026 {
3027  DBUG_ASSERT(fill <= 0xFFFF);
3028  for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
3029 }
3030 
3031 
3032 static int my_strnncoll_ucs2(const CHARSET_INFO *cs,
3033  const uchar *s, size_t slen,
3034  const uchar *t, size_t tlen,
3035  my_bool t_is_prefix)
3036 {
3037  int s_res,t_res;
3038  my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
3039  const uchar *se=s+slen;
3040  const uchar *te=t+tlen;
3041  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3042 
3043  while ( s < se && t < te )
3044  {
3045  s_res=my_ucs2_uni(cs,&s_wc, s, se);
3046  t_res=my_ucs2_uni(cs,&t_wc, t, te);
3047 
3048  if ( s_res <= 0 || t_res <= 0 )
3049  {
3050  /* Incorrect string, compare by char value */
3051  return ((int)s[0]-(int)t[0]);
3052  }
3053 
3054  my_tosort_ucs2(uni_plane, &s_wc);
3055  my_tosort_ucs2(uni_plane, &t_wc);
3056 
3057  if ( s_wc != t_wc )
3058  {
3059  return s_wc > t_wc ? 1 : -1;
3060  }
3061 
3062  s+=s_res;
3063  t+=t_res;
3064  }
3065  return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
3066 }
3067 
3068 /*
3069  Compare strings, discarding end space
3070 
3071  SYNOPSIS
3072  my_strnncollsp_ucs2()
3073  cs character set handler
3074  a First string to compare
3075  a_length Length of 'a'
3076  b Second string to compare
3077  b_length Length of 'b'
3078 
3079  IMPLEMENTATION
3080  If one string is shorter as the other, then we space extend the other
3081  so that the strings have equal length.
3082 
3083  This will ensure that the following things hold:
3084 
3085  "a" == "a "
3086  "a\0" < "a"
3087  "a\0" < "a "
3088 
3089  RETURN
3090  < 0 a < b
3091  = 0 a == b
3092  > 0 a > b
3093 */
3094 
3095 static int my_strnncollsp_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3096  const uchar *s, size_t slen,
3097  const uchar *t, size_t tlen,
3098  my_bool diff_if_only_endspace_difference
3099  __attribute__((unused)))
3100 {
3101  const uchar *se, *te;
3102  size_t minlen;
3103  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3104 
3105  /* extra safety to make sure the lengths are even numbers */
3106  slen&= ~1;
3107  tlen&= ~1;
3108 
3109  se= s + slen;
3110  te= t + tlen;
3111 
3112  for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
3113  {
3114  int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort :
3115  (((int) s[0]) << 8) + (int) s[1];
3116 
3117  int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort :
3118  (((int) t[0]) << 8) + (int) t[1];
3119  if ( s_wc != t_wc )
3120  return s_wc > t_wc ? 1 : -1;
3121 
3122  s+= 2;
3123  t+= 2;
3124  }
3125 
3126  if (slen != tlen)
3127  {
3128  int swap= 1;
3129  if (slen < tlen)
3130  {
3131  s= t;
3132  se= te;
3133  swap= -1;
3134  }
3135 
3136  for ( ; s < se ; s+= 2)
3137  {
3138  if (s[0] || s[1] != ' ')
3139  return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
3140  }
3141  }
3142  return 0;
3143 }
3144 
3145 
3146 static uint my_ismbchar_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3147  const char *b __attribute__((unused)),
3148  const char *e __attribute__((unused)))
3149 {
3150  return 2;
3151 }
3152 
3153 
3154 static uint my_mbcharlen_ucs2(const CHARSET_INFO *cs __attribute__((unused)) ,
3155  uint c __attribute__((unused)))
3156 {
3157  return 2;
3158 }
3159 
3160 
3161 static
3162 size_t my_numchars_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3163  const char *b, const char *e)
3164 {
3165  return (size_t) (e-b)/2;
3166 }
3167 
3168 
3169 static
3170 size_t my_charpos_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3171  const char *b __attribute__((unused)),
3172  const char *e __attribute__((unused)),
3173  size_t pos)
3174 {
3175  size_t string_length= (size_t) (e - b);
3176  return pos > string_length ? string_length + 2 : pos * 2;
3177 }
3178 
3179 
3180 static
3181 size_t my_well_formed_len_ucs2(const CHARSET_INFO *cs __attribute__((unused)),
3182  const char *b, const char *e,
3183  size_t nchars, int *error)
3184 {
3185  /* Ensure string length is dividable with 2 */
3186  size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
3187  *error= 0;
3188  nchars*= 2;
3189  return MY_MIN(nbytes, nchars);
3190 }
3191 
3192 
3193 static
3194 int my_wildcmp_ucs2_ci(const CHARSET_INFO *cs,
3195  const char *str,const char *str_end,
3196  const char *wildstr,const char *wildend,
3197  int escape, int w_one, int w_many)
3198 {
3199  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3200  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3201  escape,w_one,w_many,uni_plane);
3202 }
3203 
3204 
3205 static
3206 int my_wildcmp_ucs2_bin(const CHARSET_INFO *cs,
3207  const char *str,const char *str_end,
3208  const char *wildstr,const char *wildend,
3209  int escape, int w_one, int w_many)
3210 {
3211  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3212  escape,w_one,w_many,NULL);
3213 }
3214 
3215 
3216 static
3217 int my_strnncoll_ucs2_bin(const CHARSET_INFO *cs,
3218  const uchar *s, size_t slen,
3219  const uchar *t, size_t tlen,
3220  my_bool t_is_prefix)
3221 {
3222  int s_res,t_res;
3223  my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
3224  const uchar *se=s+slen;
3225  const uchar *te=t+tlen;
3226 
3227  while ( s < se && t < te )
3228  {
3229  s_res=my_ucs2_uni(cs,&s_wc, s, se);
3230  t_res=my_ucs2_uni(cs,&t_wc, t, te);
3231 
3232  if ( s_res <= 0 || t_res <= 0 )
3233  {
3234  /* Incorrect string, compare by char value */
3235  return ((int)s[0]-(int)t[0]);
3236  }
3237  if ( s_wc != t_wc )
3238  {
3239  return s_wc > t_wc ? 1 : -1;
3240  }
3241 
3242  s+=s_res;
3243  t+=t_res;
3244  }
3245  return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
3246 }
3247 
3248 static int my_strnncollsp_ucs2_bin(const CHARSET_INFO *cs
3249  __attribute__((unused)),
3250  const uchar *s, size_t slen,
3251  const uchar *t, size_t tlen,
3252  my_bool diff_if_only_endspace_difference
3253  __attribute__((unused)))
3254 {
3255  const uchar *se, *te;
3256  size_t minlen;
3257 
3258  /* extra safety to make sure the lengths are even numbers */
3259  slen= (slen >> 1) << 1;
3260  tlen= (tlen >> 1) << 1;
3261 
3262  se= s + slen;
3263  te= t + tlen;
3264 
3265  for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
3266  {
3267  int s_wc= s[0] * 256 + s[1];
3268  int t_wc= t[0] * 256 + t[1];
3269  if ( s_wc != t_wc )
3270  return s_wc > t_wc ? 1 : -1;
3271 
3272  s+= 2;
3273  t+= 2;
3274  }
3275 
3276  if (slen != tlen)
3277  {
3278  int swap= 1;
3279  if (slen < tlen)
3280  {
3281  s= t;
3282  se= te;
3283  swap= -1;
3284  }
3285 
3286  for ( ; s < se ; s+= 2)
3287  {
3288  if (s[0] || s[1] != ' ')
3289  return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
3290  }
3291  }
3292  return 0;
3293 }
3294 
3295 
3296 static
3297 void my_hash_sort_ucs2_bin(const CHARSET_INFO *cs __attribute__((unused)),
3298  const uchar *key, size_t len,ulong *nr1, ulong *nr2)
3299 {
3300  const uchar *pos = key;
3301 
3302  key+= len;
3303 
3304  while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
3305  key-= 2;
3306 
3307  for (; pos < (uchar*) key ; pos++)
3308  {
3309  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
3310  ((uint)*pos)) + (nr1[0] << 8);
3311  nr2[0]+=3;
3312  }
3313 }
3314 
3315 
3316 static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
3317 {
3318  NULL, /* init */
3319  my_strnncoll_ucs2,
3320  my_strnncollsp_ucs2,
3321  my_strnxfrm_unicode,
3322  my_strnxfrmlen_simple,
3323  my_like_range_generic,
3324  my_wildcmp_ucs2_ci,
3325  my_strcasecmp_mb2_or_mb4,
3326  my_instr_mb,
3327  my_hash_sort_ucs2,
3328  my_propagate_simple
3329 };
3330 
3331 
3332 static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
3333 {
3334  NULL, /* init */
3335  my_strnncoll_ucs2_bin,
3336  my_strnncollsp_ucs2_bin,
3337  my_strnxfrm_unicode,
3338  my_strnxfrmlen_simple,
3339  my_like_range_generic,
3340  my_wildcmp_ucs2_bin,
3341  my_strcasecmp_mb2_or_mb4,
3342  my_instr_mb,
3343  my_hash_sort_ucs2_bin,
3344  my_propagate_simple
3345 };
3346 
3347 
3348 MY_CHARSET_HANDLER my_charset_ucs2_handler=
3349 {
3350  NULL, /* init */
3351  my_ismbchar_ucs2, /* ismbchar */
3352  my_mbcharlen_ucs2, /* mbcharlen */
3353  my_numchars_ucs2,
3354  my_charpos_ucs2,
3355  my_well_formed_len_ucs2,
3356  my_lengthsp_mb2,
3357  my_numcells_mb,
3358  my_ucs2_uni, /* mb_wc */
3359  my_uni_ucs2, /* wc_mb */
3360  my_mb_ctype_mb,
3361  my_caseup_str_mb2_or_mb4,
3362  my_casedn_str_mb2_or_mb4,
3363  my_caseup_ucs2,
3364  my_casedn_ucs2,
3365  my_snprintf_mb2,
3366  my_l10tostr_mb2_or_mb4,
3367  my_ll10tostr_mb2_or_mb4,
3368  my_fill_ucs2,
3369  my_strntol_mb2_or_mb4,
3370  my_strntoul_mb2_or_mb4,
3371  my_strntoll_mb2_or_mb4,
3372  my_strntoull_mb2_or_mb4,
3373  my_strntod_mb2_or_mb4,
3374  my_strtoll10_mb2,
3375  my_strntoull10rnd_mb2_or_mb4,
3376  my_scan_mb2
3377 };
3378 
3379 
3380 CHARSET_INFO my_charset_ucs2_general_ci=
3381 {
3382  35,0,0, /* number */
3383  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
3384  "ucs2", /* cs name */
3385  "ucs2_general_ci", /* name */
3386  "", /* comment */
3387  NULL, /* tailoring */
3388  ctype_ucs2, /* ctype */
3389  to_lower_ucs2, /* to_lower */
3390  to_upper_ucs2, /* to_upper */
3391  to_upper_ucs2, /* sort_order */
3392  NULL, /* uca */
3393  NULL, /* tab_to_uni */
3394  NULL, /* tab_from_uni */
3395  &my_unicase_default,/* caseinfo */
3396  NULL, /* state_map */
3397  NULL, /* ident_map */
3398  1, /* strxfrm_multiply */
3399  1, /* caseup_multiply */
3400  1, /* casedn_multiply */
3401  2, /* mbminlen */
3402  2, /* mbmaxlen */
3403  0, /* min_sort_char */
3404  0xFFFF, /* max_sort_char */
3405  ' ', /* pad char */
3406  0, /* escape_with_backslash_is_dangerous */
3407  1, /* levels_for_compare */
3408  1, /* levels_for_order */
3409  &my_charset_ucs2_handler,
3410  &my_collation_ucs2_general_ci_handler
3411 };
3412 
3413 
3414 CHARSET_INFO my_charset_ucs2_general_mysql500_ci=
3415 {
3416  159, 0, 0, /* number */
3417  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
3418  "ucs2", /* cs name */
3419  "ucs2_general_mysql500_ci", /* name */
3420  "", /* comment */
3421  NULL, /* tailoring */
3422  ctype_ucs2, /* ctype */
3423  to_lower_ucs2, /* to_lower */
3424  to_upper_ucs2, /* to_upper */
3425  to_upper_ucs2, /* sort_order */
3426  NULL, /* uca */
3427  NULL, /* tab_to_uni */
3428  NULL, /* tab_from_uni */
3429  &my_unicase_mysql500, /* caseinfo */
3430  NULL, /* state_map */
3431  NULL, /* ident_map */
3432  1, /* strxfrm_multiply */
3433  1, /* caseup_multiply */
3434  1, /* casedn_multiply */
3435  2, /* mbminlen */
3436  2, /* mbmaxlen */
3437  0, /* min_sort_char */
3438  0xFFFF, /* max_sort_char */
3439  ' ', /* pad char */
3440  0, /* escape_with_backslash_is_dangerous */
3441  1, /* levels_for_compare */
3442  1, /* levels_for_order */
3443  &my_charset_ucs2_handler,
3444  &my_collation_ucs2_general_ci_handler
3445 };
3446 
3447 
3448 CHARSET_INFO my_charset_ucs2_bin=
3449 {
3450  90,0,0, /* number */
3451  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
3452  "ucs2", /* cs name */
3453  "ucs2_bin", /* name */
3454  "", /* comment */
3455  NULL, /* tailoring */
3456  ctype_ucs2, /* ctype */
3457  to_lower_ucs2, /* to_lower */
3458  to_upper_ucs2, /* to_upper */
3459  NULL, /* sort_order */
3460  NULL, /* uca */
3461  NULL, /* tab_to_uni */
3462  NULL, /* tab_from_uni */
3463  &my_unicase_default,/* caseinfo */
3464  NULL, /* state_map */
3465  NULL, /* ident_map */
3466  1, /* strxfrm_multiply */
3467  1, /* caseup_multiply */
3468  1, /* casedn_multiply */
3469  2, /* mbminlen */
3470  2, /* mbmaxlen */
3471  0, /* min_sort_char */
3472  0xFFFF, /* max_sort_char */
3473  ' ', /* pad char */
3474  0, /* escape_with_backslash_is_dangerous */
3475  1, /* levels_for_compare */
3476  1, /* levels_for_order */
3477  &my_charset_ucs2_handler,
3478  &my_collation_ucs2_bin_handler
3479 };
3480 
3481 
3482 #endif /* HAVE_CHARSET_ucs2 */