MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ctype-bin.c
1 /* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. & tommy@valley.ne.jp.
2 
3  This library is free software; you can redistribute it and/or
4  modify it under the terms of the GNU Library General Public
5  License as published by the Free Software Foundation; version 2
6  of the License.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Library General Public License for more details.
12 
13  You should have received a copy of the GNU Library General Public
14  License along with this library; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16 
17 /* This file is for binary pseudo charset, created by bar@mysql.com */
18 
19 
20 #include <my_global.h>
21 #include "m_string.h"
22 #include "m_ctype.h"
23 
24 static uchar ctype_bin[]=
25 {
26  0,
27  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
28  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
29  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
30  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
31  16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
32  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
33  16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
35  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 };
44 
45 
46 /* Dummy array for toupper / tolower / sortorder */
47 
48 static uchar bin_char_array[] =
49 {
50  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
51  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
52  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
53  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
54  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
55  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
56  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
57  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
58  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
59  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
60  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
61  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
62  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
63  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
64  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
65  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
66 };
67 
68 
69 static my_bool
70 my_coll_init_8bit_bin(CHARSET_INFO *cs,
71  MY_CHARSET_LOADER *loader __attribute__((unused)))
72 {
73  cs->max_sort_char=255;
74  return FALSE;
75 }
76 
77 static int my_strnncoll_binary(const CHARSET_INFO *cs __attribute__((unused)),
78  const uchar *s, size_t slen,
79  const uchar *t, size_t tlen,
80  my_bool t_is_prefix)
81 {
82  size_t len= MY_MIN(slen,tlen);
83  int cmp= memcmp(s,t,len);
84  return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
85 }
86 
87 
88 size_t my_lengthsp_binary(const CHARSET_INFO *cs __attribute__((unused)),
89  const char *ptr __attribute__((unused)),
90  size_t length)
91 {
92  return length;
93 }
94 
95 
96 /*
97  Compare two strings. Result is sign(first_argument - second_argument)
98 
99  SYNOPSIS
100  my_strnncollsp_binary()
101  cs Chararacter set
102  s String to compare
103  slen Length of 's'
104  t String to compare
105  tlen Length of 't'
106 
107  NOTE
108  This function is used for real binary strings, i.e. for
109  BLOB, BINARY(N) and VARBINARY(N).
110  It compares trailing spaces as spaces.
111 
112  RETURN
113  < 0 s < t
114  0 s == t
115  > 0 s > t
116 */
117 
118 static int my_strnncollsp_binary(const CHARSET_INFO *cs
119  __attribute__((unused)),
120  const uchar *s, size_t slen,
121  const uchar *t, size_t tlen,
122  my_bool diff_if_only_endspace_difference
123  __attribute__((unused)))
124 {
125  return my_strnncoll_binary(cs,s,slen,t,tlen,0);
126 }
127 
128 
129 static int my_strnncoll_8bit_bin(const CHARSET_INFO *cs
130  __attribute__((unused)),
131  const uchar *s, size_t slen,
132  const uchar *t, size_t tlen,
133  my_bool t_is_prefix)
134 {
135  size_t len=MY_MIN(slen,tlen);
136  int cmp= memcmp(s,t,len);
137  return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
138 }
139 
140 
141 /*
142  Compare two strings. Result is sign(first_argument - second_argument)
143 
144  SYNOPSIS
145  my_strnncollsp_8bit_bin()
146  cs Chararacter set
147  s String to compare
148  slen Length of 's'
149  t String to compare
150  tlen Length of 't'
151  diff_if_only_endspace_difference
152  Set to 1 if the strings should be regarded as different
153  if they only difference in end space
154 
155  NOTE
156  This function is used for character strings with binary collations.
157  The shorter string is extended with end space to be as long as the longer
158  one.
159 
160  RETURN
161  < 0 s < t
162  0 s == t
163  > 0 s > t
164 */
165 
166 static int my_strnncollsp_8bit_bin(const CHARSET_INFO *cs
167  __attribute__((unused)),
168  const uchar *a, size_t a_length,
169  const uchar *b, size_t b_length,
170  my_bool diff_if_only_endspace_difference)
171 {
172  const uchar *end;
173  size_t length;
174  int res;
175 
176 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
177  diff_if_only_endspace_difference= 0;
178 #endif
179 
180  end= a + (length= MY_MIN(a_length, b_length));
181  while (a < end)
182  {
183  if (*a++ != *b++)
184  return ((int) a[-1] - (int) b[-1]);
185  }
186  res= 0;
187  if (a_length != b_length)
188  {
189  int swap= 1;
190  /*
191  Check the next not space character of the longer key. If it's < ' ',
192  then it's smaller than the other key.
193  */
194  if (diff_if_only_endspace_difference)
195  res= 1; /* Assume 'a' is bigger */
196  if (a_length < b_length)
197  {
198  /* put shorter key in s */
199  a_length= b_length;
200  a= b;
201  swap= -1; /* swap sign of result */
202  res= -res;
203  }
204  for (end= a + a_length-length; a < end ; a++)
205  {
206  if (*a != ' ')
207  return (*a < ' ') ? -swap : swap;
208  }
209  }
210  return res;
211 }
212 
213 
214 /* This function is used for all conversion functions */
215 
216 static size_t my_case_str_bin(const CHARSET_INFO *cs __attribute__((unused)),
217  char *str __attribute__((unused)))
218 {
219  return 0;
220 }
221 
222 
223 static size_t my_case_bin(const CHARSET_INFO *cs __attribute__((unused)),
224  char *src __attribute__((unused)),
225  size_t srclen,
226  char *dst __attribute__((unused)),
227  size_t dstlen __attribute__((unused)))
228 {
229  return srclen;
230 }
231 
232 
233 static int my_strcasecmp_bin(const CHARSET_INFO *cs __attribute__((unused)),
234  const char *s, const char *t)
235 {
236  return strcmp(s,t);
237 }
238 
239 
240 uint my_mbcharlen_8bit(const CHARSET_INFO *cs __attribute__((unused)),
241  uint c __attribute__((unused)))
242 {
243  return 1;
244 }
245 
246 
247 static int my_mb_wc_bin(const CHARSET_INFO *cs __attribute__((unused)),
248  my_wc_t *wc,
249  const uchar *str,
250  const uchar *end __attribute__((unused)))
251 {
252  if (str >= end)
253  return MY_CS_TOOSMALL;
254 
255  *wc=str[0];
256  return 1;
257 }
258 
259 
260 static int my_wc_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
261  my_wc_t wc,
262  uchar *s,
263  uchar *e __attribute__((unused)))
264 {
265  if (s >= e)
266  return MY_CS_TOOSMALL;
267 
268  if (wc < 256)
269  {
270  s[0]= (char) wc;
271  return 1;
272  }
273  return MY_CS_ILUNI;
274 }
275 
276 
277 void my_hash_sort_8bit_bin(const CHARSET_INFO *cs __attribute__((unused)),
278  const uchar *key, size_t len,
279  ulong *nr1, ulong *nr2)
280 {
281  const uchar *pos = key;
282 
283  /*
284  Remove trailing spaces. We have to do this to be able to compare
285  'A ' and 'A' as identical
286  */
287  key= skip_trailing_space(key, len);
288 
289  for (; pos < (uchar*) key ; pos++)
290  {
291  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
292  ((uint)*pos)) + (nr1[0] << 8);
293  nr2[0]+=3;
294  }
295 }
296 
297 
298 void my_hash_sort_bin(const CHARSET_INFO *cs __attribute__((unused)),
299  const uchar *key, size_t len,ulong *nr1, ulong *nr2)
300 {
301  const uchar *pos = key;
302 
303  key+= len;
304 
305  for (; pos < (uchar*) key ; pos++)
306  {
307  nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
308  ((uint)*pos)) + (nr1[0] << 8);
309  nr2[0]+=3;
310  }
311 }
312 
313 
/*
  The following defines are here to keep the wildcard matching code below
  identical to the one in ctype-simple.c.

  likeconv(s,A)    yields A unchanged (no conversion for binary data)
  INC_PTR(cs,A,B)  advances A by one byte; cs and B are ignored
*/

#define likeconv(s,A) (A)
#define INC_PTR(cs,A,B) ((A)++)
321 
322 
323 static
324 int my_wildcmp_bin_impl(const CHARSET_INFO *cs,
325  const char *str,const char *str_end,
326  const char *wildstr,const char *wildend,
327  int escape, int w_one, int w_many, int recurse_level)
328 {
329  int result= -1; /* Not found, using wildcards */
330 
331  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
332  return 1;
333  while (wildstr != wildend)
334  {
335  while (*wildstr != w_many && *wildstr != w_one)
336  {
337  if (*wildstr == escape && wildstr+1 != wildend)
338  wildstr++;
339  if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
340  return(1); /* No match */
341  if (wildstr == wildend)
342  return(str != str_end); /* Match if both are at end */
343  result=1; /* Found an anchor char */
344  }
345  if (*wildstr == w_one)
346  {
347  do
348  {
349  if (str == str_end) /* Skip one char if possible */
350  return(result);
351  INC_PTR(cs,str,str_end);
352  } while (++wildstr < wildend && *wildstr == w_one);
353  if (wildstr == wildend)
354  break;
355  }
356  if (*wildstr == w_many)
357  { /* Found w_many */
358  uchar cmp;
359  wildstr++;
360  /* Remove any '%' and '_' from the wild search string */
361  for (; wildstr != wildend ; wildstr++)
362  {
363  if (*wildstr == w_many)
364  continue;
365  if (*wildstr == w_one)
366  {
367  if (str == str_end)
368  return(-1);
369  INC_PTR(cs,str,str_end);
370  continue;
371  }
372  break; /* Not a wild character */
373  }
374  if (wildstr == wildend)
375  return(0); /* match if w_many is last */
376  if (str == str_end)
377  return(-1);
378 
379  if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
380  cmp= *++wildstr;
381 
382  INC_PTR(cs,wildstr,wildend); /* This is compared through cmp */
383  cmp=likeconv(cs,cmp);
384  do
385  {
386  while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
387  str++;
388  if (str++ == str_end)
389  return(-1);
390  {
391  int tmp=my_wildcmp_bin_impl(cs,str,str_end,
392  wildstr,wildend,escape,
393  w_one, w_many, recurse_level + 1);
394  if (tmp <= 0)
395  return(tmp);
396  }
397  } while (str != str_end && wildstr[0] != w_many);
398  return(-1);
399  }
400  }
401  return(str != str_end ? 1 : 0);
402 }
403 
404 int my_wildcmp_bin(const CHARSET_INFO *cs,
405  const char *str,const char *str_end,
406  const char *wildstr,const char *wildend,
407  int escape, int w_one, int w_many)
408 {
409  return my_wildcmp_bin_impl(cs, str, str_end,
410  wildstr, wildend,
411  escape, w_one, w_many, 1);
412 }
413 
414 
415 static size_t
416 my_strnxfrm_8bit_bin(const CHARSET_INFO *cs,
417  uchar * dst, size_t dstlen, uint nweights,
418  const uchar *src, size_t srclen, uint flags)
419 {
420  set_if_smaller(srclen, dstlen);
421  set_if_smaller(srclen, nweights);
422  if (dst != src)
423  memcpy(dst, src, srclen);
424  return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
425  nweights - srclen, flags, 0);
426 }
427 
428 
429 static
430 uint my_instr_bin(const CHARSET_INFO *cs __attribute__((unused)),
431  const char *b, size_t b_length,
432  const char *s, size_t s_length,
433  my_match_t *match, uint nmatch)
434 {
435  register const uchar *str, *search, *end, *search_end;
436 
437  if (s_length <= b_length)
438  {
439  if (!s_length)
440  {
441  if (nmatch)
442  {
443  match->beg= 0;
444  match->end= 0;
445  match->mb_len= 0;
446  }
447  return 1; /* Empty string is always found */
448  }
449 
450  str= (const uchar*) b;
451  search= (const uchar*) s;
452  end= (const uchar*) b+b_length-s_length+1;
453  search_end= (const uchar*) s + s_length;
454 
455 skip:
456  while (str != end)
457  {
458  if ( (*str++) == (*search))
459  {
460  register const uchar *i,*j;
461 
462  i= str;
463  j= search+1;
464 
465  while (j != search_end)
466  if ((*i++) != (*j++))
467  goto skip;
468 
469  if (nmatch > 0)
470  {
471  match[0].beg= 0;
472  match[0].end= (size_t) (str- (const uchar*)b-1);
473  match[0].mb_len= match[0].end;
474 
475  if (nmatch > 1)
476  {
477  match[1].beg= match[0].end;
478  match[1].end= match[0].end+s_length;
479  match[1].mb_len= match[1].end-match[1].beg;
480  }
481  }
482  return 2;
483  }
484  }
485  }
486  return 0;
487 }
488 
489 
490 MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
491 {
492  my_coll_init_8bit_bin,
493  my_strnncoll_8bit_bin,
494  my_strnncollsp_8bit_bin,
495  my_strnxfrm_8bit_bin,
496  my_strnxfrmlen_simple,
497  my_like_range_simple,
498  my_wildcmp_bin,
499  my_strcasecmp_bin,
500  my_instr_bin,
501  my_hash_sort_8bit_bin,
502  my_propagate_simple
503 };
504 
505 
506 static MY_COLLATION_HANDLER my_collation_binary_handler =
507 {
508  NULL, /* init */
509  my_strnncoll_binary,
510  my_strnncollsp_binary,
511  my_strnxfrm_8bit_bin,
512  my_strnxfrmlen_simple,
513  my_like_range_simple,
514  my_wildcmp_bin,
515  my_strcasecmp_bin,
516  my_instr_bin,
517  my_hash_sort_bin,
518  my_propagate_simple
519 };
520 
521 
522 static MY_CHARSET_HANDLER my_charset_handler=
523 {
524  NULL, /* init */
525  NULL, /* ismbchar */
526  my_mbcharlen_8bit, /* mbcharlen */
527  my_numchars_8bit,
528  my_charpos_8bit,
529  my_well_formed_len_8bit,
530  my_lengthsp_binary,
531  my_numcells_8bit,
532  my_mb_wc_bin,
533  my_wc_mb_bin,
534  my_mb_ctype_8bit,
535  my_case_str_bin,
536  my_case_str_bin,
537  my_case_bin,
538  my_case_bin,
539  my_snprintf_8bit,
540  my_long10_to_str_8bit,
541  my_longlong10_to_str_8bit,
542  my_fill_8bit,
543  my_strntol_8bit,
544  my_strntoul_8bit,
545  my_strntoll_8bit,
546  my_strntoull_8bit,
547  my_strntod_8bit,
548  my_strtoll10_8bit,
549  my_strntoull10rnd_8bit,
550  my_scan_8bit
551 };
552 
553 
554 CHARSET_INFO my_charset_bin =
555 {
556  63,0,0, /* number */
557  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
558  "binary", /* cs name */
559  "binary", /* name */
560  "", /* comment */
561  NULL, /* tailoring */
562  ctype_bin, /* ctype */
563  bin_char_array, /* to_lower */
564  bin_char_array, /* to_upper */
565  NULL, /* sort_order */
566  NULL, /* uca */
567  NULL, /* tab_to_uni */
568  NULL, /* tab_from_uni */
569  &my_unicase_default, /* caseinfo */
570  NULL, /* state_map */
571  NULL, /* ident_map */
572  1, /* strxfrm_multiply */
573  1, /* caseup_multiply */
574  1, /* casedn_multiply */
575  1, /* mbminlen */
576  1, /* mbmaxlen */
577  0, /* min_sort_char */
578  255, /* max_sort_char */
579  0, /* pad char */
580  0, /* escape_with_backslash_is_dangerous */
581  1, /* levels_for_compare */
582  1, /* levels_for_order */
583  &my_charset_handler,
584  &my_collation_binary_handler
585 };