MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
m_ctype.h
1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /*
17  A better implementation of the UNIX ctype(3) library.
18 */
19 
20 #ifndef _m_ctype_h
21 #define _m_ctype_h
22 
23 #include <my_attribute.h>
24 #include "my_global.h" /* uint16, uchar */
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
/*
  Sizes of names and of the per-charset lookup tables.
  Note that the ctype table has one more entry (257) than the other
  tables (256); the my_is*() macros in this header index it as ctype+1.
*/
30 #define MY_CS_NAME_SIZE 32
31 #define MY_CS_CTYPE_TABLE_SIZE 257
32 #define MY_CS_TO_LOWER_TABLE_SIZE 256
33 #define MY_CS_TO_UPPER_TABLE_SIZE 256
34 #define MY_CS_SORT_ORDER_TABLE_SIZE 256
35 #define MY_CS_TO_UNI_TABLE_SIZE 256
36 
37 #define CHARSET_DIR "charsets/"
38 
39 #define my_wc_t ulong
40 
41 #define MY_CS_REPLACEMENT_CHARACTER 0xFFFD
42 
/*
  Helpers for two-byte (MB2) codes.

  On i386 we store Unicode->CS conversion tables for some character sets
  using big-endian order, to copy two bytes at once with a single 16-bit
  store. This gives some performance improvement.

  MB2(x)              - table representation of a two-byte code:
                        byte-swapped on i386, unchanged elsewhere.
  MY_PUT_MB2(s, code) - store a two-byte code at s. On i386 the value is
                        written with one uint16 store; elsewhere the high
                        byte goes to s[0] and the low byte to s[1].

  Both macros may evaluate their arguments more than once, so do not pass
  expressions with side effects. Every argument is parenthesized in the
  expansion so that compound expressions (e.g. "a | b") are stored
  correctly.
*/
#ifdef __i386__
#define MB2(x) (((x) >> 8) + (((x) & 0xFF) << 8))
#define MY_PUT_MB2(s, code) { *((uint16*)(s))= (code); }
#else
#define MB2(x) (x)
#define MY_PUT_MB2(s, code) { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
#endif
56 
57 
58 
59 typedef struct unicase_info_char_st
60 {
61  uint32 toupper;
62  uint32 tolower;
63  uint32 sort;
65 
66 
67 typedef struct unicase_info_st
68 {
69  my_wc_t maxchar;
70  MY_UNICASE_CHARACTER **page;
72 
73 
74 extern MY_UNICASE_INFO my_unicase_default;
75 extern MY_UNICASE_INFO my_unicase_turkish;
76 extern MY_UNICASE_INFO my_unicase_mysql500;
77 extern MY_UNICASE_INFO my_unicase_unicode520;
78 
79 #define MY_UCA_MAX_CONTRACTION 6
80 #define MY_UCA_MAX_WEIGHT_SIZE 8
81 #define MY_UCA_WEIGHT_LEVELS 1
82 
83 typedef struct my_contraction_t
84 {
85  my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */
86  uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
87  my_bool with_context;
89 
90 
91 
92 typedef struct my_contraction_list_t
93 {
94  size_t nitems; /* Number of items in the list */
95  MY_CONTRACTION *item; /* List of contractions */
96  char *flags; /* Character flags, e.g. "is contraction head") */
98 
99 
100 my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
101 my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
102 uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
103  my_wc_t wc1, my_wc_t wc2);
104 
105 
106 /* Collation weights on a single level (e.g. primary, secondary, tertiarty) */
107 typedef struct my_uca_level_info_st
108 {
109  my_wc_t maxchar;
110  uchar *lengths;
111  uint16 **weights;
112  MY_CONTRACTIONS contractions;
114 
115 
/*
  Top-level UCA description: the weight data for each of the
  MY_UCA_WEIGHT_LEVELS levels, followed by the first/last character of
  each "logical position" range.
*/
116 typedef struct uca_info_st
117 {
118  MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
119 
120  /* Logical positions */
121  my_wc_t first_non_ignorable;
122  my_wc_t last_non_ignorable;
123  my_wc_t first_primary_ignorable;
124  my_wc_t last_primary_ignorable;
125  my_wc_t first_secondary_ignorable;
126  my_wc_t last_secondary_ignorable;
127  my_wc_t first_tertiary_ignorable;
128  my_wc_t last_tertiary_ignorable;
129  my_wc_t first_trailing;
130  my_wc_t last_trailing;
131  my_wc_t first_variable;
132  my_wc_t last_variable;
133 
134 } MY_UCA_INFO;
135 
136 
137 
138 extern MY_UCA_INFO my_uca_v400;
139 
140 
/*
  Character-type information entry; my_uni_ctype[] holds 256 of these.
  NOTE(review): presumably one entry per 256-character Unicode page, with
  'pctype' being the type shared by the whole page and 'ctype' an
  optional per-character table - confirm against the ctype scanners.
*/
141 typedef struct uni_ctype_st
142 {
143  uchar pctype;
144  uchar *ctype;
145 } MY_UNI_CTYPE;
146 
147 extern MY_UNI_CTYPE my_uni_ctype[256];
148 
/*
  Note: MY_CS_ILSEQ and MY_CS_ILUNI share the value 0, so they cannot be
  told apart by value; MY_CS_TOOSMALLN(n) yields the same values as the
  MY_CS_TOOSMALL* constants (e.g. MY_CS_TOOSMALLN(2) == MY_CS_TOOSMALL2).
*/
149 /* mb_wc and wc_mb return codes */
150 #define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
151 #define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
152 #define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
153 #define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
154 #define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
155 /* The following three are currently not really used */
156 #define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
157 #define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
158 #define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
159 /* A helper macro for "need at least n bytes" */
160 #define MY_CS_TOOSMALLN(n) (-100-(n))
161 
162 #define MY_SEQ_INTTAIL 1
163 #define MY_SEQ_SPACES 2
164 
/*
  Each flag below is a distinct power of two, so flags can be combined
  with bitwise OR; the macros my_binary_compare() and use_strnxfrm() in
  this header test them against the 'state' member of CHARSET_INFO.
*/
165  /* My charsets_list flags */
166 #define MY_CS_COMPILED 1 /* compiled-in sets */
167 #define MY_CS_CONFIG 2 /* sets that have a *.conf file */
168 #define MY_CS_INDEX 4 /* sets listed in the Index file */
169 #define MY_CS_LOADED 8 /* sets that are currently loaded */
170 #define MY_CS_BINSORT 16 /* if binary sort order */
171 #define MY_CS_PRIMARY 32 /* if primary collation */
172 #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
173 #define MY_CS_UNICODE 128 /* if a charset is BMP Unicode */
174 #define MY_CS_READY 256 /* if a charset is initialized */
175 #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
176 #define MY_CS_CSSORT 1024 /* if case sensitive sort order */
177 #define MY_CS_HIDDEN 2048 /* don't display in SHOW */
178 #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
179 #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
180 #define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
181 #define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */
182 #define MY_CHARSET_UNDEFINED 0
183 
184 /* Character repertoire flags */
185 #define MY_REPERTOIRE_ASCII 1 /* Pure ASCII U+0000..U+007F */
186 #define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
187 #define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
188 
/*
  Flags for strxfrm.

  Layout of the flag word:
    bits 0..5    MY_STRXFRM_LEVELn          - which weight levels to produce
    bits 6..7    padding options
    bits 8..13   MY_STRXFRM_DESC_LEVELn     - MY_STRXFRM_LEVELn shifted left
                                              by MY_STRXFRM_DESC_SHIFT
    bits 14..15  unused, reserved for future extensions
    bits 16..21  MY_STRXFRM_REVERSE_LEVELn  - MY_STRXFRM_LEVELn shifted left
                                              by MY_STRXFRM_REVERSE_SHIFT

  Every DESC/REVERSE flag must be a single bit equal to the corresponding
  level flag shifted by the matching *_SHIFT constant.
*/
#define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
#define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
#define MY_STRXFRM_LEVEL3          0x00000004 /* for tertiary weights  */
#define MY_STRXFRM_LEVEL4          0x00000008 /* fourth level weights  */
#define MY_STRXFRM_LEVEL5          0x00000010 /* fifth level weights   */
#define MY_STRXFRM_LEVEL6          0x00000020 /* sixth level weights   */
#define MY_STRXFRM_LEVEL_ALL       0x0000003F /* Bit OR for the above six */
#define MY_STRXFRM_NLEVELS         6          /* Number of possible levels*/

#define MY_STRXFRM_PAD_WITH_SPACE  0x00000040 /* if pad result with spaces */
#define MY_STRXFRM_PAD_TO_MAXLEN   0x00000080 /* if pad tail(for filesort) */

#define MY_STRXFRM_DESC_LEVEL1     0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2     0x00000200 /* if desc order for level2 */
#define MY_STRXFRM_DESC_LEVEL3     0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4     0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5     0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6     0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT      8

#define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions */
#define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions */

#define MY_STRXFRM_REVERSE_LEVEL1  0x00010000 /* if reverse order for level1 */
#define MY_STRXFRM_REVERSE_LEVEL2  0x00020000 /* if reverse order for level2 */
#define MY_STRXFRM_REVERSE_LEVEL3  0x00040000 /* if reverse order for level3 */
#define MY_STRXFRM_REVERSE_LEVEL4  0x00080000 /* if reverse order for level4 */
#define MY_STRXFRM_REVERSE_LEVEL5  0x00100000 /* if reverse order for level5 */
#define MY_STRXFRM_REVERSE_LEVEL6  0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT   16
220 
221 
/*
  Index entry type used by the 'tab_from_uni' member of CHARSET_INFO.
  NOTE(review): presumably 'tab' holds the charset bytes for the Unicode
  code points in the range from..to - confirm against the charset
  implementations.
*/
222 typedef struct my_uni_idx_st
223 {
224  uint16 from;
225  uint16 to;
226  uchar *tab;
227 } MY_UNI_IDX;
228 
/*
  Match descriptor filled in by the instr() collation routine, which
  receives an array of these plus its element count (nmatch).
  NOTE(review): presumably 'beg'/'end' are byte offsets of the match and
  'mb_len' its length in characters - confirm against my_instr_simple().
*/
229 typedef struct
230 {
231  uint beg;
232  uint end;
233  uint mb_len;
234 } my_match_t;
235 
/*
  Lexer state identifiers.
  NOTE(review): presumably these are the values stored in the per-charset
  'state_map' / 'ident_map' byte tables of CHARSET_INFO - confirm against
  the SQL lexer. The enumerators rely on implicit numbering, so inserting
  or reordering entries changes their values.
*/
236 enum my_lex_states
237 {
238  MY_LEX_START, MY_LEX_CHAR, MY_LEX_IDENT,
239  MY_LEX_IDENT_SEP, MY_LEX_IDENT_START,
240  MY_LEX_REAL, MY_LEX_HEX_NUMBER, MY_LEX_BIN_NUMBER,
241  MY_LEX_CMP_OP, MY_LEX_LONG_CMP_OP, MY_LEX_STRING, MY_LEX_COMMENT, MY_LEX_END,
242  MY_LEX_OPERATOR_OR_IDENT, MY_LEX_NUMBER_IDENT, MY_LEX_INT_OR_REAL,
243  MY_LEX_REAL_OR_POINT, MY_LEX_BOOL, MY_LEX_EOL, MY_LEX_ESCAPE,
244  MY_LEX_LONG_COMMENT, MY_LEX_END_LONG_COMMENT, MY_LEX_SEMICOLON,
245  MY_LEX_SET_VAR, MY_LEX_USER_END, MY_LEX_HOSTNAME, MY_LEX_SKIP,
246  MY_LEX_USER_VARIABLE_DELIMITER, MY_LEX_SYSTEM_VAR,
247  MY_LEX_IDENT_OR_KEYWORD,
248  MY_LEX_IDENT_OR_HEX, MY_LEX_IDENT_OR_BIN, MY_LEX_IDENT_OR_NCHAR,
249  MY_LEX_STRING_OR_DELIMITER
250 };
251 
252 struct charset_info_st;
253 
/*
  Callbacks and state handed to charset initialisation code: an error
  message buffer, memory-management hooks, a log reporter and a hook for
  registering a collation.
*/
typedef struct my_charset_loader_st
{
  char error[128];                      /* Error message buffer */
  void *(*once_alloc)(size_t);
  void *(*malloc)(size_t);
  void *(*realloc)(void *, size_t);
  void (*free)(void *);
  void (*reporter)(enum loglevel, const char *format, ...);
  int  (*add_collation)(struct charset_info_st *cs);
} MY_CHARSET_LOADER;
264 
265 
266 extern int (*my_string_stack_guard)(int);
267 
268 /* See strings/CHARSET_INFO.txt for information about this structure */
270 {
271  my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *);
272  /* Collation routines */
273  int (*strnncoll)(const struct charset_info_st *,
274  const uchar *, size_t, const uchar *, size_t, my_bool);
275  int (*strnncollsp)(const struct charset_info_st *,
276  const uchar *, size_t, const uchar *, size_t,
277  my_bool diff_if_only_endspace_difference);
278  size_t (*strnxfrm)(const struct charset_info_st *,
279  uchar *dst, size_t dstlen, uint nweights,
280  const uchar *src, size_t srclen, uint flags);
281  size_t (*strnxfrmlen)(const struct charset_info_st *, size_t);
282  my_bool (*like_range)(const struct charset_info_st *,
283  const char *s, size_t s_length,
284  pchar w_prefix, pchar w_one, pchar w_many,
285  size_t res_length,
286  char *min_str, char *max_str,
287  size_t *min_len, size_t *max_len);
288  int (*wildcmp)(const struct charset_info_st *,
289  const char *str,const char *str_end,
290  const char *wildstr,const char *wildend,
291  int escape,int w_one, int w_many);
292 
293  int (*strcasecmp)(const struct charset_info_st *, const char *,
294  const char *);
295 
296  uint (*instr)(const struct charset_info_st *,
297  const char *b, size_t b_length,
298  const char *s, size_t s_length,
299  my_match_t *match, uint nmatch);
300 
301  /* Hash calculation */
302  void (*hash_sort)(const struct charset_info_st *cs, const uchar *key,
303  size_t len, ulong *nr1, ulong *nr2);
304  my_bool (*propagate)(const struct charset_info_st *cs, const uchar *str,
305  size_t len);
307 
308 extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
309 extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
310 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
311 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
312 
313 /* Some typedef to make it easy for C++ to make function pointers */
314 typedef int (*my_charset_conv_mb_wc)(const struct charset_info_st *,
315  my_wc_t *, const uchar *, const uchar *);
316 typedef int (*my_charset_conv_wc_mb)(const struct charset_info_st *, my_wc_t,
317  uchar *, uchar *);
318 typedef size_t (*my_charset_conv_case)(const struct charset_info_st *,
319  char *, size_t, char *, size_t);
320 
321 
322 /* See strings/CHARSET_INFO.txt about information on this structure */
323 typedef struct my_charset_handler_st
324 {
325  my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
326  /* Multibyte routines */
327  uint (*ismbchar)(const struct charset_info_st *, const char *,
328  const char *);
329  uint (*mbcharlen)(const struct charset_info_st *, uint c);
330  size_t (*numchars)(const struct charset_info_st *, const char *b,
331  const char *e);
332  size_t (*charpos)(const struct charset_info_st *, const char *b,
333  const char *e, size_t pos);
334  size_t (*well_formed_len)(const struct charset_info_st *,
335  const char *b,const char *e,
336  size_t nchars, int *error);
337  size_t (*lengthsp)(const struct charset_info_st *, const char *ptr,
338  size_t length);
339  size_t (*numcells)(const struct charset_info_st *, const char *b,
340  const char *e);
341 
342  /* Unicode conversion */
343  my_charset_conv_mb_wc mb_wc;
344  my_charset_conv_wc_mb wc_mb;
345 
346  /* CTYPE scanner */
347  int (*ctype)(const struct charset_info_st *cs, int *ctype,
348  const uchar *s, const uchar *e);
349 
350  /* Functions for case and sort conversion */
351  size_t (*caseup_str)(const struct charset_info_st *, char *);
352  size_t (*casedn_str)(const struct charset_info_st *, char *);
353 
354  my_charset_conv_case caseup;
355  my_charset_conv_case casedn;
356 
357  /* Charset dependant snprintf() */
358  size_t (*snprintf)(const struct charset_info_st *, char *to, size_t n,
359  const char *fmt,
360  ...) ATTRIBUTE_FORMAT_FPTR(printf, 4, 5);
361  size_t (*long10_to_str)(const struct charset_info_st *, char *to, size_t n,
362  int radix, long int val);
363  size_t (*longlong10_to_str)(const struct charset_info_st *, char *to,
364  size_t n, int radix, longlong val);
365 
366  void (*fill)(const struct charset_info_st *, char *to, size_t len,
367  int fill);
368 
369  /* String-to-number conversion routines */
370  long (*strntol)(const struct charset_info_st *, const char *s,
371  size_t l, int base, char **e, int *err);
372  ulong (*strntoul)(const struct charset_info_st *, const char *s,
373  size_t l, int base, char **e, int *err);
374  longlong (*strntoll)(const struct charset_info_st *, const char *s,
375  size_t l, int base, char **e, int *err);
376  ulonglong (*strntoull)(const struct charset_info_st *, const char *s,
377  size_t l, int base, char **e, int *err);
378  double (*strntod)(const struct charset_info_st *, char *s,
379  size_t l, char **e, int *err);
380  longlong (*strtoll10)(const struct charset_info_st *cs,
381  const char *nptr, char **endptr, int *error);
382  ulonglong (*strntoull10rnd)(const struct charset_info_st *cs,
383  const char *str, size_t length,
384  int unsigned_fl,
385  char **endptr, int *error);
386  size_t (*scan)(const struct charset_info_st *, const char *b,
387  const char *e, int sq);
389 
390 extern MY_CHARSET_HANDLER my_charset_8bit_handler;
391 extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
392 
393 
394 /*
395  We define this CHARSET_INFO_DEFINED here to prevent a repeat of the
396  typedef in hash.c, which will cause a compiler error.
397 */
398 #define CHARSET_INFO_DEFINED
399 
400 /* See strings/CHARSET_INFO.txt about information on this structure */
401 typedef struct charset_info_st
402 {
403  uint number;
404  uint primary_number;
405  uint binary_number;
406  uint state;
407  const char *csname;
408  const char *name;
409  const char *comment;
410  const char *tailoring;
411  uchar *ctype;
412  uchar *to_lower;
413  uchar *to_upper;
414  uchar *sort_order;
415  MY_UCA_INFO *uca;
416  uint16 *tab_to_uni;
417  MY_UNI_IDX *tab_from_uni;
418  MY_UNICASE_INFO *caseinfo;
419  uchar *state_map;
420  uchar *ident_map;
421  uint strxfrm_multiply;
422  uchar caseup_multiply;
423  uchar casedn_multiply;
424  uint mbminlen;
425  uint mbmaxlen;
426  my_wc_t min_sort_char;
427  my_wc_t max_sort_char; /* For LIKE optimization */
428  uchar pad_char;
429  my_bool escape_with_backslash_is_dangerous;
430  uchar levels_for_compare;
431  uchar levels_for_order;
432 
434  MY_COLLATION_HANDLER *coll;
435 
436 } CHARSET_INFO;
437 #define ILLEGAL_CHARSET_INFO_NUMBER (~0U)
438 
439 
440 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
441 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
442 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
443 
444 extern CHARSET_INFO my_charset_big5_chinese_ci;
445 extern CHARSET_INFO my_charset_big5_bin;
446 extern CHARSET_INFO my_charset_cp932_japanese_ci;
447 extern CHARSET_INFO my_charset_cp932_bin;
448 extern CHARSET_INFO my_charset_cp1250_czech_ci;
449 extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
450 extern CHARSET_INFO my_charset_eucjpms_bin;
451 extern CHARSET_INFO my_charset_euckr_korean_ci;
452 extern CHARSET_INFO my_charset_euckr_bin;
453 extern CHARSET_INFO my_charset_gb2312_chinese_ci;
454 extern CHARSET_INFO my_charset_gb2312_bin;
455 extern CHARSET_INFO my_charset_gbk_chinese_ci;
456 extern CHARSET_INFO my_charset_gbk_bin;
457 extern CHARSET_INFO my_charset_latin1_german2_ci;
458 extern CHARSET_INFO my_charset_latin1_bin;
459 extern CHARSET_INFO my_charset_latin2_czech_ci;
460 extern CHARSET_INFO my_charset_sjis_japanese_ci;
461 extern CHARSET_INFO my_charset_sjis_bin;
462 extern CHARSET_INFO my_charset_tis620_thai_ci;
463 extern CHARSET_INFO my_charset_tis620_bin;
464 extern CHARSET_INFO my_charset_ucs2_general_ci;
465 extern CHARSET_INFO my_charset_ucs2_bin;
466 extern CHARSET_INFO my_charset_ucs2_unicode_ci;
467 extern CHARSET_INFO my_charset_ucs2_general_mysql500_ci;
468 extern CHARSET_INFO my_charset_ujis_japanese_ci;
469 extern CHARSET_INFO my_charset_ujis_bin;
470 extern CHARSET_INFO my_charset_utf16_bin;
471 extern CHARSET_INFO my_charset_utf16_general_ci;
472 extern CHARSET_INFO my_charset_utf16_unicode_ci;
473 extern CHARSET_INFO my_charset_utf16le_bin;
474 extern CHARSET_INFO my_charset_utf16le_general_ci;
475 extern CHARSET_INFO my_charset_utf32_bin;
476 extern CHARSET_INFO my_charset_utf32_general_ci;
477 extern CHARSET_INFO my_charset_utf32_unicode_ci;
478 
479 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_utf8_general_ci;
480 extern CHARSET_INFO my_charset_utf8_tolower_ci;
481 extern CHARSET_INFO my_charset_utf8_unicode_ci;
482 extern CHARSET_INFO my_charset_utf8_bin;
483 extern CHARSET_INFO my_charset_utf8_general_mysql500_ci;
484 extern CHARSET_INFO my_charset_utf8mb4_bin;
485 extern CHARSET_INFO my_charset_utf8mb4_general_ci;
486 extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
487 #define MY_UTF8MB3 "utf8"
488 #define MY_UTF8MB4 "utf8mb4"
489 
490 
491 /* declarations for simple charsets */
492 extern size_t my_strnxfrm_simple(const CHARSET_INFO *,
493  uchar *dst, size_t dstlen, uint nweights,
494  const uchar *src, size_t srclen, uint flags);
495 size_t my_strnxfrmlen_simple(const CHARSET_INFO *, size_t);
496 extern int my_strnncoll_simple(const CHARSET_INFO *, const uchar *, size_t,
497  const uchar *, size_t, my_bool);
498 
499 extern int my_strnncollsp_simple(const CHARSET_INFO *, const uchar *, size_t,
500  const uchar *, size_t,
501  my_bool diff_if_only_endspace_difference);
502 
503 extern void my_hash_sort_simple(const CHARSET_INFO *cs,
504  const uchar *key, size_t len,
505  ulong *nr1, ulong *nr2);
506 
507 extern size_t my_lengthsp_8bit(const CHARSET_INFO *cs, const char *ptr,
508  size_t length);
509 
510 extern uint my_instr_simple(const struct charset_info_st *,
511  const char *b, size_t b_length,
512  const char *s, size_t s_length,
513  my_match_t *match, uint nmatch);
514 
515 
516 /* Functions for 8bit */
517 extern size_t my_caseup_str_8bit(const CHARSET_INFO *, char *);
518 extern size_t my_casedn_str_8bit(const CHARSET_INFO *, char *);
519 extern size_t my_caseup_8bit(const CHARSET_INFO *, char *src, size_t srclen,
520  char *dst, size_t dstlen);
521 extern size_t my_casedn_8bit(const CHARSET_INFO *, char *src, size_t srclen,
522  char *dst, size_t dstlen);
523 
524 extern int my_strcasecmp_8bit(const CHARSET_INFO * cs, const char *,
525  const char *);
526 
527 int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,
528  const uchar *e);
529 int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
530 
531 int my_mb_ctype_8bit(const CHARSET_INFO *,int *, const uchar *,const uchar *);
532 int my_mb_ctype_mb(const CHARSET_INFO *,int *, const uchar *,const uchar *);
533 
534 size_t my_scan_8bit(const CHARSET_INFO *cs, const char *b, const char *e,
535  int sq);
536 
537 size_t my_snprintf_8bit(const struct charset_info_st *, char *to, size_t n,
538  const char *fmt, ...)
539  ATTRIBUTE_FORMAT(printf, 4, 5);
540 
541 long my_strntol_8bit(const CHARSET_INFO *, const char *s, size_t l,
542  int base, char **e, int *err);
543 ulong my_strntoul_8bit(const CHARSET_INFO *, const char *s, size_t l,
544  int base, char **e, int *err);
545 longlong my_strntoll_8bit(const CHARSET_INFO *, const char *s, size_t l,
546  int base, char **e, int *err);
547 ulonglong my_strntoull_8bit(const CHARSET_INFO *, const char *s, size_t l,
548  int base, char **e, int *err);
549 double my_strntod_8bit(const CHARSET_INFO *, char *s, size_t l, char **e,
550  int *err);
551 size_t my_long10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l,
552  int radix, long int val);
553 size_t my_longlong10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l,
554  int radix, longlong val);
555 
556 longlong my_strtoll10_8bit(const CHARSET_INFO *cs,
557  const char *nptr, char **endptr, int *error);
558 longlong my_strtoll10_ucs2(const CHARSET_INFO *cs,
559  const char *nptr, char **endptr, int *error);
560 
561 ulonglong my_strntoull10rnd_8bit(const CHARSET_INFO *cs,
562  const char *str, size_t length, int
563  unsigned_fl, char **endptr, int *error);
564 ulonglong my_strntoull10rnd_ucs2(const CHARSET_INFO *cs,
565  const char *str, size_t length,
566  int unsigned_fl, char **endptr, int *error);
567 
568 void my_fill_8bit(const CHARSET_INFO *cs, char* to, size_t l, int fill);
569 
570 /* For 8-bit character set */
571 my_bool my_like_range_simple(const CHARSET_INFO *cs,
572  const char *ptr, size_t ptr_length,
573  pbool escape, pbool w_one, pbool w_many,
574  size_t res_length,
575  char *min_str, char *max_str,
576  size_t *min_length, size_t *max_length);
577 
578 /* For ASCII-based multi-byte character sets with mbminlen=1 */
579 my_bool my_like_range_mb(const CHARSET_INFO *cs,
580  const char *ptr, size_t ptr_length,
581  pbool escape, pbool w_one, pbool w_many,
582  size_t res_length,
583  char *min_str, char *max_str,
584  size_t *min_length, size_t *max_length);
585 
586 /* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
587 my_bool my_like_range_generic(const CHARSET_INFO *cs,
588  const char *ptr, size_t ptr_length,
589  pbool escape, pbool w_one, pbool w_many,
590  size_t res_length,
591  char *min_str, char *max_str,
592  size_t *min_length, size_t *max_length);
593 
594 int my_wildcmp_8bit(const CHARSET_INFO *,
595  const char *str,const char *str_end,
596  const char *wildstr,const char *wildend,
597  int escape, int w_one, int w_many);
598 
599 int my_wildcmp_bin(const CHARSET_INFO *,
600  const char *str,const char *str_end,
601  const char *wildstr,const char *wildend,
602  int escape, int w_one, int w_many);
603 
604 size_t my_numchars_8bit(const CHARSET_INFO *, const char *b, const char *e);
605 size_t my_numcells_8bit(const CHARSET_INFO *, const char *b, const char *e);
606 size_t my_charpos_8bit(const CHARSET_INFO *, const char *b, const char *e,
607  size_t pos);
608 size_t my_well_formed_len_8bit(const CHARSET_INFO *, const char *b,
609  const char *e, size_t pos, int *error);
610 uint my_mbcharlen_8bit(const CHARSET_INFO *, uint c);
611 
612 
613 /* Functions for multibyte charsets */
614 extern size_t my_caseup_str_mb(const CHARSET_INFO *, char *);
615 extern size_t my_casedn_str_mb(const CHARSET_INFO *, char *);
616 extern size_t my_caseup_mb(const CHARSET_INFO *, char *src, size_t srclen,
617  char *dst, size_t dstlen);
618 extern size_t my_casedn_mb(const CHARSET_INFO *, char *src, size_t srclen,
619  char *dst, size_t dstlen);
620 extern size_t my_caseup_mb_varlen(const CHARSET_INFO *, char *src,
621  size_t srclen, char *dst, size_t dstlen);
622 extern size_t my_casedn_mb_varlen(const CHARSET_INFO *, char *src,
623  size_t srclen, char *dst, size_t dstlen);
624 extern size_t my_caseup_ujis(const CHARSET_INFO *, char *src, size_t srclen,
625  char *dst, size_t dstlen);
626 extern size_t my_casedn_ujis(const CHARSET_INFO *, char *src, size_t srclen,
627  char *dst, size_t dstlen);
628 extern int my_strcasecmp_mb(const CHARSET_INFO * cs,const char *,
629  const char *);
630 
631 int my_wildcmp_mb(const CHARSET_INFO *,
632  const char *str,const char *str_end,
633  const char *wildstr,const char *wildend,
634  int escape, int w_one, int w_many);
635 size_t my_numchars_mb(const CHARSET_INFO *, const char *b, const char *e);
636 size_t my_numcells_mb(const CHARSET_INFO *, const char *b, const char *e);
637 size_t my_charpos_mb(const CHARSET_INFO *, const char *b, const char *e,
638  size_t pos);
639 size_t my_well_formed_len_mb(const CHARSET_INFO *, const char *b,
640  const char *e, size_t pos, int *error);
641 uint my_instr_mb(const struct charset_info_st *,
642  const char *b, size_t b_length,
643  const char *s, size_t s_length,
644  my_match_t *match, uint nmatch);
645 
646 int my_strnncoll_mb_bin(const CHARSET_INFO * cs,
647  const uchar *s, size_t slen,
648  const uchar *t, size_t tlen,
649  my_bool t_is_prefix);
650 
651 int my_strnncollsp_mb_bin(const CHARSET_INFO *cs,
652  const uchar *a, size_t a_length,
653  const uchar *b, size_t b_length,
654  my_bool diff_if_only_endspace_difference);
655 
656 int my_wildcmp_mb_bin(const CHARSET_INFO *cs,
657  const char *str,const char *str_end,
658  const char *wildstr,const char *wildend,
659  int escape, int w_one, int w_many);
660 
661 int my_strcasecmp_mb_bin(const CHARSET_INFO * cs __attribute__((unused)),
662  const char *s, const char *t);
663 
664 void my_hash_sort_mb_bin(const CHARSET_INFO *cs __attribute__((unused)),
665  const uchar *key, size_t len,ulong *nr1, ulong *nr2);
666 
667 size_t my_strnxfrm_mb(const CHARSET_INFO *,
668  uchar *dst, size_t dstlen, uint nweights,
669  const uchar *src, size_t srclen, uint flags);
670 
671 size_t my_strnxfrm_unicode(const CHARSET_INFO *,
672  uchar *dst, size_t dstlen, uint nweights,
673  const uchar *src, size_t srclen, uint flags);
674 
675 size_t my_strnxfrm_unicode_full_bin(const CHARSET_INFO *,
676  uchar *dst, size_t dstlen, uint nweights,
677  const uchar *src, size_t srclen, uint flags);
678 size_t my_strnxfrmlen_unicode_full_bin(const CHARSET_INFO *, size_t);
679 
680 int my_wildcmp_unicode(const CHARSET_INFO *cs,
681  const char *str, const char *str_end,
682  const char *wildstr, const char *wildend,
683  int escape, int w_one, int w_many,
684  MY_UNICASE_INFO *weights);
685 
686 extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
687  const char *buf, size_t buflen);
688 extern char *my_strchr(const CHARSET_INFO *cs, const char *str,
689  const char *end, pchar c);
690 extern size_t my_strcspn(const CHARSET_INFO *cs, const char *str,
691  const char *end, const char *accept);
692 
693 my_bool my_propagate_simple(const CHARSET_INFO *cs, const uchar *str,
694  size_t len);
695 my_bool my_propagate_complex(const CHARSET_INFO *cs, const uchar *str,
696  size_t len);
697 
698 
699 uint my_string_repertoire(const CHARSET_INFO *cs, const char *str, ulong len);
700 my_bool my_charset_is_ascii_based(const CHARSET_INFO *cs);
701 my_bool my_charset_is_8bit_pure_ascii(const CHARSET_INFO *cs);
702 uint my_charset_repertoire(const CHARSET_INFO *cs);
703 
704 
705 uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
706 void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
707  uint flags, uint level);
708 size_t my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs,
709  uchar *str, uchar *frmend, uchar *strend,
710  uint nweights, uint flags, uint level);
711 
712 my_bool my_charset_is_ascii_compatible(const CHARSET_INFO *cs);
713 
714 const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs,
715  int level);
716 
717 extern size_t my_vsnprintf_ex(const CHARSET_INFO *cs, char *to, size_t n,
718  const char* fmt, va_list ap);
719 
720 uint32 my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
721  const char *from, uint32 from_length,
722  const CHARSET_INFO *from_cs, uint *errors);
723 
/*
  Character class bits (octal constants, one bit each). The my_is*()
  macros in this header test them against entries of the per-charset
  ctype table.
*/
724 #define _MY_U 01 /* Upper case */
725 #define _MY_L 02 /* Lower case */
726 #define _MY_NMR 04 /* Numeral (digit) */
727 #define _MY_SPC 010 /* Spacing character */
728 #define _MY_PNT 020 /* Punctuation */
729 #define _MY_CTR 040 /* Control character */
730 #define _MY_B 0100 /* Blank */
731 #define _MY_X 0200 /* heXadecimal digit */
732 
733 
/*
  ctype(3)-style classification and conversion macros.

  my_isascii/my_toascii/my_tocntrl/my_toprint work on the character value
  alone. All other macros take a charset 's' and look the character up in
  that charset's tables after casting it to uchar, so negative 'char'
  values index the table safely.

  The classification macros read entry (uchar)c of ctype+1 and return the
  masked class bits: the result is zero or non-zero, not necessarily 1.
*/
734 #define my_isascii(c) (!((c) & ~0177))
735 #define my_toascii(c) ((c) & 0177)
736 #define my_tocntrl(c) ((c) & 31)
737 #define my_toprint(c) ((c) | 64)
738 #define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)])
739 #define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)])
740 #define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L))
741 #define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_U)
742 #define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_L)
743 #define my_isdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_NMR)
744 #define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X)
745 #define my_isalnum(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR))
746 #define my_isspace(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_SPC)
747 #define my_ispunct(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_PNT)
748 #define my_isprint(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
749 #define my_isgraph(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
750 #define my_iscntrl(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_CTR)
751 
752 /*
  Some macros that should be cleaned up a little.
  Both test for "alphanumeric (or alphabetic) or underscore".
  WARNING: 'c' appears twice in each expansion, so an argument with side
  effects (e.g. *p++) may be evaluated twice.
*/
753 #define my_isvar(s,c) (my_isalnum(s,c) || (c) == '_')
754 #define my_isvar_start(s,c) (my_isalpha(s,c) || (c) == '_')
755 
/*
  Convenience wrappers that test the state flags of a charset or dispatch
  through its handler tables ('coll' for collation operations, 'cset' for
  character-set operations). The charset argument is evaluated more than
  once by the dispatching macros, so it must not have side effects.

  my_strnxfrm() passes the destination length as the number of weights
  and always requests MY_STRXFRM_PAD_WITH_SPACE.
  my_strnncoll() always passes 0 as the last argument of strnncoll().
  my_charpos() casts its 'b' and 'e' arguments to const char *.

  Every expansion is wrapped in parentheses so that it behaves as a
  single expression wherever it is used.
*/
#define my_binary_compare(s)	      ((s)->state  & MY_CS_BINSORT)
#define use_strnxfrm(s)               ((s)->state  & MY_CS_STRNXFRM)
#define my_strnxfrm(cs, d, dl, s, sl) \
   ((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), MY_STRXFRM_PAD_WITH_SPACE))
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \
   ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
#define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
#define my_strcasecmp(s, a, b)        ((s)->coll->strcasecmp((s), (a), (b)))
#define my_charpos(cs, b, e, num) \
   ((cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)))
766 
767 
/*
  Multibyte helpers.
  use_mb(s) is true when the charset handler provides an ismbchar
  routine; my_ismbchar() calls that routine and must therefore only be
  used when use_mb(s) holds.
  When USE_MB is not defined, my_mbcharlen() expands to the constant 1
  and its arguments are not evaluated at all.
*/
768 #define use_mb(s) ((s)->cset->ismbchar != NULL)
769 #define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b)))
770 #ifdef USE_MB
771 #define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
772 #else
773 #define my_mbcharlen(s, a) 1
774 #endif
775 
/*
  Case conversion and string-to-number wrappers: each forwards its
  arguments unchanged to the same-named routine of the charset handler
  ((s)->cset), passing the charset itself as the first argument.
*/
776 #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
777 #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))
778 #define my_strntol(s, a, b, c, d, e) ((s)->cset->strntol((s),(a),(b),(c),(d),(e)))
779 #define my_strntoul(s, a, b, c, d, e) ((s)->cset->strntoul((s),(a),(b),(c),(d),(e)))
780 #define my_strntoll(s, a, b, c, d, e) ((s)->cset->strntoll((s),(a),(b),(c),(d),(e)))
781 #define my_strntoull(s, a, b, c,d, e) ((s)->cset->strntoull((s),(a),(b),(c),(d),(e)))
782 #define my_strntod(s, a, b, c, d) ((s)->cset->strntod((s),(a),(b),(c),(d)))
783 
784 
785 /* XXX: still need to take care of this one */
786 #ifdef MY_CHARSET_TIS620
787 #error The TIS620 charset is broken at the moment. Tell tim to fix it.
788 #define USE_TIS620
789 #include "t_ctype.h"
790 #endif
791 
792 #ifdef __cplusplus
793 }
794 #endif
795 
796 #endif /* _m_ctype_h */