MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
strfunc.cc
1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* Some useful string utility functions used by the MySQL server */
17 
18 #include "sql_priv.h"
19 #include "unireg.h"
20 #include "strfunc.h"
21 #include "sql_class.h"
22 #include "typelib.h" // TYPELIB
23 #include "m_ctype.h" // my_charset_latin1
24 #include "mysqld.h" // system_charset_info
25 
26 /*
27  Return bitmap for strings used in a set
28 
29  SYNOPSIS
30  find_set()
31  lib Strings in set
32  str Strings of set-strings separated by ','
33  err_pos If error, set to point to start of wrong set string
34  err_len If error, set to the length of wrong set string
35  set_warning Set to 1 if some string in set couldn't be used
36 
37  NOTE
38  We delete all end space from str before comparison
39 
40  RETURN
41  bitmap of all sets found in x.
42  set_warning is set to 1 if there was any sets that couldn't be set
43 */
44 
45 static const char field_separator=',';
46 
47 ulonglong find_set(TYPELIB *lib, const char *str, uint length,
48  const CHARSET_INFO *cs,
49  char **err_pos, uint *err_len, bool *set_warning)
50 {
51  const CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
52  const char *end= str + strip->cset->lengthsp(strip, str, length);
53  ulonglong found= 0;
54  *err_pos= 0; // No error yet
55  *err_len= 0;
56  if (str != end)
57  {
58  const char *start= str;
59  for (;;)
60  {
61  const char *pos= start;
62  uint var_len;
63  int mblen= 1;
64 
65  if (cs && cs->mbminlen > 1)
66  {
67  for ( ; pos < end; pos+= mblen)
68  {
69  my_wc_t wc;
70  if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
71  (const uchar *) end)) < 1)
72  mblen= 1; // Not to hang on a wrong multibyte sequence
73  if (wc == (my_wc_t) field_separator)
74  break;
75  }
76  }
77  else
78  for (; pos != end && *pos != field_separator; pos++) ;
79  var_len= (uint) (pos - start);
80  uint find= cs ? find_type2(lib, start, var_len, cs) :
81  find_type(lib, start, var_len, (bool) 0);
82  if (!find && *err_len == 0) // report the first error with length > 0
83  {
84  *err_pos= (char*) start;
85  *err_len= var_len;
86  *set_warning= 1;
87  }
88  else
89  found|= 1ULL << (find - 1);
90  if (pos >= end)
91  break;
92  start= pos + mblen;
93  }
94  }
95  return found;
96 }
97 
98 /*
99  Function to find a string in a TYPELIB
100  (similar to find_type() of mysys/typelib.c)
101 
102  SYNOPSIS
103  find_type()
104  lib TYPELIB (struct of pointer to values + count)
105  find String to find
106  length Length of string to find
107  part_match Allow part matching of value
108 
109  RETURN
110  0 error
111  > 0 position in TYPELIB->type_names +1
112 */
113 
114 uint find_type(const TYPELIB *lib, const char *find, uint length,
115  bool part_match)
116 {
117  uint found_count=0, found_pos=0;
118  const char *end= find+length;
119  const char *i;
120  const char *j;
121  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
122  {
123  for (i=find ; i != end &&
124  my_toupper(system_charset_info,*i) ==
125  my_toupper(system_charset_info,*j) ; i++, j++) ;
126  if (i == end)
127  {
128  if (! *j)
129  return(pos);
130  found_count++;
131  found_pos= pos;
132  }
133  }
134  return(found_count == 1 && part_match ? found_pos : 0);
135 }
136 
137 
138 /*
139  Find a string in a list of strings according to collation
140 
141  SYNOPSIS
142  find_type2()
143  lib TYPELIB (struct of pointer to values + count)
144  x String to find
145  length String length
146  cs Character set + collation to use for comparison
147 
148  NOTES
149 
150  RETURN
151  0 No matching value
152  >0 Offset+1 in typelib for matched string
153 */
154 
155 uint find_type2(const TYPELIB *typelib, const char *x, uint length,
156  const CHARSET_INFO *cs)
157 {
158  int pos;
159  const char *j;
160  DBUG_ENTER("find_type2");
161  DBUG_PRINT("enter",("x: '%.*s' lib: 0x%lx", length, x, (long) typelib));
162 
163  if (!typelib->count)
164  {
165  DBUG_PRINT("exit",("no count"));
166  DBUG_RETURN(0);
167  }
168 
169  for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
170  {
171  if (!my_strnncoll(cs, (const uchar*) x, length,
172  (const uchar*) j, typelib->type_lengths[pos]))
173  DBUG_RETURN(pos+1);
174  }
175  DBUG_PRINT("exit",("Couldn't find type"));
176  DBUG_RETURN(0);
177 } /* find_type */
178 
179 
180 /*
181  Un-hex all elements in a typelib
182 
183  SYNOPSIS
184  unhex_type2()
185  interval TYPELIB (struct of pointer to values + lengths + count)
186 
187  NOTES
188 
189  RETURN
190  N/A
191 */
192 
193 void unhex_type2(TYPELIB *interval)
194 {
195  for (uint pos= 0; pos < interval->count; pos++)
196  {
197  char *from, *to;
198  for (from= to= (char*) interval->type_names[pos]; *from; )
199  {
200  /*
201  Note, hexchar_to_int(*from++) doesn't work
202  one some compilers, e.g. IRIX. Looks like a compiler
203  bug in inline functions in combination with arguments
204  that have a side effect. So, let's use from[0] and from[1]
205  and increment 'from' by two later.
206  */
207 
208  *to++= (char) (hexchar_to_int(from[0]) << 4) +
209  hexchar_to_int(from[1]);
210  from+= 2;
211  }
212  interval->type_lengths[pos] /= 2;
213  }
214 }
215 
216 
217 /*
218  Check if the first word in a string is one of the ones in TYPELIB
219 
220  SYNOPSIS
221  check_word()
222  lib TYPELIB
223  val String to check
224  end End of input
225  end_of_word Store value of last used byte here if we found word
226 
227  RETURN
228  0 No matching value
229  > 1 lib->type_names[#-1] matched
230  end_of_word will point to separator character/end in 'val'
231 */
232 
233 uint check_word(TYPELIB *lib, const char *val, const char *end,
234  const char **end_of_word)
235 {
236  int res;
237  const char *ptr;
238 
239  /* Fiend end of word */
240  for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
241  ;
242  if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
243  *end_of_word= ptr;
244  return res;
245 }
246 
247 
248 /*
249  Converts a string between character sets
250 
251  SYNOPSIS
252  strconvert()
253  from_cs source character set
254  from source, a null terminated string
255  to destination buffer
256  to_length destination buffer length
257 
258  NOTES
259  'to' is always terminated with a '\0' character.
260  If there is no enough space to convert whole string,
261  only prefix is converted, and terminated with '\0'.
262 
263  RETURN VALUES
264  result string length
265 */
266 
267 
268 uint strconvert(CHARSET_INFO *from_cs, const char *from,
269  CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
270 {
271  int cnvres;
272  my_wc_t wc;
273  char *to_start= to;
274  uchar *to_end= (uchar*) to + to_length - 1;
275  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
276  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
277  uint error_count= 0;
278 
279  while (1)
280  {
281  /*
282  Using 'from + 10' is safe:
283  - it is enough to scan a single character in any character set.
284  - if remaining string is shorter than 10, then mb_wc will return
285  with error because of unexpected '\0' character.
286  */
287  if ((cnvres= (*mb_wc)(from_cs, &wc,
288  (uchar*) from, (uchar*) from + 10)) > 0)
289  {
290  if (!wc)
291  break;
292  from+= cnvres;
293  }
294  else if (cnvres == MY_CS_ILSEQ)
295  {
296  error_count++;
297  from++;
298  wc= '?';
299  }
300  else
301  break; // Impossible char.
302 
303 outp:
304 
305  if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
306  to+= cnvres;
307  else if (cnvres == MY_CS_ILUNI && wc != '?')
308  {
309  error_count++;
310  wc= '?';
311  goto outp;
312  }
313  else
314  break;
315  }
316  *to= '\0';
317  *errors= error_count;
318  return (uint32) (to - to_start);
319 
320 }
321 
322 
323 /*
324  Searches for a LEX_STRING in an LEX_STRING array.
325 
326  SYNOPSIS
327  find_string_in_array()
328  heap The array
329  needle The string to search for
330 
331  NOTE
332  The last LEX_STRING in the array should have str member set to NULL
333 
334  RETURN VALUES
335  -1 Not found
336  >=0 Ordinal position
337 */
338 
339 int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
340  CHARSET_INFO * const cs)
341 {
342  const LEX_STRING *pos;
343  for (pos= haystack; pos->str; pos++)
344  if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
345  (uchar *) needle->str, needle->length, 0))
346  {
347  return (pos - haystack);
348  }
349  return -1;
350 }
351 
352 
353 char *set_to_string(THD *thd, LEX_STRING *result, ulonglong set,
354  const char *lib[])
355 {
356  char buff[STRING_BUFFER_USUAL_SIZE*8];
357  String tmp(buff, sizeof(buff), &my_charset_latin1);
358  LEX_STRING unused;
359 
360  if (!result)
361  result= &unused;
362 
363  tmp.length(0);
364 
365  for (uint i= 0; set; i++, set >>= 1)
366  if (set & 1) {
367  tmp.append(lib[i]);
368  tmp.append(',');
369  }
370 
371  if (tmp.length())
372  {
373  result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
374  result->length= tmp.length()-1;
375  }
376  else
377  {
378  result->str= const_cast<char*>("");
379  result->length= 0;
380  }
381  return result->str;
382 }
383 
384 char *flagset_to_string(THD *thd, LEX_STRING *result, ulonglong set,
385  const char *lib[])
386 {
387  char buff[STRING_BUFFER_USUAL_SIZE*8];
388  String tmp(buff, sizeof(buff), &my_charset_latin1);
389  LEX_STRING unused;
390 
391  if (!result) result= &unused;
392 
393  tmp.length(0);
394 
395  // note that the last element is always "default", and it's ignored below
396  for (uint i= 0; lib[i+1]; i++, set >>= 1)
397  {
398  tmp.append(lib[i]);
399  tmp.append(set & 1 ? "=on," : "=off,");
400  }
401 
402  result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
403  result->length= tmp.length()-1;
404 
405  return result->str;
406 }
407