MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sql_string.cc
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* This file is originally from the mysql distribution. Coded by monty */
17 
18 #include <my_global.h>
19 #include <my_sys.h>
20 #include <m_string.h>
21 #include <m_ctype.h>
22 #include <mysql_com.h>
23 
24 #include "sql_string.h"
25 
26 #include <algorithm>
27 
28 using std::min;
29 using std::max;
30 
31 /*****************************************************************************
32 ** String functions
33 *****************************************************************************/
34 
35 bool String::real_alloc(uint32 length)
36 {
37  uint32 arg_length= ALIGN_SIZE(length + 1);
38  DBUG_ASSERT(arg_length > length);
39  if (arg_length <= length)
40  return TRUE; /* Overflow */
41  str_length=0;
42  if (Alloced_length < arg_length)
43  {
44  free();
45  if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME))))
46  return TRUE;
47  Alloced_length=arg_length;
48  alloced=1;
49  }
50  Ptr[0]=0;
51  return FALSE;
52 }
53 
54 
82 bool String::realloc(uint32 alloc_length)
83 {
84  uint32 len=ALIGN_SIZE(alloc_length+1);
85  DBUG_ASSERT(len > alloc_length);
86  if (len <= alloc_length)
87  return TRUE; /* Overflow */
88  if (Alloced_length < len)
89  {
90  char *new_ptr;
91  if (alloced)
92  {
93  if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
94  return TRUE; // Signal error
95  }
96  else if ((new_ptr= (char*) my_malloc(len,MYF(MY_WME))))
97  {
98  if (str_length > len - 1)
99  str_length= 0;
100  if (str_length) // Avoid bugs in memcpy on AIX
101  memcpy(new_ptr,Ptr,str_length);
102  new_ptr[str_length]=0;
103  alloced=1;
104  }
105  else
106  return TRUE; // Signal error
107  Ptr= new_ptr;
108  Alloced_length= len;
109  }
110  Ptr[alloc_length]=0; // This make other funcs shorter
111  return FALSE;
112 }
113 
114 bool String::set_int(longlong num, bool unsigned_flag, const CHARSET_INFO *cs)
115 {
116  uint l=20*cs->mbmaxlen+1;
117  int base= unsigned_flag ? 10 : -10;
118 
119  if (alloc(l))
120  return TRUE;
121  str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
122  str_charset=cs;
123  return FALSE;
124 }
125 
126 bool String::set_real(double num,uint decimals, const CHARSET_INFO *cs)
127 {
128  char buff[FLOATING_POINT_BUFFER];
129  uint dummy_errors;
130  size_t len;
131 
132  str_charset=cs;
133  if (decimals >= NOT_FIXED_DEC)
134  {
135  len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
136  return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
137  }
138  len= my_fcvt(num, decimals, buff, NULL);
139  return copy(buff, (uint32) len, &my_charset_latin1, cs,
140  &dummy_errors);
141 }
142 
143 
144 bool String::copy()
145 {
146  if (!alloced)
147  {
148  Alloced_length=0; // Force realloc
149  return realloc(str_length);
150  }
151  return FALSE;
152 }
153 
165 bool String::copy(const String &str)
166 {
167  if (alloc(str.str_length))
168  return TRUE;
169  str_length=str.str_length;
170  bmove(Ptr,str.Ptr,str_length); // May be overlapping
171  Ptr[str_length]=0;
172  str_charset=str.str_charset;
173  return FALSE;
174 }
175 
176 bool String::copy(const char *str,uint32 arg_length,
177  const CHARSET_INFO *cs)
178 {
179  if (alloc(arg_length))
180  return TRUE;
181  if ((str_length=arg_length))
182  memcpy(Ptr,str,arg_length);
183  Ptr[arg_length]=0;
184  str_charset=cs;
185  return FALSE;
186 }
187 
188 
189 /*
190  Checks that the source string can be just copied to the destination string
191  without conversion.
192 
193  SYNOPSIS
194 
195  needs_conversion()
196  arg_length Length of string to copy.
197  from_cs Character set to copy from
198  to_cs Character set to copy to
199  uint32 *offset Returns number of unaligned characters.
200 
201  RETURN
202  0 No conversion needed
203  1 Either character set conversion or adding leading zeros
204  (e.g. for UCS-2) must be done
205 
206  NOTE
207  to_cs may be NULL for "no conversion" if the system variable
208  character_set_results is NULL.
209 */
210 
211 bool String::needs_conversion(uint32 arg_length,
212  const CHARSET_INFO *from_cs,
213  const CHARSET_INFO *to_cs,
214  uint32 *offset)
215 {
216  *offset= 0;
217  if (!to_cs ||
218  (to_cs == &my_charset_bin) ||
219  (to_cs == from_cs) ||
220  my_charset_same(from_cs, to_cs) ||
221  ((from_cs == &my_charset_bin) &&
222  (!(*offset=(arg_length % to_cs->mbminlen)))))
223  return FALSE;
224  return TRUE;
225 }
226 
227 
228 /*
229  Copy a string of a multi-byte character set, adding leading zeros.
230 
231  SYNOPSIS
232 
233  copy_aligned()
234  str String to copy
235  arg_length Length of string. This should NOT be divisible by
236  cs->mbminlen.
237  offset arg_length % cs->mbminlen
238  cs Character set for 'str'
239 
240  NOTES
241  For real multi-byte, ASCII-incompatible character sets,
242  like UCS-2, add leading zeros if we have an incomplete character.
243  Thus,
244  SELECT _ucs2 0xAA
245  will automatically be converted into
246  SELECT _ucs2 0x00AA
247 
248  RETURN
249  0 ok
250  1 error
251 */
252 
253 bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
254  const CHARSET_INFO *cs)
255 {
256  /* How many bytes are in incomplete character */
257  offset= cs->mbminlen - offset; /* How many zeros we should prepend */
258  DBUG_ASSERT(offset && offset != cs->mbminlen);
259 
260  uint32 aligned_length= arg_length + offset;
261  if (alloc(aligned_length))
262  return TRUE;
263 
264  /*
265  Note, this is only safe for big-endian UCS-2.
266  If we add little-endian UCS-2 sometimes, this code
267  will be more complicated. But it's OK for now.
268  */
269  memset(Ptr, 0, offset);
270  memcpy(Ptr + offset, str, arg_length);
271  Ptr[aligned_length]=0;
272  /* str_length is always >= 0 as arg_length is != 0 */
273  str_length= aligned_length;
274  str_charset= cs;
275  return FALSE;
276 }
277 
278 
279 bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
280  const CHARSET_INFO *cs)
281 {
282  /* How many bytes are in incomplete character */
283  uint32 offset= (arg_length % cs->mbminlen);
284 
285  if (!offset) /* All characters are complete, just copy */
286  {
287  set(str, arg_length, cs);
288  return FALSE;
289  }
290  return copy_aligned(str, arg_length, offset, cs);
291 }
292 
293 
304 bool String::copy(const char *str, uint32 arg_length,
305  const CHARSET_INFO *from_cs, const CHARSET_INFO *to_cs, uint *errors)
306 {
307  uint32 offset;
308 
309  DBUG_ASSERT(!str || str != Ptr);
310 
311  if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
312  {
313  *errors= 0;
314  return copy(str, arg_length, to_cs);
315  }
316  if ((from_cs == &my_charset_bin) && offset)
317  {
318  *errors= 0;
319  return copy_aligned(str, arg_length, offset, to_cs);
320  }
321  uint32 new_length= to_cs->mbmaxlen*arg_length;
322  if (alloc(new_length))
323  return TRUE;
324  str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
325  str, arg_length, from_cs, errors);
326  str_charset=to_cs;
327  return FALSE;
328 }
329 
330 
331 /*
332  Set a string to the value of a latin1-string, keeping the original charset
333 
334  SYNOPSIS
335  set_ascii()
336  str String of a simple charset (latin1)
337  arg_length Length of string
338 
339  IMPLEMENTATION
340  If string object is of a simple character set, set it to point to the
341  given string.
342  If not, make a copy and convert it to the new character set.
343 
344  RETURN
345  0 ok
346  1 Could not allocate result buffer
347 
348 */
349 
350 bool String::set_ascii(const char *str, uint32 arg_length)
351 {
352  if (str_charset->mbminlen == 1)
353  {
354  set(str, arg_length, str_charset);
355  return 0;
356  }
357  uint dummy_errors;
358  return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
359 }
360 
361 
362 /* This is used by mysql.cc */
363 
364 bool String::fill(uint32 max_length,char fill_char)
365 {
366  if (str_length > max_length)
367  Ptr[str_length=max_length]=0;
368  else
369  {
370  if (realloc(max_length))
371  return TRUE;
372  memset(Ptr+str_length, fill_char, max_length-str_length);
373  str_length=max_length;
374  }
375  return FALSE;
376 }
377 
378 void String::strip_sp()
379 {
380  while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
381  str_length--;
382 }
383 
384 bool String::append(const String &s)
385 {
386  if (s.length())
387  {
388  if (realloc(str_length+s.length()))
389  return TRUE;
390  memcpy(Ptr+str_length,s.ptr(),s.length());
391  str_length+=s.length();
392  }
393  return FALSE;
394 }
395 
396 
397 /*
398  Append an ASCII string to a string of the current character set
399 */
400 
401 bool String::append(const char *s,uint32 arg_length)
402 {
403  if (!arg_length)
404  return FALSE;
405 
406  /*
407  For an ASCII incompatible string, e.g. UCS-2, we need to convert
408  */
409  if (str_charset->mbminlen > 1)
410  {
411  uint32 add_length=arg_length * str_charset->mbmaxlen;
412  uint dummy_errors;
413  if (realloc(str_length+ add_length))
414  return TRUE;
415  str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
416  s, arg_length, &my_charset_latin1,
417  &dummy_errors);
418  return FALSE;
419  }
420 
421  /*
422  For an ASCII compatinble string we can just append.
423  */
424  if (realloc(str_length+arg_length))
425  return TRUE;
426  memcpy(Ptr+str_length,s,arg_length);
427  str_length+=arg_length;
428  return FALSE;
429 }
430 
431 
432 /*
433  Append a 0-terminated ASCII string
434 */
435 
436 bool String::append(const char *s)
437 {
438  return append(s, (uint) strlen(s));
439 }
440 
441 
442 
443 bool String::append_ulonglong(ulonglong val)
444 {
445  if (realloc(str_length+MAX_BIGINT_WIDTH+2))
446  return TRUE;
447  char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
448  str_length= end - Ptr;
449  return FALSE;
450 }
451 
452 /*
453  Append a string in the given charset to the string
454  with character set recoding
455 */
456 
457 bool String::append(const char *s,uint32 arg_length, const CHARSET_INFO *cs)
458 {
459  uint32 offset;
460 
461  if (needs_conversion(arg_length, cs, str_charset, &offset))
462  {
463  uint32 add_length;
464  if ((cs == &my_charset_bin) && offset)
465  {
466  DBUG_ASSERT(str_charset->mbminlen > offset);
467  offset= str_charset->mbminlen - offset; // How many characters to pad
468  add_length= arg_length + offset;
469  if (realloc(str_length + add_length))
470  return TRUE;
471  memset(Ptr + str_length, 0, offset);
472  memcpy(Ptr + str_length + offset, s, arg_length);
473  str_length+= add_length;
474  return FALSE;
475  }
476 
477  add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
478  uint dummy_errors;
479  if (realloc(str_length + add_length))
480  return TRUE;
481  str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
482  s, arg_length, cs, &dummy_errors);
483  }
484  else
485  {
486  if (realloc(str_length + arg_length))
487  return TRUE;
488  memcpy(Ptr + str_length, s, arg_length);
489  str_length+= arg_length;
490  }
491  return FALSE;
492 }
493 
494 bool String::append(IO_CACHE* file, uint32 arg_length)
495 {
496  if (realloc(str_length+arg_length))
497  return TRUE;
498  if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
499  {
500  shrink(str_length);
501  return TRUE;
502  }
503  str_length+=arg_length;
504  return FALSE;
505 }
506 
507 
515 bool String::append_parenthesized(long nr, int radix)
516 {
517  char buff[64], *end;
518  buff[0]= '(';
519  end= int10_to_str(nr, buff + 1, radix);
520  *end++ = ')';
521  return append(buff, (uint) (end - buff));
522 }
523 
524 
525 bool String::append_with_prefill(const char *s,uint32 arg_length,
526  uint32 full_length, char fill_char)
527 {
528  int t_length= arg_length > full_length ? arg_length : full_length;
529 
530  if (realloc(str_length + t_length))
531  return TRUE;
532  t_length= full_length - arg_length;
533  if (t_length > 0)
534  {
535  memset(Ptr+str_length, fill_char, t_length);
536  str_length=str_length + t_length;
537  }
538  append(s, arg_length);
539  return FALSE;
540 }
541 
542 uint32 String::numchars() const
543 {
544  return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
545 }
546 
547 int String::charpos(int i,uint32 offset)
548 {
549  if (i <= 0)
550  return i;
551  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
552 }
553 
554 int String::strstr(const String &s,uint32 offset)
555 {
556  if (s.length()+offset <= str_length)
557  {
558  if (!s.length())
559  return ((int) offset); // Empty string is always found
560 
561  register const char *str = Ptr+offset;
562  register const char *search=s.ptr();
563  const char *end=Ptr+str_length-s.length()+1;
564  const char *search_end=s.ptr()+s.length();
565 skip:
566  while (str != end)
567  {
568  if (*str++ == *search)
569  {
570  register char *i,*j;
571  i=(char*) str; j=(char*) search+1;
572  while (j != search_end)
573  if (*i++ != *j++) goto skip;
574  return (int) (str-Ptr) -1;
575  }
576  }
577  }
578  return -1;
579 }
580 
581 /*
582 ** Search string from end. Offset is offset to the end of string
583 */
584 
585 int String::strrstr(const String &s,uint32 offset)
586 {
587  if (s.length() <= offset && offset <= str_length)
588  {
589  if (!s.length())
590  return offset; // Empty string is always found
591  register const char *str = Ptr+offset-1;
592  register const char *search=s.ptr()+s.length()-1;
593 
594  const char *end=Ptr+s.length()-2;
595  const char *search_end=s.ptr()-1;
596 skip:
597  while (str != end)
598  {
599  if (*str-- == *search)
600  {
601  register char *i,*j;
602  i=(char*) str; j=(char*) search-1;
603  while (j != search_end)
604  if (*i-- != *j--) goto skip;
605  return (int) (i-Ptr) +1;
606  }
607  }
608  }
609  return -1;
610 }
611 
612 /*
613  Replace substring with string
614  If wrong parameter or not enough memory, do nothing
615 */
616 
617 bool String::replace(uint32 offset,uint32 arg_length,const String &to)
618 {
619  return replace(offset,arg_length,to.ptr(),to.length());
620 }
621 
622 bool String::replace(uint32 offset,uint32 arg_length,
623  const char *to, uint32 to_length)
624 {
625  long diff = (long) to_length-(long) arg_length;
626  if (offset+arg_length <= str_length)
627  {
628  if (diff < 0)
629  {
630  if (to_length)
631  memcpy(Ptr+offset,to,to_length);
632  bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
633  str_length-offset-arg_length);
634  }
635  else
636  {
637  if (diff)
638  {
639  if (realloc(str_length+(uint32) diff))
640  return TRUE;
641  bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
642  str_length-offset-arg_length);
643  }
644  if (to_length)
645  memcpy(Ptr+offset,to,to_length);
646  }
647  str_length+=(uint32) diff;
648  }
649  return FALSE;
650 }
651 
652 
653 // added by Holyfoot for "geometry" needs
654 int String::reserve(uint32 space_needed, uint32 grow_by)
655 {
656  if (Alloced_length < str_length + space_needed)
657  {
658  if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
659  return TRUE;
660  }
661  return FALSE;
662 }
663 
664 void String::qs_append(const char *str, uint32 len)
665 {
666  memcpy(Ptr + str_length, str, len + 1);
667  str_length += len;
668 }
669 
670 void String::qs_append(double d)
671 {
672  char *buff = Ptr + str_length;
673  str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
674  NULL);
675 }
676 
677 void String::qs_append(double *d)
678 {
679  double ld;
680  float8get(ld, (char*) d);
681  qs_append(ld);
682 }
683 
684 void String::qs_append(int i)
685 {
686  char *buff= Ptr + str_length;
687  char *end= int10_to_str(i, buff, -10);
688  str_length+= (int) (end-buff);
689 }
690 
691 void String::qs_append(uint i)
692 {
693  char *buff= Ptr + str_length;
694  char *end= int10_to_str(i, buff, 10);
695  str_length+= (int) (end-buff);
696 }
697 
698 /*
699  Compare strings according to collation, without end space.
700 
701  SYNOPSIS
702  sortcmp()
703  s First string
704  t Second string
705  cs Collation
706 
707  NOTE:
708  Normally this is case sensitive comparison
709 
710  RETURN
711  < 0 s < t
712  0 s == t
713  > 0 s > t
714 */
715 
716 
717 int sortcmp(const String *s,const String *t, const CHARSET_INFO *cs)
718 {
719  return cs->coll->strnncollsp(cs,
720  (uchar *) s->ptr(),s->length(),
721  (uchar *) t->ptr(),t->length(), 0);
722 }
723 
724 
725 /*
726  Compare strings byte by byte. End spaces are also compared.
727 
728  SYNOPSIS
729  stringcmp()
730  s First string
731  t Second string
732 
733  NOTE:
734  Strings are compared as a stream of uchars
735 
736  RETURN
737  < 0 s < t
738  0 s == t
739  > 0 s > t
740 */
741 
742 
743 int stringcmp(const String *s,const String *t)
744 {
745  uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
746  int cmp= memcmp(s->ptr(), t->ptr(), len);
747  return (cmp) ? cmp : (int) (s_len - t_len);
748 }
749 
750 
751 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
752 {
753  if (from->Alloced_length >= from_length)
754  return from;
755  if ((from->alloced && (from->Alloced_length != 0)) || !to || from == to)
756  {
757  (void) from->realloc(from_length);
758  return from;
759  }
760  if (to->realloc(from_length))
761  return from; // Actually an error
762  if ((to->str_length=min(from->str_length,from_length)))
763  memcpy(to->Ptr,from->Ptr,to->str_length);
764  to->str_charset=from->str_charset;
765  return to;
766 }
767 
768 
769 /****************************************************************************
770  Help functions
771 ****************************************************************************/
772 
793 size_t
794 my_copy_with_hex_escaping(const CHARSET_INFO *cs,
795  char *dst, size_t dstlen,
796  const char *src, size_t srclen)
797 {
798  const char *srcend= src + srclen;
799  char *dst0= dst;
800 
801  for ( ; src < srcend ; )
802  {
803  size_t chlen;
804  if ((chlen= my_ismbchar(cs, src, srcend)))
805  {
806  if (dstlen < chlen)
807  break; /* purecov: inspected */
808  memcpy(dst, src, chlen);
809  src+= chlen;
810  dst+= chlen;
811  dstlen-= chlen;
812  }
813  else if (*src & 0x80)
814  {
815  if (dstlen < 4)
816  break; /* purecov: inspected */
817  *dst++= '\\';
818  *dst++= 'x';
819  *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
820  *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
821  src++;
822  dstlen-= 4;
823  }
824  else
825  {
826  if (dstlen < 1)
827  break; /* purecov: inspected */
828  *dst++= *src++;
829  dstlen--;
830  }
831  }
832  return dst - dst0;
833 }
834 
835 /*
836  copy a string,
837  with optional character set conversion,
838  with optional left padding (for binary -> UCS2 conversion)
839 
840  SYNOPSIS
841  well_formed_copy_nchars()
842  to Store result here
843  to_length Maximum length of "to" string
844  to_cs Character set of "to" string
845  from Copy from here
846  from_length Length of from string
847  from_cs From character set
848  nchars Copy not more than nchars characters
849  well_formed_error_pos Return position when "from" is not well formed
850  or NULL otherwise.
851  cannot_convert_error_pos Return position where a non-convertible
852  character was met, or NULL otherwise.
853  from_end_pos Return position where scanning of "from"
854  string stopped.
855  NOTES
856 
857  RETURN
858  length of bytes copied to 'to'
859 */
860 
861 
862 uint32
863 well_formed_copy_nchars(const CHARSET_INFO *to_cs,
864  char *to, uint to_length,
865  const CHARSET_INFO *from_cs,
866  const char *from, uint from_length,
867  uint nchars,
868  const char **well_formed_error_pos,
869  const char **cannot_convert_error_pos,
870  const char **from_end_pos)
871 {
872  uint res;
873 
874  if ((to_cs == &my_charset_bin) ||
875  (from_cs == &my_charset_bin) ||
876  (to_cs == from_cs) ||
877  my_charset_same(from_cs, to_cs))
878  {
879  if (to_length < to_cs->mbminlen || !nchars)
880  {
881  *from_end_pos= from;
882  *cannot_convert_error_pos= NULL;
883  *well_formed_error_pos= NULL;
884  return 0;
885  }
886 
887  if (to_cs == &my_charset_bin)
888  {
889  res= min(min(nchars, to_length), from_length);
890  memmove(to, from, res);
891  *from_end_pos= from + res;
892  *well_formed_error_pos= NULL;
893  *cannot_convert_error_pos= NULL;
894  }
895  else
896  {
897  int well_formed_error;
898  uint from_offset;
899 
900  if ((from_offset= (from_length % to_cs->mbminlen)) &&
901  (from_cs == &my_charset_bin))
902  {
903  /*
904  Copying from BINARY to UCS2 needs to prepend zeros sometimes:
905  INSERT INTO t1 (ucs2_column) VALUES (0x01);
906  0x01 -> 0x0001
907  */
908  uint pad_length= to_cs->mbminlen - from_offset;
909  memset(to, 0, pad_length);
910  memmove(to + pad_length, from, from_offset);
911  /*
912  In some cases left zero-padding can create an incorrect character.
913  For example:
914  INSERT INTO t1 (utf32_column) VALUES (0x110000);
915  We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
916  The valid characters range is limited to 0x00000000..0x0010FFFF.
917 
918  Make sure we didn't pad to an incorrect character.
919  */
920  if (to_cs->cset->well_formed_len(to_cs,
921  to, to + to_cs->mbminlen, 1,
922  &well_formed_error) !=
923  to_cs->mbminlen)
924  {
925  *from_end_pos= *well_formed_error_pos= from;
926  *cannot_convert_error_pos= NULL;
927  return 0;
928  }
929  nchars--;
930  from+= from_offset;
931  from_length-= from_offset;
932  to+= to_cs->mbminlen;
933  to_length-= to_cs->mbminlen;
934  }
935 
936  set_if_smaller(from_length, to_length);
937  res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
938  nchars, &well_formed_error);
939  memmove(to, from, res);
940  *from_end_pos= from + res;
941  *well_formed_error_pos= well_formed_error ? from + res : NULL;
942  *cannot_convert_error_pos= NULL;
943  if (from_offset)
944  res+= to_cs->mbminlen;
945  }
946  }
947  else
948  {
949  int cnvres;
950  my_wc_t wc;
951  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
952  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
953  const uchar *from_end= (const uchar*) from + from_length;
954  uchar *to_end= (uchar*) to + to_length;
955  char *to_start= to;
956  *well_formed_error_pos= NULL;
957  *cannot_convert_error_pos= NULL;
958 
959  for ( ; nchars; nchars--)
960  {
961  const char *from_prev= from;
962  if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
963  from+= cnvres;
964  else if (cnvres == MY_CS_ILSEQ)
965  {
966  if (!*well_formed_error_pos)
967  *well_formed_error_pos= from;
968  from++;
969  wc= '?';
970  }
971  else if (cnvres > MY_CS_TOOSMALL)
972  {
973  /*
974  A correct multibyte sequence detected
975  But it doesn't have Unicode mapping.
976  */
977  if (!*cannot_convert_error_pos)
978  *cannot_convert_error_pos= from;
979  from+= (-cnvres);
980  wc= '?';
981  }
982  else
983  break; // Not enough characters
984 
985 outp:
986  if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
987  to+= cnvres;
988  else if (cnvres == MY_CS_ILUNI && wc != '?')
989  {
990  if (!*cannot_convert_error_pos)
991  *cannot_convert_error_pos= from_prev;
992  wc= '?';
993  goto outp;
994  }
995  else
996  {
997  from= from_prev;
998  break;
999  }
1000  }
1001  *from_end_pos= from;
1002  res= (uint) (to - to_start);
1003  }
1004  return (uint32) res;
1005 }
1006 
1007 
1008 
1009 
1010 void String::print(String *str)
1011 {
1012  char *st= (char*)Ptr, *end= st+str_length;
1013  for (; st < end; st++)
1014  {
1015  uchar c= *st;
1016  switch (c)
1017  {
1018  case '\\':
1019  str->append(STRING_WITH_LEN("\\\\"));
1020  break;
1021  case '\0':
1022  str->append(STRING_WITH_LEN("\\0"));
1023  break;
1024  case '\'':
1025  str->append(STRING_WITH_LEN("\\'"));
1026  break;
1027  case '\n':
1028  str->append(STRING_WITH_LEN("\\n"));
1029  break;
1030  case '\r':
1031  str->append(STRING_WITH_LEN("\\r"));
1032  break;
1033  case '\032': // Ctrl-Z
1034  str->append(STRING_WITH_LEN("\\Z"));
1035  break;
1036  default:
1037  str->append(c);
1038  }
1039  }
1040 }
1041 
1042 
1043 /*
1044  Exchange state of this object and argument.
1045 
1046  SYNOPSIS
1047  String::swap()
1048 
1049  RETURN
1050  Target string will contain state of this object and vice versa.
1051 */
1052 
1053 void String::swap(String &s)
1054 {
1055  swap_variables(char *, Ptr, s.Ptr);
1056  swap_variables(uint32, str_length, s.str_length);
1057  swap_variables(uint32, Alloced_length, s.Alloced_length);
1058  swap_variables(bool, alloced, s.alloced);
1059  swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
1060 }
1061 
1062 
1083 uint convert_to_printable(char *to, size_t to_len,
1084  const char *from, size_t from_len,
1085  const CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1086 {
1087  /* needs at least 8 bytes for '\xXX...' and zero byte */
1088  DBUG_ASSERT(to_len >= 8);
1089 
1090  char *t= to;
1091  char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1092  const char *f= from;
1093  const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
1094  char *dots= to; // last safe place to append '...'
1095 
1096  if (!f || t == t_end)
1097  return 0;
1098 
1099  for (; t < t_end && f < f_end; f++)
1100  {
1101  /*
1102  If the source string is ASCII compatible (mbminlen==1)
1103  and the source character is in ASCII printable range (0x20..0x7F),
1104  then display the character as is.
1105 
1106  Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1107  or the source character is not in the printable range,
1108  then print the character using HEX notation.
1109  */
1110  if (((unsigned char) *f) >= 0x20 &&
1111  ((unsigned char) *f) <= 0x7F &&
1112  from_cs->mbminlen == 1)
1113  {
1114  *t++= *f;
1115  }
1116  else
1117  {
1118  if (t_end - t < 4) // \xXX
1119  break;
1120  *t++= '\\';
1121  *t++= 'x';
1122  *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
1123  *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
1124  }
1125  if (t_end - t >= 3) // '...'
1126  dots= t;
1127  }
1128  if (f < from + from_len)
1129  memcpy(dots, STRING_WITH_LEN("...\0"));
1130  else
1131  *t= '\0';
1132  return t - to;
1133 }