Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
string.c
Go to the documentation of this file.
1 /*
2 ** string.c - String class
3 **
4 ** See Copyright Notice in mruby.h
5 */
6 
7 #include <ctype.h>
8 #ifndef SIZE_MAX
9  /* Some versions of VC++
10  * has SIZE_MAX in stdint.h
11  */
12 # include <limits.h>
13 #endif
14 #include <stddef.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include "mruby.h"
18 #include "mruby/array.h"
19 #include "mruby/class.h"
20 #include "mruby/range.h"
21 #include "mruby/string.h"
22 #include "re.h"
23 
24 const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
25 
26 typedef struct mrb_shared_string {
28  int refcnt;
29  char *ptr;
32 
33 #define MRB_STR_SHARED 1
34 #define MRB_STR_NOFREE 2
35 
36 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
37 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
38 
/*
 * Reallocate the buffer of string `s` to `capacity` bytes plus one byte for
 * the terminating NUL, and record the new capacity in s->aux.capa.
 * Requires a variable named `mrb` in the calling scope.  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define RESIZE_CAPA(s,capacity) do {\
  (s)->ptr = (char *)mrb_realloc(mrb, (s)->ptr, (capacity)+1);\
  (s)->aux.capa = (capacity);\
} while(0)
43 
44 static void
45 str_decref(mrb_state *mrb, mrb_shared_string *shared)
46 {
47  shared->refcnt--;
48  if (shared->refcnt == 0) {
49  if (!shared->nofree) {
50  mrb_free(mrb, shared->ptr);
51  }
52  mrb_free(mrb, shared);
53  }
54 }
55 
56 void
58 {
59  if (s->flags & MRB_STR_SHARED) {
60  mrb_shared_string *shared = s->aux.shared;
61 
62  if (shared->refcnt == 1 && s->ptr == shared->ptr) {
63  s->ptr = shared->ptr;
64  s->aux.capa = shared->len;
65  s->ptr[s->len] = '\0';
66  mrb_free(mrb, shared);
67  }
68  else {
69  char *ptr, *p;
70  mrb_int len;
71 
72  p = s->ptr;
73  len = s->len;
74  ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
75  if (p) {
76  memcpy(ptr, p, len);
77  }
78  ptr[len] = '\0';
79  s->ptr = ptr;
80  s->aux.capa = len;
81  str_decref(mrb, shared);
82  }
83  s->flags &= ~MRB_STR_SHARED;
84  return;
85  }
86  if (s->flags & MRB_STR_NOFREE) {
87  char *p = s->ptr;
88 
89  s->ptr = (char *)mrb_malloc(mrb, (size_t)s->len+1);
90  if (p) {
91  memcpy(s->ptr, p, s->len);
92  }
93  s->ptr[s->len] = '\0';
94  s->aux.capa = s->len;
95  return;
96  }
97 }
98 
101 {
102  int slen;
103  struct RString *s = mrb_str_ptr(str);
104 
105  mrb_str_modify(mrb, s);
106  slen = s->len;
107  if (len != slen) {
108  if (slen < len || slen - len > 256) {
109  RESIZE_CAPA(s, len);
110  }
111  s->len = len;
112  s->ptr[len] = '\0'; /* sentinel */
113  }
114  return str;
115 }
116 
117 static inline void
118 str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
119 {
120  struct RString *s = mrb_str_ptr(str);
121 
122  if (s->ptr != p || s->len != len) {
123  mrb_raise(mrb, E_RUNTIME_ERROR, "string modified");
124  }
125 }
126 
/* Allocate an MRB_TT_STRING object of class (mrb)->string_class, typed as struct RString*. */
#define mrb_obj_alloc_string(mrb) \
  ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
128 
129 /* char offset to byte offset */
130 int
131 mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
132 {
133  return pos;
134 }
135 
136 static struct RString*
137 str_new(mrb_state *mrb, const char *p, mrb_int len)
138 {
139  struct RString *s;
140 
141  s = mrb_obj_alloc_string(mrb);
142  s->len = len;
143  s->aux.capa = len;
144  s->ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
145  if (p) {
146  memcpy(s->ptr, p, len);
147  }
148  s->ptr[len] = '\0';
149  return s;
150 }
151 
152 void
154 {
155  s->c = mrb_str_ptr(obj)->c;
156 }
157 
158 static mrb_value
159 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
160 {
161  struct RString *s = str_new(mrb, 0, 0);
162 
163  str_with_class(mrb, s, str);
164  return mrb_obj_value(s);
165 }
166 
167 #ifndef MRB_STR_BUF_MIN_SIZE
168 # define MRB_STR_BUF_MIN_SIZE 128
169 #endif
170 
171 mrb_value
173 {
174  struct RString *s;
175 
176  s = mrb_obj_alloc_string(mrb);
177 
178  if (capa < MRB_STR_BUF_MIN_SIZE) {
179  capa = MRB_STR_BUF_MIN_SIZE;
180  }
181  s->len = 0;
182  s->aux.capa = capa;
183  s->ptr = (char *)mrb_malloc(mrb, capa+1);
184  s->ptr[0] = '\0';
185 
186  return mrb_obj_value(s);
187 }
188 
189 static void
190 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
191 {
192  mrb_int capa;
193  mrb_int total;
194  ptrdiff_t off = -1;
195 
196  mrb_str_modify(mrb, s);
197  if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
198  off = ptr - s->ptr;
199  }
200  if (len == 0) return;
201  capa = s->aux.capa;
202  if (s->len >= MRB_INT_MAX - (mrb_int)len) {
203  mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
204  }
205  total = s->len+len;
206  if (capa <= total) {
207  while (total > capa) {
208  if (capa + 1 >= MRB_INT_MAX / 2) {
209  capa = (total + 4095) / 4096;
210  break;
211  }
212  capa = (capa + 1) * 2;
213  }
214  RESIZE_CAPA(s, capa);
215  }
216  if (off != -1) {
217  ptr = s->ptr + off;
218  }
219  memcpy(s->ptr + s->len, ptr, len);
220  s->len = total;
221  s->ptr[total] = '\0'; /* sentinel */
222 }
223 
224 mrb_value
225 mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
226 {
227  if (len == 0) return str;
228  str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
229  return str;
230 }
231 
232 mrb_value
233 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
234 {
235  struct RString *s;
236 
237  s = str_new(mrb, p, len);
238  return mrb_obj_value(s);
239 }
240 
241 /*
242  * call-seq: (Caution! NULL string)
243  * String.new(str="") => new_str
244  *
245  * Returns a new string object containing a copy of <i>str</i>.
246  */
247 
248 mrb_value
249 mrb_str_new_cstr(mrb_state *mrb, const char *p)
250 {
251  struct RString *s;
252  size_t len;
253 
254  if (p) {
255  len = strlen(p);
256  if ((mrb_int)len < 0) {
257  mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
258  }
259  }
260  else {
261  len = 0;
262  }
263 
264  s = str_new(mrb, p, len);
265 
266  return mrb_obj_value(s);
267 }
268 
269 mrb_value
270 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
271 {
272  struct RString *s;
273 
274  s = mrb_obj_alloc_string(mrb);
275  s->len = len;
276  s->aux.capa = 0; /* nofree */
277  s->ptr = (char *)p;
278  s->flags = MRB_STR_NOFREE;
279  return mrb_obj_value(s);
280 }
281 
282 void
284 {
285  if (str->flags & MRB_STR_SHARED)
286  str_decref(mrb, str->aux.shared);
287  else if ((str->flags & MRB_STR_NOFREE) == 0)
288  mrb_free(mrb, str->ptr);
289 }
290 
291 char *
293 {
294  struct RString *s;
295 
296  if (!mrb_string_p(str0)) {
297  mrb_raise(mrb, E_TYPE_ERROR, "expected String");
298  }
299 
300  s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
301  if ((strlen(s->ptr) ^ s->len) != 0) {
302  mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
303  }
304  return s->ptr;
305 }
306 
307 static void
308 str_make_shared(mrb_state *mrb, struct RString *s)
309 {
310  if (!(s->flags & MRB_STR_SHARED)) {
312 
313  shared->refcnt = 1;
314  if (s->flags & MRB_STR_NOFREE) {
315  shared->nofree = TRUE;
316  shared->ptr = s->ptr;
317  s->flags &= ~MRB_STR_NOFREE;
318  }
319  else {
320  shared->nofree = FALSE;
321  if (s->aux.capa > s->len) {
322  s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1);
323  }
324  else {
325  shared->ptr = s->ptr;
326  }
327  }
328  shared->len = s->len;
329  s->aux.shared = shared;
330  s->flags |= MRB_STR_SHARED;
331  }
332 }
333 
334 /*
335  * call-seq: (Caution! string literal)
336  * String.new(str="") => new_str
337  *
338  * Returns a new string object containing a copy of <i>str</i>.
339  */
340 
341 mrb_value
343 {
344  struct RString *s, *orig;
346 
347  s = mrb_obj_alloc_string(mrb);
348  orig = mrb_str_ptr(str);
349  if (!(orig->flags & MRB_STR_SHARED)) {
350  str_make_shared(mrb, orig);
351  }
352  shared = orig->aux.shared;
353  shared->refcnt++;
354  s->ptr = shared->ptr;
355  s->len = shared->len;
356  s->aux.shared = shared;
357  s->flags |= MRB_STR_SHARED;
358 
359  return mrb_obj_value(s);
360 }
361 
362 /*
363  * call-seq:
364  * char* str = String("abcd"), len=strlen("abcd")
365  *
366  * Returns a new string object containing a copy of <i>str</i>.
367  */
368 const char*
369 mrb_str_body(mrb_value str, int *len_p)
370 {
371  struct RString *s = mrb_str_ptr(str);
372 
373  *len_p = s->len;
374  return s->ptr;
375 }
376 
377 /*
378  * call-seq: (Caution! String("abcd") change)
379  * String("abcdefg") = String("abcd") + String("efg")
380  *
381  * Appends <i>other</i> to <i>self</i> in place (a non-String <i>other</i> is first converted with mrb_str_to_str); nothing is returned.
382  */
383 void
385 {
386  struct RString *s1 = mrb_str_ptr(self), *s2;
387  mrb_int len;
388 
389  mrb_str_modify(mrb, s1);
390  if (!mrb_string_p(other)) {
391  other = mrb_str_to_str(mrb, other);
392  }
393  s2 = mrb_str_ptr(other);
394  len = s1->len + s2->len;
395 
396  if (s1->aux.capa < len) {
397  s1->aux.capa = len;
398  s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1);
399  }
400  memcpy(s1->ptr+s1->len, s2->ptr, s2->len);
401  s1->len = len;
402  s1->ptr[len] = '\0';
403 }
404 
405 /*
406  * call-seq: (Caution! String("abcd") remain)
407  * String("abcdefg") = String("abcd") + String("efg")
408  *
409  * Returns a new string holding the bytes of the first operand followed by those of the second; neither operand is modified.
410  */
411 mrb_value
413 {
414  struct RString *s = mrb_str_ptr(a);
415  struct RString *s2 = mrb_str_ptr(b);
416  struct RString *t;
417 
418  t = str_new(mrb, 0, s->len + s2->len);
419  memcpy(t->ptr, s->ptr, s->len);
420  memcpy(t->ptr + s->len, s2->ptr, s2->len);
421 
422  return mrb_obj_value(t);
423 }
424 
425 /* 15.2.10.5.2 */
426 
427 /*
428  * call-seq: (Caution! String("abcd") remain) for stack_argument
429  * String("abcdefg") = String("abcd") + String("efg")
430  *
431  * Returns a new string object containing a copy of <i>str</i>.
432  */
433 static mrb_value
434 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
435 {
436  mrb_value str;
437 
438  mrb_get_args(mrb, "S", &str);
439  return mrb_str_plus(mrb, self, str);
440 }
441 
442 /*
443  * call-seq:
444  * len = strlen(String("abcd"))
445  *
446  * Returns a new string object containing a copy of <i>str</i>.
447  */
448 static mrb_value
449 mrb_str_bytesize(mrb_state *mrb, mrb_value self)
450 {
451  struct RString *s = mrb_str_ptr(self);
452  return mrb_fixnum_value(s->len);
453 }
454 
455 /* 15.2.10.5.26 */
456 /* 15.2.10.5.33 */
457 /*
458  * call-seq:
459  * len = strlen(String("abcd"))
460  *
461  * Returns the length of <i>str</i> as a Fixnum.
462  */
463 mrb_value
465 {
466  struct RString *s = mrb_str_ptr(self);
467  return mrb_fixnum_value(s->len);
468 }
469 
470 /* 15.2.10.5.1 */
471 /*
472  * call-seq:
473  * str * integer => new_str
474  *
475  * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
476  * the receiver.
477  *
478  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
479  */
480 static mrb_value
481 mrb_str_times(mrb_state *mrb, mrb_value self)
482 {
483  mrb_int n,len,times;
484  struct RString *str2;
485  char *p;
486 
487  mrb_get_args(mrb, "i", &times);
488  if (times < 0) {
489  mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
490  }
491  if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
492  mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
493  }
494 
495  len = RSTRING_LEN(self)*times;
496  str2 = str_new(mrb, 0, len);
497  str_with_class(mrb, str2, self);
498  p = str2->ptr;
499  if (len > 0) {
500  n = RSTRING_LEN(self);
501  memcpy(p, RSTRING_PTR(self), n);
502  while (n <= len/2) {
503  memcpy(p + n, p, n);
504  n *= 2;
505  }
506  memcpy(p + n, p, len-n);
507  }
508  p[str2->len] = '\0';
509 
510  return mrb_obj_value(str2);
511 }
512 /* -------------------------------------------------------------- */
513 
/* Smaller of two values; each argument may be evaluated twice. */
#define lesser(a,b) (((b)<(a))?(b):(a))
515 
516 /* ---------------------------*/
517 /*
518  * call-seq:
519  * mrb_value str1 <=> mrb_value str2 => int
520  * > 1
521  * = 0
522  * < -1
523  */
524 int
526 {
527  mrb_int len;
528  mrb_int retval;
529  struct RString *s1 = mrb_str_ptr(str1);
530  struct RString *s2 = mrb_str_ptr(str2);
531 
532  len = lesser(s1->len, s2->len);
533  retval = memcmp(s1->ptr, s2->ptr, len);
534  if (retval == 0) {
535  if (s1->len == s2->len) return 0;
536  if (s1->len > s2->len) return 1;
537  return -1;
538  }
539  if (retval > 0) return 1;
540  return -1;
541 }
542 
543 /* 15.2.10.5.3 */
544 
545 /*
546  * call-seq:
547  * str <=> other_str => -1, 0, +1
548  *
549  * Comparison---Returns -1 if <i>str</i> is less than, 0 if
550  * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
551  * <i>other_str</i>. If the strings are of different lengths, and the strings are
552  * equal when compared up to the shortest length, then the longer string is
553  * considered greater than the shorter one. If the variable <code>$=</code> is
554  * <code>false</code>, the comparison is based on comparing the binary values
555  * of each character in the string. In older versions of Ruby, setting
556  * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
557  * in favor of using <code>String#casecmp</code>.
558  *
559  * <code><=></code> is the basis for the methods <code><</code>,
560  * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
561  * included from module <code>Comparable</code>. The method
562  * <code>String#==</code> does not use <code>Comparable#==</code>.
563  *
564  * "abcdef" <=> "abcde" #=> 1
565  * "abcdef" <=> "abcdef" #=> 0
566  * "abcdef" <=> "abcdefg" #=> -1
567  * "abcdef" <=> "ABCDEF" #=> 1
568  */
569 static mrb_value
570 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
571 {
572  mrb_value str2;
573  mrb_int result;
574 
575  mrb_get_args(mrb, "o", &str2);
576  if (!mrb_string_p(str2)) {
577  if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_s", 4))) {
578  return mrb_nil_value();
579  }
580  else if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "<=>", 3))) {
581  return mrb_nil_value();
582  }
583  else {
584  mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
585 
586  if (mrb_nil_p(tmp)) return mrb_nil_value();
587  if (!mrb_fixnum(tmp)) {
588  return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
589  }
590  result = -mrb_fixnum(tmp);
591  }
592  }
593  else {
594  result = mrb_str_cmp(mrb, str1, str2);
595  }
596  return mrb_fixnum_value(result);
597 }
598 
599 static mrb_bool
600 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
601 {
602  const mrb_int len = RSTRING_LEN(str1);
603 
604  if (len != RSTRING_LEN(str2)) return FALSE;
605  if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
606  return TRUE;
607  return FALSE;
608 }
609 
610 mrb_bool
612 {
613  if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
614  if (!mrb_string_p(str2)) {
615  if (mrb_nil_p(str2)) return FALSE;
616  if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_str", 6))) {
617  return FALSE;
618  }
619  str2 = mrb_funcall(mrb, str2, "to_str", 0);
620  return mrb_equal(mrb, str2, str1);
621  }
622  return str_eql(mrb, str1, str2);
623 }
624 
625 /* 15.2.10.5.4 */
626 /*
627  * call-seq:
628  * str == obj => true or false
629  *
630  * Equality---
631  * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
632  * Otherwise, returns <code>false</code> or <code>true</code>
633  *
634  * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
635  */
636 static mrb_value
637 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
638 {
639  mrb_value str2;
640  mrb_bool equal_p;
641 
642  mrb_get_args(mrb, "o", &str2);
643  equal_p = mrb_str_equal(mrb, str1, str2);
644 
645  return mrb_bool_value(equal_p);
646 }
647 /* ---------------------------------- */
648 mrb_value
650 {
651  mrb_value s;
652 
653  if (!mrb_string_p(str)) {
654  s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
655  if (mrb_nil_p(s)) {
656  s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
657  }
658  return s;
659  }
660  return str;
661 }
662 
663 char *
665 {
666  mrb_value str = mrb_str_to_str(mrb, ptr);
667  return RSTRING_PTR(str);
668 }
669 
670 static mrb_value
671 noregexp(mrb_state *mrb, mrb_value self)
672 {
673  mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
674  return mrb_nil_value();
675 }
676 
677 static void
678 regexp_check(mrb_state *mrb, mrb_value obj)
679 {
680  if (!memcmp(mrb_obj_classname(mrb, obj), REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1)) {
681  noregexp(mrb, obj);
682  }
683 }
684 
685 static inline mrb_int
686 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
687 {
688  const unsigned char *x = xs, *xe = xs + m;
689  const unsigned char *y = ys;
690  int i, qstable[256];
691 
692  /* Preprocessing */
693  for (i = 0; i < 256; ++i)
694  qstable[i] = m + 1;
695  for (; x < xe; ++x)
696  qstable[*x] = xe - x;
697  /* Searching */
698  for (; y + m <= ys + n; y += *(qstable + y[m])) {
699  if (*xs == *y && memcmp(xs, y, m) == 0)
700  return y - ys;
701  }
702  return -1;
703 }
704 
705 static mrb_int
706 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
707 {
708  const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
709 
710  if (m > n) return -1;
711  else if (m == n) {
712  return memcmp(x0, y0, m) == 0 ? 0 : -1;
713  }
714  else if (m < 1) {
715  return 0;
716  }
717  else if (m == 1) {
718  const unsigned char *ys = y, *ye = ys + n;
719  for (; y < ye; ++y) {
720  if (*x == *y)
721  return y - ys;
722  }
723  return -1;
724  }
725  return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
726 }
727 
728 static mrb_int
729 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
730 {
731  mrb_int pos;
732  char *s, *sptr;
733  mrb_int len, slen;
734 
735  len = RSTRING_LEN(str);
736  slen = RSTRING_LEN(sub);
737  if (offset < 0) {
738  offset += len;
739  if (offset < 0) return -1;
740  }
741  if (len - offset < slen) return -1;
742  s = RSTRING_PTR(str);
743  if (offset) {
744  s += offset;
745  }
746  if (slen == 0) return offset;
747  /* need proceed one character at a time */
748  sptr = RSTRING_PTR(sub);
749  slen = RSTRING_LEN(sub);
750  len = RSTRING_LEN(str) - offset;
751  pos = mrb_memsearch(sptr, slen, s, len);
752  if (pos < 0) return pos;
753  return pos + offset;
754 }
755 
756 mrb_value
758 {
759  /* should return shared string */
760  struct RString *s = mrb_str_ptr(str);
761 
762  return mrb_str_new(mrb, s->ptr, s->len);
763 }
764 
765 static mrb_value
766 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
767 {
768  mrb_int idx;
769 
770  regexp_check(mrb, indx);
771  switch (mrb_type(indx)) {
772  case MRB_TT_FIXNUM:
773  idx = mrb_fixnum(indx);
774 
775 num_index:
776  str = mrb_str_substr(mrb, str, idx, 1);
777  if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
778  return str;
779 
780  case MRB_TT_STRING:
781  if (mrb_str_index(mrb, str, indx, 0) != -1)
782  return mrb_str_dup(mrb, indx);
783  return mrb_nil_value();
784 
785  case MRB_TT_RANGE:
786  /* check if indx is Range */
787  {
788  mrb_int beg, len;
789  mrb_value tmp;
790 
791  len = RSTRING_LEN(str);
792  if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
793  tmp = mrb_str_subseq(mrb, str, beg, len);
794  return tmp;
795  }
796  else {
797  return mrb_nil_value();
798  }
799  }
800  default:
801  idx = mrb_fixnum(indx);
802  goto num_index;
803  }
804  return mrb_nil_value(); /* not reached */
805 }
806 
807 /* 15.2.10.5.6 */
808 /* 15.2.10.5.34 */
809 /*
810  * call-seq:
811  * str[fixnum] => fixnum or nil
812  * str[fixnum, fixnum] => new_str or nil
813  * str[range] => new_str or nil
814  * str[regexp] => new_str or nil
815  * str[regexp, fixnum] => new_str or nil
816  * str[other_str] => new_str or nil
817  * str.slice(fixnum) => fixnum or nil
818  * str.slice(fixnum, fixnum) => new_str or nil
819  * str.slice(range) => new_str or nil
820  * str.slice(regexp) => new_str or nil
821  * str.slice(regexp, fixnum) => new_str or nil
822  * str.slice(other_str) => new_str or nil
823  *
824  * Element Reference---If passed a single <code>Fixnum</code>, returns the code
825  * of the character at that position. If passed two <code>Fixnum</code>
826  * objects, returns a substring starting at the offset given by the first, and
827  * a length given by the second. If given a range, a substring containing
828  * characters at offsets given by the range is returned. In all three cases, if
829  * an offset is negative, it is counted from the end of <i>str</i>. Returns
830  * <code>nil</code> if the initial offset falls outside the string, the length
831  * is negative, or the beginning of the range is greater than the end.
832  *
833  * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
834  * returned. If a numeric parameter follows the regular expression, that
835  * component of the <code>MatchData</code> is returned instead. If a
836  * <code>String</code> is given, that string is returned if it occurs in
837  * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
838  * match.
839  *
840  * a = "hello there"
841  * a[1] #=> 101(1.8.7) "e"(1.9.2)
842  * a[1,3] #=> "ell"
843  * a[1..3] #=> "ell"
844  * a[-3,2] #=> "er"
845  * a[-4..-2] #=> "her"
846  * a[12..-1] #=> nil
847  * a[-2..-4] #=> ""
848  * a[/[aeiou](.)\1/] #=> "ell"
849  * a[/[aeiou](.)\1/, 0] #=> "ell"
850  * a[/[aeiou](.)\1/, 1] #=> "l"
851  * a[/[aeiou](.)\1/, 2] #=> nil
852  * a["lo"] #=> "lo"
853  * a["bye"] #=> nil
854  */
855 static mrb_value
856 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
857 {
858  mrb_value a1, a2;
859  int argc;
860 
861  argc = mrb_get_args(mrb, "o|o", &a1, &a2);
862  if (argc == 2) {
863  regexp_check(mrb, a1);
864  return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
865  }
866  if (argc != 1) {
867  mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
868  }
869  return mrb_str_aref(mrb, str, a1);
870 }
871 
872 /* 15.2.10.5.8 */
873 /*
874  * call-seq:
875  * str.capitalize! => str or nil
876  *
877  * Modifies <i>str</i> by converting the first character to uppercase and the
878  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
879  *
880  * a = "hello"
881  * a.capitalize! #=> "Hello"
882  * a #=> "Hello"
883  * a.capitalize! #=> nil
884  */
885 static mrb_value
886 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
887 {
888  char *p, *pend;
889  int modify = 0;
890  struct RString *s = mrb_str_ptr(str);
891 
892  mrb_str_modify(mrb, s);
893  if (s->len == 0 || !s->ptr) return mrb_nil_value();
894  p = s->ptr; pend = s->ptr + s->len;
895  if (ISLOWER(*p)) {
896  *p = TOUPPER(*p);
897  modify = 1;
898  }
899  while (++p < pend) {
900  if (ISUPPER(*p)) {
901  *p = TOLOWER(*p);
902  modify = 1;
903  }
904  }
905  if (modify) return str;
906  return mrb_nil_value();
907 }
908 
909 /* 15.2.10.5.7 */
910 /*
911  * call-seq:
912  * str.capitalize => new_str
913  *
914  * Returns a copy of <i>str</i> with the first character converted to uppercase
915  * and the remainder to lowercase.
916  *
917  * "hello".capitalize #=> "Hello"
918  * "HELLO".capitalize #=> "Hello"
919  * "123ABC".capitalize #=> "123abc"
920  */
921 static mrb_value
922 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
923 {
924  mrb_value str;
925 
926  str = mrb_str_dup(mrb, self);
927  mrb_str_capitalize_bang(mrb, str);
928  return str;
929 }
930 
931 /* 15.2.10.5.10 */
932 /*
933  * call-seq:
934  * str.chomp!(separator=$/) => str or nil
935  *
936  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
937  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
938  */
939 static mrb_value
940 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
941 {
942  mrb_value rs;
943  mrb_int newline;
944  char *p, *pp;
945  mrb_int rslen;
946  mrb_int len;
947  struct RString *s = mrb_str_ptr(str);
948 
949  mrb_str_modify(mrb, s);
950  len = s->len;
951  if (mrb_get_args(mrb, "|S", &rs) == 0) {
952  if (len == 0) return mrb_nil_value();
953  smart_chomp:
954  if (s->ptr[len-1] == '\n') {
955  s->len--;
956  if (s->len > 0 &&
957  s->ptr[s->len-1] == '\r') {
958  s->len--;
959  }
960  }
961  else if (s->ptr[len-1] == '\r') {
962  s->len--;
963  }
964  else {
965  return mrb_nil_value();
966  }
967  s->ptr[s->len] = '\0';
968  return str;
969  }
970 
971  if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
972  p = s->ptr;
973  rslen = RSTRING_LEN(rs);
974  if (rslen == 0) {
975  while (len>0 && p[len-1] == '\n') {
976  len--;
977  if (len>0 && p[len-1] == '\r')
978  len--;
979  }
980  if (len < s->len) {
981  s->len = len;
982  p[len] = '\0';
983  return str;
984  }
985  return mrb_nil_value();
986  }
987  if (rslen > len) return mrb_nil_value();
988  newline = RSTRING_PTR(rs)[rslen-1];
989  if (rslen == 1 && newline == '\n')
990  newline = RSTRING_PTR(rs)[rslen-1];
991  if (rslen == 1 && newline == '\n')
992  goto smart_chomp;
993 
994  pp = p + len - rslen;
995  if (p[len-1] == newline &&
996  (rslen <= 1 ||
997  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
998  s->len = len - rslen;
999  p[s->len] = '\0';
1000  return str;
1001  }
1002  return mrb_nil_value();
1003 }
1004 
1005 /* 15.2.10.5.9 */
1006 /*
1007  * call-seq:
1008  * str.chomp(separator=$/) => new_str
1009  *
1010  * Returns a new <code>String</code> with the given record separator removed
1011  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
1012  * changed from the default Ruby record separator, then <code>chomp</code> also
1013  * removes carriage return characters (that is it will remove <code>\n</code>,
1014  * <code>\r</code>, and <code>\r\n</code>).
1015  *
1016  * "hello".chomp #=> "hello"
1017  * "hello\n".chomp #=> "hello"
1018  * "hello\r\n".chomp #=> "hello"
1019  * "hello\n\r".chomp #=> "hello\n"
1020  * "hello\r".chomp #=> "hello"
1021  * "hello \n there".chomp #=> "hello \n there"
1022  * "hello".chomp("llo") #=> "he"
1023  */
1024 static mrb_value
1025 mrb_str_chomp(mrb_state *mrb, mrb_value self)
1026 {
1027  mrb_value str;
1028 
1029  str = mrb_str_dup(mrb, self);
1030  mrb_str_chomp_bang(mrb, str);
1031  return str;
1032 }
1033 
1034 /* 15.2.10.5.12 */
1035 /*
1036  * call-seq:
1037  * str.chop! => str or nil
1038  *
1039  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1040  * or <code>nil</code> if <i>str</i> is the empty string. See also
1041  * <code>String#chomp!</code>.
1042  */
1043 static mrb_value
1044 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
1045 {
1046  struct RString *s = mrb_str_ptr(str);
1047 
1048  mrb_str_modify(mrb, s);
1049  if (s->len > 0) {
1050  int len;
1051  len = s->len - 1;
1052  if (s->ptr[len] == '\n') {
1053  if (len > 0 &&
1054  s->ptr[len-1] == '\r') {
1055  len--;
1056  }
1057  }
1058  s->len = len;
1059  s->ptr[len] = '\0';
1060  return str;
1061  }
1062  return mrb_nil_value();
1063 }
1064 
1065 /* 15.2.10.5.11 */
1066 /*
1067  * call-seq:
1068  * str.chop => new_str
1069  *
1070  * Returns a new <code>String</code> with the last character removed. If the
1071  * string ends with <code>\r\n</code>, both characters are removed. Applying
1072  * <code>chop</code> to an empty string returns an empty
1073  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1074  * the string unchanged if it doesn't end in a record separator.
1075  *
1076  * "string\r\n".chop #=> "string"
1077  * "string\n\r".chop #=> "string\n"
1078  * "string\n".chop #=> "string"
1079  * "string".chop #=> "strin"
1080  * "x".chop #=> ""
1081  */
1082 static mrb_value
1083 mrb_str_chop(mrb_state *mrb, mrb_value self)
1084 {
1085  mrb_value str;
1086  str = mrb_str_dup(mrb, self);
1087  mrb_str_chop_bang(mrb, str);
1088  return str;
1089 }
1090 
1091 /* 15.2.10.5.14 */
1092 /*
1093  * call-seq:
1094  * str.downcase! => str or nil
1095  *
1096  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1097  * changes were made.
1098  */
1099 static mrb_value
1100 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1101 {
1102  char *p, *pend;
1103  int modify = 0;
1104  struct RString *s = mrb_str_ptr(str);
1105 
1106  mrb_str_modify(mrb, s);
1107  p = s->ptr;
1108  pend = s->ptr + s->len;
1109  while (p < pend) {
1110  if (ISUPPER(*p)) {
1111  *p = TOLOWER(*p);
1112  modify = 1;
1113  }
1114  p++;
1115  }
1116 
1117  if (modify) return str;
1118  return mrb_nil_value();
1119 }
1120 
1121 /* 15.2.10.5.13 */
1122 /*
1123  * call-seq:
1124  * str.downcase => new_str
1125  *
1126  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1127  * lowercase counterparts. The operation is locale insensitive---only
1128  * characters ``A'' to ``Z'' are affected.
1129  *
1130  * "hEllO".downcase #=> "hello"
1131  */
1132 static mrb_value
1133 mrb_str_downcase(mrb_state *mrb, mrb_value self)
1134 {
1135  mrb_value str;
1136 
1137  str = mrb_str_dup(mrb, self);
1138  mrb_str_downcase_bang(mrb, str);
1139  return str;
1140 }
1141 
1142 /* 15.2.10.5.16 */
1143 /*
1144  * call-seq:
1145  * str.empty? => true or false
1146  *
1147  * Returns <code>true</code> if <i>str</i> has a length of zero.
1148  *
1149  * "hello".empty? #=> false
1150  * "".empty? #=> true
1151  */
1152 static mrb_value
1153 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1154 {
1155  struct RString *s = mrb_str_ptr(self);
1156 
1157  return mrb_bool_value(s->len == 0);
1158 }
1159 
1160 /* 15.2.10.5.17 */
1161 /*
1162  * call-seq:
1163  * str.eql?(other) => true or false
1164  *
1165  * Two strings are equal if they have the same length and content.
1166  */
1167 static mrb_value
1168 mrb_str_eql(mrb_state *mrb, mrb_value self)
1169 {
1170  mrb_value str2;
1171  mrb_bool eql_p;
1172 
1173  mrb_get_args(mrb, "o", &str2);
1174  eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
1175 
1176  return mrb_bool_value(eql_p);
1177 }
1178 
1179 static mrb_value
1180 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1181 {
1182  struct RString *orig, *s;
1184 
1185  orig = mrb_str_ptr(str);
1186  str_make_shared(mrb, orig);
1187  shared = orig->aux.shared;
1188  s = mrb_obj_alloc_string(mrb);
1189  s->ptr = orig->ptr + beg;
1190  s->len = len;
1191  s->aux.shared = shared;
1192  s->flags |= MRB_STR_SHARED;
1193  shared->refcnt++;
1194 
1195  return mrb_obj_value(s);
1196 }
1197 
1198 mrb_value
1200 {
1201  mrb_value str2;
1202 
1203  if (len < 0) return mrb_nil_value();
1204  if (!RSTRING_LEN(str)) {
1205  len = 0;
1206  }
1207  if (beg > RSTRING_LEN(str)) return mrb_nil_value();
1208  if (beg < 0) {
1209  beg += RSTRING_LEN(str);
1210  if (beg < 0) return mrb_nil_value();
1211  }
1212  if (beg + len > RSTRING_LEN(str))
1213  len = RSTRING_LEN(str) - beg;
1214  if (len <= 0) {
1215  len = 0;
1216  }
1217  str2 = mrb_str_subseq(mrb, str, beg, len);
1218 
1219  return str2;
1220 }
1221 
1222 mrb_value
1224 {
1225  mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
1226  return str;
1227 }
1228 
1229 mrb_int
1231 {
1232  /* 1-8-7 */
1233  struct RString *s = mrb_str_ptr(str);
1234  mrb_int len = s->len;
1235  char *p = s->ptr;
1236  mrb_int key = 0;
1237 
1238  while (len--) {
1239  key = key*65599 + *p;
1240  p++;
1241  }
1242  key = key + (key>>5);
1243  return key;
1244 }
1245 
1246 /* 15.2.10.5.20 */
1247 /*
1248  * call-seq:
1249  * str.hash => fixnum
1250  *
1251  * Return a hash based on the string's length and content.
1252  */
1253 static mrb_value
1254 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1255 {
1256  mrb_int key = mrb_str_hash(mrb, self);
1257  return mrb_fixnum_value(key);
1258 }
1259 
1260 /* 15.2.10.5.21 */
1261 /*
1262  * call-seq:
1263  * str.include? other_str => true or false
1264  * str.include? fixnum => true or false
1265  *
1266  * Returns <code>true</code> if <i>str</i> contains the given string or
1267  * character.
1268  *
1269  * "hello".include? "lo" #=> true
1270  * "hello".include? "ol" #=> false
1271  * "hello".include? ?h #=> true
1272  */
1273 static mrb_value
1274 mrb_str_include(mrb_state *mrb, mrb_value self)
1275 {
1276  mrb_int i;
1277  mrb_value str2;
1278  mrb_bool include_p;
1279 
1280  mrb_get_args(mrb, "o", &str2);
1281  if (mrb_type(str2) == MRB_TT_FIXNUM) {
1282  include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
1283  }
1284  else {
1285  str2 = mrb_str_to_str(mrb, str2);
1286  i = mrb_str_index(mrb, self, str2, 0);
1287 
1288  include_p = (i != -1);
1289  }
1290 
1291  return mrb_bool_value(include_p);
1292 }
1293 
1294 /* 15.2.10.5.22 */
1295 /*
1296  * call-seq:
1297  * str.index(substring [, offset]) => fixnum or nil
1298  * str.index(fixnum [, offset]) => fixnum or nil
1299  * str.index(regexp [, offset]) => fixnum or nil
1300  *
1301  * Returns the index of the first occurrence of the given
1302  * <i>substring</i>,
1303  * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1304  * Returns
1305  * <code>nil</code> if not found.
1306  * If the second parameter is present, it
1307  * specifies the position in the string to begin the search.
1308  *
1309  * "hello".index('e') #=> 1
1310  * "hello".index('lo') #=> 3
1311  * "hello".index('a') #=> nil
1312  * "hello".index(101) #=> 1(101=0x65='e')
1313  * "hello".index(/[aeiou]/, -3) #=> 4
1314  */
1315 static mrb_value
1316 mrb_str_index_m(mrb_state *mrb, mrb_value str)
1317 {
1318  mrb_value *argv;
1319  int argc;
1320 
1321  mrb_value sub;
1322  mrb_int pos;
1323 
1324  mrb_get_args(mrb, "*", &argv, &argc);
1325  if (argc == 2) {
1326  pos = mrb_fixnum(argv[1]);
1327  sub = argv[0];
1328  }
1329  else {
1330  pos = 0;
1331  if (argc > 0)
1332  sub = argv[0];
1333  else
1334  sub = mrb_nil_value();
1335 
1336  }
1337  regexp_check(mrb, sub);
1338  if (pos < 0) {
1339  pos += RSTRING_LEN(str);
1340  if (pos < 0) {
1341  return mrb_nil_value();
1342  }
1343  }
1344 
1345  switch (mrb_type(sub)) {
1346  case MRB_TT_FIXNUM: {
1347  int c = mrb_fixnum(sub);
1348  mrb_int len = RSTRING_LEN(str);
1349  unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1350 
1351  for (;pos<len;pos++) {
1352  if (p[pos] == c) return mrb_fixnum_value(pos);
1353  }
1354  return mrb_nil_value();
1355  }
1356 
1357  default: {
1358  mrb_value tmp;
1359 
1360  tmp = mrb_check_string_type(mrb, sub);
1361  if (mrb_nil_p(tmp)) {
1362  mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1363  }
1364  sub = tmp;
1365  }
1366  /* fall through */
1367  case MRB_TT_STRING:
1368  pos = mrb_str_index(mrb, str, sub, pos);
1369  break;
1370  }
1371 
1372  if (pos == -1) return mrb_nil_value();
1373  return mrb_fixnum_value(pos);
1374 }
1375 
1376 #define STR_REPLACE_SHARED_MIN 10
1377 
1378 static mrb_value
1379 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
1380 {
1381  if (s2->flags & MRB_STR_SHARED) {
1382  L_SHARE:
1383  if (s1->flags & MRB_STR_SHARED){
1384  str_decref(mrb, s1->aux.shared);
1385  }
1386  else {
1387  mrb_free(mrb, s1->ptr);
1388  }
1389  s1->ptr = s2->ptr;
1390  s1->len = s2->len;
1391  s1->aux.shared = s2->aux.shared;
1392  s1->flags |= MRB_STR_SHARED;
1393  s1->aux.shared->refcnt++;
1394  }
1395  else if (s2->len > STR_REPLACE_SHARED_MIN) {
1396  str_make_shared(mrb, s2);
1397  goto L_SHARE;
1398  }
1399  else {
1400  if (s1->flags & MRB_STR_SHARED) {
1401  str_decref(mrb, s1->aux.shared);
1402  s1->flags &= ~MRB_STR_SHARED;
1403  s1->ptr = (char *)mrb_malloc(mrb, s2->len+1);
1404  }
1405  else {
1406  s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1);
1407  }
1408  memcpy(s1->ptr, s2->ptr, s2->len);
1409  s1->ptr[s2->len] = 0;
1410  s1->len = s2->len;
1411  s1->aux.capa = s2->len;
1412  }
1413  return mrb_obj_value(s1);
1414 }
1415 
1416 /* 15.2.10.5.24 */
1417 /* 15.2.10.5.28 */
1418 /*
1419  * call-seq:
1420  * str.replace(other_str) => str
1421  *
1422  * s = "hello" #=> "hello"
1423  * s.replace "world" #=> "world"
1424  */
1425 static mrb_value
1426 mrb_str_replace(mrb_state *mrb, mrb_value str)
1427 {
1428  mrb_value str2;
1429 
1430  mrb_get_args(mrb, "S", &str2);
1431  return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1432 }
1433 
1434 /* 15.2.10.5.23 */
1435 /*
1436  * call-seq:
1437  * String.new(str="") => new_str
1438  *
1439  * Returns a new string object containing a copy of <i>str</i>.
1440  */
1441 static mrb_value
1442 mrb_str_init(mrb_state *mrb, mrb_value self)
1443 {
1444  mrb_value str2;
1445 
1446  if (mrb_get_args(mrb, "|S", &str2) == 1) {
1447  str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1448  }
1449  return self;
1450 }
1451 
1452 /* 15.2.10.5.25 */
1453 /* 15.2.10.5.41 */
1454 /*
1455  * call-seq:
1456  * str.intern => symbol
1457  * str.to_sym => symbol
1458  *
1459  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1460  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1461  *
1462  * "Koala".intern #=> :Koala
1463  * s = 'cat'.to_sym #=> :cat
1464  * s == :cat #=> true
1465  * s = '@cat'.to_sym #=> :@cat
1466  * s == :@cat #=> true
1467  *
1468  * This can also be used to create symbols that cannot be represented using the
1469  * <code>:xxx</code> notation.
1470  *
1471  * 'cat and dog'.to_sym #=> :"cat and dog"
1472  */
1473 mrb_value
1475 {
1476  mrb_sym id;
1477 
1478  id = mrb_intern_str(mrb, self);
1479  return mrb_symbol_value(id);
1480 
1481 }
1482 /* ---------------------------------- */
1483 mrb_value
1485 {
1486  mrb_value str;
1487 
1488  if (mrb_string_p(obj)) {
1489  return obj;
1490  }
1491  str = mrb_funcall(mrb, obj, "to_s", 0);
1492  if (!mrb_string_p(str))
1493  return mrb_any_to_s(mrb, obj);
1494  return str;
1495 }
1496 
1497 mrb_value
1499 {
1500  struct RString *p_str;
1501  char *p1;
1502  char *p2;
1503  uintptr_t n = (uintptr_t)p;
1504 
1505  p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1506  p1 = p_str->ptr;
1507  *p1++ = '0';
1508  *p1++ = 'x';
1509  p2 = p1;
1510 
1511  do {
1512  *p2++ = mrb_digitmap[n % 16];
1513  n /= 16;
1514  } while (n > 0);
1515  *p2 = '\0';
1516  p_str->len = (mrb_int)(p2 - p_str->ptr);
1517 
1518  while (p1 < p2) {
1519  const char c = *p1;
1520  *p1++ = *--p2;
1521  *p2 = c;
1522  }
1523 
1524  return mrb_obj_value(p_str);
1525 }
1526 
1527 mrb_value
1529 {
1530  return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1531 }
1532 
1533 mrb_value
1535 {
1536  return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1537 }
1538 
1539 /* ---------------------------------- */
1540 /* 15.2.10.5.29 */
1541 /*
1542  * call-seq:
1543  * str.reverse => new_str
1544  *
1545  * Returns a new string with the characters from <i>str</i> in reverse order.
1546  *
1547  * "stressed".reverse #=> "desserts"
1548  */
1549 static mrb_value
1550 mrb_str_reverse(mrb_state *mrb, mrb_value str)
1551 {
1552  struct RString *s2;
1553  char *s, *e, *p;
1554 
1555  if (RSTRING(str)->len <= 1) return mrb_str_dup(mrb, str);
1556 
1557  s2 = str_new(mrb, 0, RSTRING(str)->len);
1558  str_with_class(mrb, s2, str);
1559  s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
1560  p = s2->ptr;
1561 
1562  while (e >= s) {
1563  *p++ = *e--;
1564  }
1565  return mrb_obj_value(s2);
1566 }
1567 
1568 /* 15.2.10.5.30 */
1569 /*
1570  * call-seq:
1571  * str.reverse! => str
1572  *
1573  * Reverses <i>str</i> in place.
1574  */
1575 static mrb_value
1576 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
1577 {
1578  struct RString *s = mrb_str_ptr(str);
1579  char *p, *e;
1580  char c;
1581 
1582  mrb_str_modify(mrb, s);
1583  if (s->len > 1) {
1584  p = s->ptr;
1585  e = p + s->len - 1;
1586  while (p < e) {
1587  c = *p;
1588  *p++ = *e;
1589  *e-- = c;
1590  }
1591  }
1592  return str;
1593 }
1594 
1595 /*
1596  * call-seq:
1597  * str.rindex(substring [, fixnum]) => fixnum or nil
1598  * str.rindex(fixnum [, fixnum]) => fixnum or nil
1599  * str.rindex(regexp [, fixnum]) => fixnum or nil
1600  *
1601  * Returns the index of the last occurrence of the given <i>substring</i>,
1602  * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1603  * <code>nil</code> if not found. If the second parameter is present, it
1604  * specifies the position in the string to end the search---characters beyond
1605  * this point will not be considered.
1606  *
1607  * "hello".rindex('e') #=> 1
1608  * "hello".rindex('l') #=> 3
1609  * "hello".rindex('a') #=> nil
1610  * "hello".rindex(101) #=> 1
1611  * "hello".rindex(/[aeiou]/, -2) #=> 1
1612  */
1613 static mrb_int
1614 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
1615 {
1616  char *s, *sbeg, *t;
1617  struct RString *ps = mrb_str_ptr(str);
1618  struct RString *psub = mrb_str_ptr(sub);
1619  mrb_int len = psub->len;
1620 
1621  /* substring longer than string */
1622  if (ps->len < len) return -1;
1623  if (ps->len - pos < len) {
1624  pos = ps->len - len;
1625  }
1626  sbeg = ps->ptr;
1627  s = ps->ptr + pos;
1628  t = psub->ptr;
1629  if (len) {
1630  while (sbeg <= s) {
1631  if (memcmp(s, t, len) == 0) {
1632  return s - ps->ptr;
1633  }
1634  s--;
1635  }
1636  return -1;
1637  }
1638  else {
1639  return pos;
1640  }
1641 }
1642 
1643 /* 15.2.10.5.31 */
1644 /*
1645  * call-seq:
1646  * str.rindex(substring [, fixnum]) => fixnum or nil
1647  * str.rindex(fixnum [, fixnum]) => fixnum or nil
1648  * str.rindex(regexp [, fixnum]) => fixnum or nil
1649  *
1650  * Returns the index of the last occurrence of the given <i>substring</i>,
1651  * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1652  * <code>nil</code> if not found. If the second parameter is present, it
1653  * specifies the position in the string to end the search---characters beyond
1654  * this point will not be considered.
1655  *
1656  * "hello".rindex('e') #=> 1
1657  * "hello".rindex('l') #=> 3
1658  * "hello".rindex('a') #=> nil
1659  * "hello".rindex(101) #=> 1
1660  * "hello".rindex(/[aeiou]/, -2) #=> 1
1661  */
1662 static mrb_value
1663 mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
1664 {
1665  mrb_value *argv;
1666  int argc;
1667  mrb_value sub;
1668  mrb_value vpos;
1669  int pos, len = RSTRING_LEN(str);
1670 
1671  mrb_get_args(mrb, "*", &argv, &argc);
1672  if (argc == 2) {
1673  sub = argv[0];
1674  vpos = argv[1];
1675  pos = mrb_fixnum(vpos);
1676  if (pos < 0) {
1677  pos += len;
1678  if (pos < 0) {
1679  regexp_check(mrb, sub);
1680  return mrb_nil_value();
1681  }
1682  }
1683  if (pos > len) pos = len;
1684  }
1685  else {
1686  pos = len;
1687  if (argc > 0)
1688  sub = argv[0];
1689  else
1690  sub = mrb_nil_value();
1691  }
1692  regexp_check(mrb, sub);
1693 
1694  switch (mrb_type(sub)) {
1695  case MRB_TT_FIXNUM: {
1696  int c = mrb_fixnum(sub);
1697  mrb_int len = RSTRING_LEN(str);
1698  unsigned char *p = (unsigned char*)RSTRING_PTR(str);
1699 
1700  for (pos=len;pos>=0;pos--) {
1701  if (p[pos] == c) return mrb_fixnum_value(pos);
1702  }
1703  return mrb_nil_value();
1704  }
1705 
1706  default: {
1707  mrb_value tmp;
1708 
1709  tmp = mrb_check_string_type(mrb, sub);
1710  if (mrb_nil_p(tmp)) {
1711  mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1712  }
1713  sub = tmp;
1714  }
1715  /* fall through */
1716  case MRB_TT_STRING:
1717  pos = mrb_str_rindex(mrb, str, sub, pos);
1718  if (pos >= 0) return mrb_fixnum_value(pos);
1719  break;
1720 
1721  } /* end of switch (TYPE(sub)) */
1722  return mrb_nil_value();
1723 }
1724 
/*
 * Lookup table indexed by byte value: 1 for the six ASCII whitespace bytes
 * (\t \n \v \f \r and space), 0 for every other value.
 */
static const char isspacetable[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* Locale-independent whitespace test; the cast keeps the index in 0..255
   even when plain char is signed. */
#define ascii_isspace(c) (isspacetable[(unsigned char)(c)])
1745 
1746 /* 15.2.10.5.35 */
1747 
1748 /*
1749  * call-seq:
1750  * str.split(pattern=$;, [limit]) => anArray
1751  *
1752  * Divides <i>str</i> into substrings based on a delimiter, returning an array
1753  * of these substrings.
1754  *
1755  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1756  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1757  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1758  * of contiguous whitespace characters ignored.
1759  *
1760  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1761  * pattern matches. Whenever the pattern matches a zero-length string,
1762  * <i>str</i> is split into individual characters.
1763  *
1764  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1765  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1766  * split on whitespace as if ` ' were specified.
1767  *
1768  * If the <i>limit</i> parameter is omitted, trailing null fields are
1769  * suppressed. If <i>limit</i> is a positive number, at most that number of
1770  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1771  * string is returned as the only entry in an array). If negative, there is no
1772  * limit to the number of fields returned, and trailing null fields are not
1773  * suppressed.
1774  *
1775  * " now's the time".split #=> ["now's", "the", "time"]
1776  * " now's the time".split(' ') #=> ["now's", "the", "time"]
1777  * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1778  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
1779  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1780  * "hello".split(//, 3) #=> ["h", "e", "llo"]
1781  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
1782  *
1783  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1784  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1785  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1786  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1787  */
1788 
1789 static mrb_value
1790 mrb_str_split_m(mrb_state *mrb, mrb_value str)
1791 {
1792  int argc;
1793  mrb_value spat = mrb_nil_value();
1794  enum {awk, string, regexp} split_type = string;
1795  long i = 0, lim_p;
1796  mrb_int beg;
1797  mrb_int end;
1798  mrb_int lim = 0;
1799  mrb_value result, tmp;
1800 
1801  argc = mrb_get_args(mrb, "|oi", &spat, &lim);
1802  lim_p = (lim > 0 && argc == 2);
1803  if (argc == 2) {
1804  if (lim == 1) {
1805  if (RSTRING_LEN(str) == 0)
1806  return mrb_ary_new_capa(mrb, 0);
1807  return mrb_ary_new_from_values(mrb, 1, &str);
1808  }
1809  i = 1;
1810  }
1811 
1812  if (argc == 0 || mrb_nil_p(spat)) {
1813  split_type = awk;
1814  }
1815  else {
1816  if (mrb_string_p(spat)) {
1817  split_type = string;
1818  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
1819  split_type = awk;
1820  }
1821  }
1822  else {
1823  noregexp(mrb, str);
1824  }
1825  }
1826 
1827  result = mrb_ary_new(mrb);
1828  beg = 0;
1829  if (split_type == awk) {
1830  char *ptr = RSTRING_PTR(str);
1831  char *eptr = RSTRING_END(str);
1832  char *bptr = ptr;
1833  int skip = 1;
1834  unsigned int c;
1835 
1836  end = beg;
1837  while (ptr < eptr) {
1838  int ai = mrb_gc_arena_save(mrb);
1839  c = (unsigned char)*ptr++;
1840  if (skip) {
1841  if (ascii_isspace(c)) {
1842  beg = ptr - bptr;
1843  }
1844  else {
1845  end = ptr - bptr;
1846  skip = 0;
1847  if (lim_p && lim <= i) break;
1848  }
1849  }
1850  else if (ascii_isspace(c)) {
1851  mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
1852  mrb_gc_arena_restore(mrb, ai);
1853  skip = 1;
1854  beg = ptr - bptr;
1855  if (lim_p) ++i;
1856  }
1857  else {
1858  end = ptr - bptr;
1859  }
1860  }
1861  }
1862  else if (split_type == string) {
1863  char *ptr = RSTRING_PTR(str);
1864  char *temp = ptr;
1865  char *eptr = RSTRING_END(str);
1866  mrb_int slen = RSTRING_LEN(spat);
1867 
1868  if (slen == 0) {
1869  int ai = mrb_gc_arena_save(mrb);
1870  while (ptr < eptr) {
1871  mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
1872  mrb_gc_arena_restore(mrb, ai);
1873  ptr++;
1874  if (lim_p && lim <= ++i) break;
1875  }
1876  }
1877  else {
1878  char *sptr = RSTRING_PTR(spat);
1879  int ai = mrb_gc_arena_save(mrb);
1880 
1881  while (ptr < eptr &&
1882  (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
1883  mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
1884  mrb_gc_arena_restore(mrb, ai);
1885  ptr += end + slen;
1886  if (lim_p && lim <= ++i) break;
1887  }
1888  }
1889  beg = ptr - temp;
1890  }
1891  else {
1892  noregexp(mrb, str);
1893  }
1894  if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
1895  if (RSTRING_LEN(str) == beg) {
1896  tmp = mrb_str_new_empty(mrb, str);
1897  }
1898  else {
1899  tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
1900  }
1901  mrb_ary_push(mrb, result, tmp);
1902  }
1903  if (!lim_p && lim == 0) {
1904  mrb_int len;
1905  while ((len = RARRAY_LEN(result)) > 0 &&
1906  (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
1907  mrb_ary_pop(mrb, result);
1908  }
1909 
1910  return result;
1911 }
1912 
1913 mrb_value
1914 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
1915 {
1916  char *end;
1917  char sign = 1;
1918  int c;
1919  unsigned long n;
1920  mrb_int val;
1921 
1922 #undef ISDIGIT
1923 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1924 #define conv_digit(c) \
1925  (!ISASCII(c) ? -1 : \
1926  isdigit(c) ? ((c) - '0') : \
1927  islower(c) ? ((c) - 'a' + 10) : \
1928  isupper(c) ? ((c) - 'A' + 10) : \
1929  -1)
1930 
1931  if (!str) {
1932  if (badcheck) goto bad;
1933  return mrb_fixnum_value(0);
1934  }
1935  while (ISSPACE(*str)) str++;
1936 
1937  if (str[0] == '+') {
1938  str++;
1939  }
1940  else if (str[0] == '-') {
1941  str++;
1942  sign = 0;
1943  }
1944  if (str[0] == '+' || str[0] == '-') {
1945  if (badcheck) goto bad;
1946  return mrb_fixnum_value(0);
1947  }
1948  if (base <= 0) {
1949  if (str[0] == '0') {
1950  switch (str[1]) {
1951  case 'x': case 'X':
1952  base = 16;
1953  break;
1954  case 'b': case 'B':
1955  base = 2;
1956  break;
1957  case 'o': case 'O':
1958  base = 8;
1959  break;
1960  case 'd': case 'D':
1961  base = 10;
1962  break;
1963  default:
1964  base = 8;
1965  }
1966  }
1967  else if (base < -1) {
1968  base = -base;
1969  }
1970  else {
1971  base = 10;
1972  }
1973  }
1974  switch (base) {
1975  case 2:
1976  if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
1977  str += 2;
1978  }
1979  break;
1980  case 3:
1981  break;
1982  case 8:
1983  if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
1984  str += 2;
1985  }
1986  case 4: case 5: case 6: case 7:
1987  break;
1988  case 10:
1989  if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
1990  str += 2;
1991  }
1992  case 9: case 11: case 12: case 13: case 14: case 15:
1993  break;
1994  case 16:
1995  if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
1996  str += 2;
1997  }
1998  break;
1999  default:
2000  if (base < 2 || 36 < base) {
2001  mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2002  }
2003  break;
2004  } /* end of switch (base) { */
2005  if (*str == '0') { /* squeeze preceeding 0s */
2006  int us = 0;
2007  while ((c = *++str) == '0' || c == '_') {
2008  if (c == '_') {
2009  if (++us >= 2)
2010  break;
2011  }
2012  else
2013  us = 0;
2014  }
2015  if (!(c = *str) || ISSPACE(c)) --str;
2016  }
2017  c = *str;
2018  c = conv_digit(c);
2019  if (c < 0 || c >= base) {
2020  if (badcheck) goto bad;
2021  return mrb_fixnum_value(0);
2022  }
2023 
2024  n = strtoul((char*)str, &end, base);
2025  if (n > MRB_INT_MAX) {
2026  mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
2027  }
2028  val = n;
2029  if (badcheck) {
2030  if (end == str) goto bad; /* no number */
2031  while (*end && ISSPACE(*end)) end++;
2032  if (*end) goto bad; /* trailing garbage */
2033  }
2034 
2035  return mrb_fixnum_value(sign ? val : -val);
2036 bad:
2037  mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
2038  /* not reached */
2039  return mrb_fixnum_value(0);
2040 }
2041 
2042 char *
2044 {
2045  struct RString *ps = mrb_str_ptr(*ptr);
2046  char *s = ps->ptr;
2047 
2048  if (!s || ps->len != strlen(s)) {
2049  mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2050  }
2051  return s;
2052 }
2053 
2054 mrb_value
2055 mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
2056 {
2057  char *s;
2058  int len;
2059 
2060  str = mrb_str_to_str(mrb, str);
2061  if (badcheck) {
2062  s = mrb_string_value_cstr(mrb, &str);
2063  }
2064  else {
2065  s = RSTRING_PTR(str);
2066  }
2067  if (s) {
2068  len = RSTRING_LEN(str);
2069  if (s[len]) { /* no sentinel somehow */
2070  struct RString *temp_str = str_new(mrb, s, len);
2071  s = temp_str->ptr;
2072  }
2073  }
2074  return mrb_cstr_to_inum(mrb, s, base, badcheck);
2075 }
2076 
2077 /* 15.2.10.5.38 */
2078 /*
2079  * call-seq:
2080  * str.to_i(base=10) => integer
2081  *
2082  * Returns the result of interpreting leading characters in <i>str</i> as an
2083  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2084  * end of a valid number are ignored. If there is not a valid number at the
2085  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2086  * exception.
2087  *
2088  * "12345".to_i #=> 12345
2089  * "99 red balloons".to_i #=> 99
2090  * "0a".to_i #=> 0
2091  * "0a".to_i(16) #=> 10
2092  * "hello".to_i #=> 0
2093  * "1100101".to_i(2) #=> 101
2094  * "1100101".to_i(8) #=> 294977
2095  * "1100101".to_i(10) #=> 1100101
2096  * "1100101".to_i(16) #=> 17826049
2097  */
2098 static mrb_value
2099 mrb_str_to_i(mrb_state *mrb, mrb_value self)
2100 {
2101  mrb_value *argv;
2102  int argc;
2103  int base;
2104 
2105  mrb_get_args(mrb, "*", &argv, &argc);
2106  if (argc == 0)
2107  base = 10;
2108  else
2109  base = mrb_fixnum(argv[0]);
2110 
2111  if (base < 0) {
2112  mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2113  }
2114  return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/);
2115 }
2116 
2117 double
2118 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
2119 {
2120  char *end;
2121  double d;
2122 #if !defined(DBL_DIG)
2123 # define DBL_DIG 16
2124 #endif
2125 
2126  enum {max_width = 20};
2127 #define OutOfRange() (((w = end - p) > max_width) ? \
2128  (w = max_width, ellipsis = "...") : \
2129  (w = (int)(end - p), ellipsis = ""))
2130 
2131  if (!p) return 0.0;
2132  while (ISSPACE(*p)) p++;
2133 
2134  if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2135  return 0.0;
2136  }
2137  d = strtod(p, &end);
2138  if (p == end) {
2139  if (badcheck) {
2140 bad:
2141  mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
2142  /* not reached */
2143  }
2144  return d;
2145  }
2146  if (*end) {
2147  char buf[DBL_DIG * 4 + 10];
2148  char *n = buf;
2149  char *e = buf + sizeof(buf) - 1;
2150  char prev = 0;
2151 
2152  while (p < end && n < e) prev = *n++ = *p++;
2153  while (*p) {
2154  if (*p == '_') {
2155  /* remove underscores between digits */
2156  if (badcheck) {
2157  if (n == buf || !ISDIGIT(prev)) goto bad;
2158  ++p;
2159  if (!ISDIGIT(*p)) goto bad;
2160  }
2161  else {
2162  while (*++p == '_');
2163  continue;
2164  }
2165  }
2166  prev = *p++;
2167  if (n < e) *n++ = prev;
2168  }
2169  *n = '\0';
2170  p = buf;
2171 
2172  if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2173  return 0.0;
2174  }
2175 
2176  d = strtod(p, &end);
2177  if (badcheck) {
2178  if (!end || p == end) goto bad;
2179  while (*end && ISSPACE(*end)) end++;
2180  if (*end) goto bad;
2181  }
2182  }
2183  return d;
2184 }
2185 
2186 double
2187 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
2188 {
2189  char *s;
2190  int len;
2191 
2192  str = mrb_str_to_str(mrb, str);
2193  s = RSTRING_PTR(str);
2194  len = RSTRING_LEN(str);
2195  if (s) {
2196  if (badcheck && memchr(s, '\0', len)) {
2197  mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2198  }
2199  if (s[len]) { /* no sentinel somehow */
2200  struct RString *temp_str = str_new(mrb, s, len);
2201  s = temp_str->ptr;
2202  }
2203  }
2204  return mrb_cstr_to_dbl(mrb, s, badcheck);
2205 }
2206 
2207 /* 15.2.10.5.39 */
2208 /*
2209  * call-seq:
2210  * str.to_f => float
2211  *
2212  * Returns the result of interpreting leading characters in <i>str</i> as a
2213  * floating point number. Extraneous characters past the end of a valid number
2214  * are ignored. If there is not a valid number at the start of <i>str</i>,
2215  * <code>0.0</code> is returned. This method never raises an exception.
2216  *
2217  * "123.45e1".to_f #=> 1234.5
2218  * "45.67 degrees".to_f #=> 45.67
2219  * "thx1138".to_f #=> 0.0
2220  */
2221 static mrb_value
2222 mrb_str_to_f(mrb_state *mrb, mrb_value self)
2223 {
2224  return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, 0/*Qfalse*/));
2225 }
2226 
2227 /* 15.2.10.5.40 */
2228 /*
2229  * call-seq:
2230  * str.to_s => str
2231  * str.to_str => str
2232  *
2233  * Returns the receiver.
2234  */
2235 static mrb_value
2236 mrb_str_to_s(mrb_state *mrb, mrb_value self)
2237 {
2238  if (mrb_obj_class(mrb, self) != mrb->string_class) {
2239  return mrb_str_dup(mrb, self);
2240  }
2241  return self;
2242 }
2243 
2244 /* 15.2.10.5.43 */
2245 /*
2246  * call-seq:
2247  * str.upcase! => str or nil
2248  *
2249  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2250  * were made.
2251  */
2252 static mrb_value
2253 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2254 {
2255  struct RString *s = mrb_str_ptr(str);
2256  char *p, *pend;
2257  int modify = 0;
2258 
2259  mrb_str_modify(mrb, s);
2260  p = RSTRING_PTR(str);
2261  pend = RSTRING_END(str);
2262  while (p < pend) {
2263  if (ISLOWER(*p)) {
2264  *p = TOUPPER(*p);
2265  modify = 1;
2266  }
2267  p++;
2268  }
2269 
2270  if (modify) return str;
2271  return mrb_nil_value();
2272 }
2273 
2274 /* 15.2.10.5.42 */
2275 /*
2276  * call-seq:
2277  * str.upcase => new_str
2278  *
2279  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2280  * uppercase counterparts. The operation is locale insensitive---only
2281  * characters ``a'' to ``z'' are affected.
2282  *
2283  * "hEllO".upcase #=> "HELLO"
2284  */
2285 static mrb_value
2286 mrb_str_upcase(mrb_state *mrb, mrb_value self)
2287 {
2288  mrb_value str;
2289 
2290  str = mrb_str_dup(mrb, self);
2291  mrb_str_upcase_bang(mrb, str);
2292  return str;
2293 }
2294 
2295 /*
2296  * call-seq:
2297  * str.dump -> new_str
2298  *
2299  * Produces a version of <i>str</i> with all nonprinting characters replaced by
2300  * <code>\nnn</code> notation and all special characters escaped.
2301  */
2302 mrb_value
2304 {
2305  mrb_int len;
2306  const char *p, *pend;
2307  char *q;
2308  struct RString *result;
2309 
2310  len = 2; /* "" */
2311  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2312  while (p < pend) {
2313  unsigned char c = *p++;
2314  switch (c) {
2315  case '"': case '\\':
2316  case '\n': case '\r':
2317  case '\t': case '\f':
2318  case '\013': case '\010': case '\007': case '\033':
2319  len += 2;
2320  break;
2321 
2322  case '#':
2323  len += IS_EVSTR(p, pend) ? 2 : 1;
2324  break;
2325 
2326  default:
2327  if (ISPRINT(c)) {
2328  len++;
2329  }
2330  else {
2331  len += 4; /* \NNN */
2332  }
2333  break;
2334  }
2335  }
2336 
2337  result = str_new(mrb, 0, len);
2338  str_with_class(mrb, result, str);
2339  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2340  q = result->ptr;
2341 
2342  *q++ = '"';
2343  while (p < pend) {
2344  unsigned char c = *p++;
2345 
2346  switch (c) {
2347  case '"':
2348  case '\\':
2349  *q++ = '\\';
2350  *q++ = c;
2351  break;
2352 
2353  case '\n':
2354  *q++ = '\\';
2355  *q++ = 'n';
2356  break;
2357 
2358  case '\r':
2359  *q++ = '\\';
2360  *q++ = 'r';
2361  break;
2362 
2363  case '\t':
2364  *q++ = '\\';
2365  *q++ = 't';
2366  break;
2367 
2368  case '\f':
2369  *q++ = '\\';
2370  *q++ = 'f';
2371  break;
2372 
2373  case '\013':
2374  *q++ = '\\';
2375  *q++ = 'v';
2376  break;
2377 
2378  case '\010':
2379  *q++ = '\\';
2380  *q++ = 'b';
2381  break;
2382 
2383  case '\007':
2384  *q++ = '\\';
2385  *q++ = 'a';
2386  break;
2387 
2388  case '\033':
2389  *q++ = '\\';
2390  *q++ = 'e';
2391  break;
2392 
2393  case '#':
2394  if (IS_EVSTR(p, pend)) *q++ = '\\';
2395  *q++ = '#';
2396  break;
2397 
2398  default:
2399  if (ISPRINT(c)) {
2400  *q++ = c;
2401  }
2402  else {
2403  *q++ = '\\';
2404  q[2] = '0' + c % 8; c /= 8;
2405  q[1] = '0' + c % 8; c /= 8;
2406  q[0] = '0' + c % 8;
2407  q += 3;
2408  }
2409  }
2410  }
2411  *q++ = '"';
2412  return mrb_obj_value(result);
2413 }
2414 
2415 mrb_value
2416 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2417 {
2418  if ((mrb_int)len < 0) {
2419  mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
2420  }
2421  str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
2422  return str;
2423 }
2424 
2425 mrb_value
2426 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2427 {
2428  return mrb_str_cat(mrb, str, ptr, strlen(ptr));
2429 }
2430 
2431 mrb_value
2433 {
2434  str2 = mrb_str_to_str(mrb, str2);
2435  return mrb_str_buf_append(mrb, str, str2);
2436 }
2437 
2438 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2439 
2440 /*
2441  * call-seq:
2442  * str.inspect -> string
2443  *
2444  * Returns a printable version of _str_, surrounded by quote marks,
2445  * with special characters escaped.
2446  *
2447  * str = "hello"
2448  * str[3] = "\b"
2449  * str.inspect #=> "\"hel\\bo\""
2450  */
2451 mrb_value
2453 {
2454  const char *p, *pend;
2455  char buf[CHAR_ESC_LEN + 1];
2456  mrb_value result = mrb_str_new(mrb, "\"", 1);
2457 
2458  p = RSTRING_PTR(str); pend = RSTRING_END(str);
2459  for (;p < pend; p++) {
2460  unsigned int c, cc;
2461 
2462  c = *p;
2463  if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
2464  buf[0] = '\\'; buf[1] = c;
2465  mrb_str_buf_cat(mrb, result, buf, 2);
2466  continue;
2467  }
2468  if (ISPRINT(c)) {
2469  buf[0] = c;
2470  mrb_str_buf_cat(mrb, result, buf, 1);
2471  continue;
2472  }
2473  switch (c) {
2474  case '\n': cc = 'n'; break;
2475  case '\r': cc = 'r'; break;
2476  case '\t': cc = 't'; break;
2477  case '\f': cc = 'f'; break;
2478  case '\013': cc = 'v'; break;
2479  case '\010': cc = 'b'; break;
2480  case '\007': cc = 'a'; break;
2481  case 033: cc = 'e'; break;
2482  default: cc = 0; break;
2483  }
2484  if (cc) {
2485  buf[0] = '\\';
2486  buf[1] = (char)cc;
2487  mrb_str_buf_cat(mrb, result, buf, 2);
2488  continue;
2489  }
2490  else {
2491  buf[0] = '\\';
2492  buf[3] = '0' + c % 8; c /= 8;
2493  buf[2] = '0' + c % 8; c /= 8;
2494  buf[1] = '0' + c % 8;
2495  mrb_str_buf_cat(mrb, result, buf, 4);
2496  continue;
2497  }
2498  }
2499  mrb_str_buf_cat(mrb, result, "\"", 1);
2500 
2501  return result;
2502 }
2503 
2504 /*
2505  * call-seq:
2506  * str.bytes -> array of fixnums
2507  *
2508  * Returns an array of bytes in _str_.
2509  *
2510  * str = "hello"
2511  * str.bytes #=> [104, 101, 108, 108, 111]
2512  */
2513 static mrb_value
2514 mrb_str_bytes(mrb_state *mrb, mrb_value str)
2515 {
2516  struct RString *s = mrb_str_ptr(str);
2517  mrb_value a = mrb_ary_new_capa(mrb, s->len);
2518  unsigned char *p = (unsigned char *)(s->ptr), *pend = p + s->len;
2519 
2520  while (p < pend) {
2521  mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2522  p++;
2523  }
2524  return a;
2525 }
2526 
2527 /* ---------------------------*/
/*
 * Creates the "String" class as a subclass of mrb->object_class, stores it
 * in mrb->string_class, mixes in the "Comparable" module and registers the
 * instance methods listed below.
 *
 * Several Ruby-level names share one C handler:
 *   "[]" / "slice"                -> mrb_str_aref_m
 *   "initialize_copy" / "replace" -> mrb_str_replace
 *   "intern" / "to_sym"           -> mrb_str_intern
 *   "length" / "size"             -> mrb_str_size
 *   "to_s" / "to_str"             -> mrb_str_to_s
 *
 * The trailing numeric comments (15.2.10.5.x) look like section numbers of
 * the Ruby language specification -- TODO confirm.
 *
 * NOTE(review): this listing skips its own lines 2529 (the declarator with
 * the function name and parameters) and 2534 (a statement between the class
 * definition and the Comparable include).  Consult the real source file
 * before editing this function.
 */
2528 void
2530 {
2531  struct RClass *s;
2532 
2533  s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);
2535  mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable"));
2536 
2537 
2538  mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2539 
2540  mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2541  mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2542  mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2543  mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2544  mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2545  mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
  /* NOTE(review): "capitalize" above is declared MRB_ARGS_NONE() but the
   * bang variant is declared MRB_ARGS_REQ(1); one of them is presumably
   * wrong -- verify against mrb_str_capitalize_bang. */
2546  mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
2547  mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2548  mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
  /* NOTE(review): "chop"/"chop!" are declared as taking one required
   * argument; confirm against the handlers whether that is intended. */
2549  mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
2550  mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
2551  mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2552  mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2553  mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2554  mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2555 
  /* NOTE(review): "hash" is declared with one required argument; confirm
   * against mrb_str_hash_m. */
2556  mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
2557  mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2558  mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2559  mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2560  mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2561  mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2562  mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2563  mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2564  mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2565  mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2566  mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2567  mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2568  mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2569  mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2570 
2571  mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2572  mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2573  mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2574  mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2575  mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
  /* NOTE(review): "upcase"/"upcase!" use MRB_ARGS_REQ(1) while the
   * parallel "downcase"/"downcase!" entries use MRB_ARGS_NONE(); the two
   * pairs presumably should agree -- verify against the handlers. */
2576  mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
2577  mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
2578  mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2579  mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
2580 }