/*
 * Digit characters for the values 0 through 35: '0'-'9' followed by the
 * lowercase letters 'a'-'z' (36 characters plus the terminating NUL).
 * NOTE(review): presumably indexed by digit value when formatting numbers
 * in bases up to 36 -- the users of this table are not visible here.
 */
24 const char mrb_digitmap[] =
"0123456789abcdefghijklmnopqrstuvwxyz";
/*
 * String flag constants.  The values 1 and 2 occupy distinct bits, so they
 * can be combined in a single flags word.
 * NOTE(review): presumably SHARED marks a string whose buffer is shared with
 * another string and NOFREE marks a buffer that must not be freed; the code
 * that sets and tests these flags is not visible in this excerpt -- confirm.
 */
33 #define MRB_STR_SHARED 1
34 #define MRB_STR_NOFREE 2
39 #define RESIZE_CAPA(s,capacity) do {\
40 s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
41 s->aux.capa = capacity;\
74 ptr = (
char *)
mrb_malloc(mrb, (
size_t)len + 1);
81 str_decref(mrb, shared);
108 if (slen < len || slen - len > 256) {
122 if (s->
ptr != p || s->
len != len) {
/*
 * Allocates a new object of type MRB_TT_STRING whose class is
 * (mrb)->string_class by calling mrb_obj_alloc(), and casts the result to
 * struct RString*.  The mrb argument is expanded twice.
 */
127 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
146 memcpy(s->
ptr, p, len);
161 struct RString *s = str_new(mrb, 0, 0);
164 return mrb_obj_value(s);
167 #ifndef MRB_STR_BUF_MIN_SIZE
168 # define MRB_STR_BUF_MIN_SIZE 128
186 return mrb_obj_value(s);
197 if (ptr >= s->
ptr && ptr <= s->ptr + s->
len) {
200 if (len == 0)
return;
207 while (total > capa) {
209 capa = (total + 4095) / 4096;
212 capa = (capa + 1) * 2;
219 memcpy(s->
ptr + s->
len, ptr, len);
221 s->
ptr[total] =
'\0';
227 if (len == 0)
return str;
237 s = str_new(mrb, p, len);
238 return mrb_obj_value(s);
264 s = str_new(mrb, p, len);
266 return mrb_obj_value(s);
279 return mrb_obj_value(s);
301 if ((strlen(s->
ptr) ^ s->
len) != 0) {
350 str_make_shared(mrb, orig);
359 return mrb_obj_value(s);
394 len = s1->
len + s2->len;
400 memcpy(s1->
ptr+s1->
len, s2->ptr, s2->len);
418 t = str_new(mrb, 0, s->
len + s2->
len);
422 return mrb_obj_value(t);
452 return mrb_fixnum_value(s->
len);
467 return mrb_fixnum_value(s->
len);
496 str2 = str_new(mrb, 0, len);
506 memcpy(p + n, p, len-n);
510 return mrb_obj_value(str2);
/*
 * Evaluates to the smaller of a and b; when neither is greater, the result
 * is a.  Each argument appears twice in the expansion, so do not pass
 * expressions with side effects (e.g. lesser(i++, j)).
 */
514 #define lesser(a,b) (((a)>(b))?(b):(a))
533 retval = memcmp(s1->
ptr, s2->
ptr, len);
535 if (s1->
len == s2->
len)
return 0;
536 if (s1->
len > s2->
len)
return 1;
539 if (retval > 0)
return 1;
578 return mrb_nil_value();
581 return mrb_nil_value();
586 if (
mrb_nil_p(tmp))
return mrb_nil_value();
588 return mrb_funcall(mrb, mrb_fixnum_value(0),
"-", 1, tmp);
596 return mrb_fixnum_value(result);
622 return str_eql(mrb, str1, str2);
645 return mrb_bool_value(equal_p);
674 return mrb_nil_value();
686 mrb_memsearch_qs(
const unsigned char *xs,
mrb_int m,
const unsigned char *ys,
mrb_int n)
688 const unsigned char *x = xs, *xe = xs + m;
689 const unsigned char *y = ys;
693 for (i = 0; i < 256; ++
i)
696 qstable[*x] = xe - x;
698 for (; y + m <= ys + n; y += *(qstable + y[m])) {
699 if (*xs == *y && memcmp(xs, y, m) == 0)
706 mrb_memsearch(
const void *x0,
mrb_int m,
const void *y0,
mrb_int n)
708 const unsigned char *x = (
const unsigned char *)x0, *y = (
const unsigned char *)y0;
710 if (m > n)
return -1;
712 return memcmp(x0, y0, m) == 0 ? 0 : -1;
718 const unsigned char *ys = y, *ye = ys + n;
719 for (; y < ye; ++y) {
725 return mrb_memsearch_qs((
const unsigned char *)x0, m, (
const unsigned char *)y0, n);
739 if (offset < 0)
return -1;
741 if (len - offset < slen)
return -1;
746 if (slen == 0)
return offset;
751 pos = mrb_memsearch(sptr, slen, s, len);
752 if (pos < 0)
return pos;
770 regexp_check(mrb, indx);
781 if (mrb_str_index(mrb, str, indx, 0) != -1)
783 return mrb_nil_value();
793 tmp = mrb_str_subseq(mrb, str, beg, len);
797 return mrb_nil_value();
804 return mrb_nil_value();
863 regexp_check(mrb, a1);
869 return mrb_str_aref(mrb, str, a1);
893 if (s->
len == 0 || !s->
ptr)
return mrb_nil_value();
905 if (modify)
return str;
906 return mrb_nil_value();
927 mrb_str_capitalize_bang(mrb, str);
952 if (len == 0)
return mrb_nil_value();
954 if (s->
ptr[len-1] ==
'\n') {
957 s->
ptr[s->
len-1] ==
'\r') {
961 else if (s->
ptr[len-1] ==
'\r') {
965 return mrb_nil_value();
971 if (len == 0 ||
mrb_nil_p(rs))
return mrb_nil_value();
975 while (len>0 && p[len-1] ==
'\n') {
977 if (len>0 && p[len-1] ==
'\r')
985 return mrb_nil_value();
987 if (rslen > len)
return mrb_nil_value();
989 if (rslen == 1 && newline ==
'\n')
991 if (rslen == 1 && newline ==
'\n')
994 pp = p + len - rslen;
995 if (p[len-1] == newline &&
998 s->
len = len - rslen;
1002 return mrb_nil_value();
1030 mrb_str_chomp_bang(mrb, str);
1052 if (s->
ptr[len] ==
'\n') {
1054 s->
ptr[len-1] ==
'\r') {
1062 return mrb_nil_value();
1087 mrb_str_chop_bang(mrb, str);
1117 if (modify)
return str;
1118 return mrb_nil_value();
1138 mrb_str_downcase_bang(mrb, str);
1157 return mrb_bool_value(s->
len == 0);
1176 return mrb_bool_value(eql_p);
1186 str_make_shared(mrb, orig);
1189 s->
ptr = orig->
ptr + beg;
1195 return mrb_obj_value(s);
1203 if (len < 0)
return mrb_nil_value();
1207 if (beg >
RSTRING_LEN(str))
return mrb_nil_value();
1210 if (beg < 0)
return mrb_nil_value();
1217 str2 = mrb_str_subseq(mrb, str, beg, len);
1239 key = key*65599 + *p;
1242 key = key + (key>>5);
1257 return mrb_fixnum_value(key);
1286 i = mrb_str_index(mrb,
self, str2, 0);
1288 include_p = (i != -1);
1291 return mrb_bool_value(include_p);
1334 sub = mrb_nil_value();
1337 regexp_check(mrb, sub);
1341 return mrb_nil_value();
1349 unsigned char *p = (
unsigned char*)
RSTRING_PTR(str);
1351 for (;pos<
len;pos++) {
1352 if (p[pos] == c)
return mrb_fixnum_value(pos);
1354 return mrb_nil_value();
1368 pos = mrb_str_index(mrb, str, sub, pos);
1372 if (pos == -1)
return mrb_nil_value();
1373 return mrb_fixnum_value(pos);
/*
 * NOTE(review): presumably the minimum source length at which a string
 * replace shares the source buffer instead of copying it -- the code that
 * reads this constant is not visible in this excerpt; confirm.
 */
1376 #define STR_REPLACE_SHARED_MIN 10
1396 str_make_shared(mrb, s2);
1413 return mrb_obj_value(s1);
1479 return mrb_symbol_value(
id);
1503 uintptr_t n = (uintptr_t)p;
1505 p_str = str_new(mrb, NULL, 2 +
sizeof(uintptr_t) * CHAR_BIT / 4);
1524 return mrb_obj_value(p_str);
1557 s2 = str_new(mrb, 0,
RSTRING(str)->len);
1565 return mrb_obj_value(s2);
1622 if (ps->
len < len)
return -1;
1623 if (ps->
len - pos < len) {
1631 if (memcmp(s, t, len) == 0) {
1679 regexp_check(mrb, sub);
1680 return mrb_nil_value();
1683 if (pos > len) pos =
len;
1690 sub = mrb_nil_value();
1692 regexp_check(mrb, sub);
1698 unsigned char *p = (
unsigned char*)
RSTRING_PTR(str);
1700 for (pos=len;pos>=0;pos--) {
1701 if (p[pos] == c)
return mrb_fixnum_value(pos);
1703 return mrb_nil_value();
1717 pos = mrb_str_rindex(mrb, str, sub, pos);
1718 if (pos >= 0)
return mrb_fixnum_value(pos);
1722 return mrb_nil_value();
1725 static const char isspacetable[256] = {
1726 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
1727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1728 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1729 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1730 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1731 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1732 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1733 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1734 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1735 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1736 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1737 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Looks c up in isspacetable.  The argument is converted to unsigned char
 * first, so a negative char value cannot index outside the table's 256
 * entries.  Per the table, the result is 1 for bytes 9-13 (tab, LF, VT, FF,
 * CR) and 32 (space), and 0 for every other byte.
 */
1744 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
1794 enum {awk, string, regexp} split_type = string;
1802 lim_p = (lim > 0 && argc == 2);
1817 split_type = string;
1829 if (split_type == awk) {
1837 while (ptr < eptr) {
1839 c = (
unsigned char)*ptr++;
1847 if (lim_p && lim <= i)
break;
1851 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
1862 else if (split_type ==
string) {
1870 while (ptr < eptr) {
1871 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
1874 if (lim_p && lim <= ++i)
break;
1881 while (ptr < eptr &&
1882 (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
1883 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
1886 if (lim_p && lim <= ++i)
break;
1896 tmp = mrb_str_new_empty(mrb, str);
1899 tmp = mrb_str_subseq(mrb, str, beg,
RSTRING_LEN(str)-beg);
1903 if (!lim_p && lim == 0) {
/*
 * Non-zero when c lies in the inclusive range '0'..'9'.  The argument is
 * expanded twice, so do not pass an expression with side effects.
 */
1923 #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
1924 #define conv_digit(c) \
1925 (!ISASCII(c) ? -1 : \
1926 isdigit(c) ? ((c) - '0') : \
1927 islower(c) ? ((c) - 'a' + 10) : \
1928 isupper(c) ? ((c) - 'A' + 10) : \
1932 if (badcheck)
goto bad;
1933 return mrb_fixnum_value(0);
1937 if (str[0] ==
'+') {
1940 else if (str[0] ==
'-') {
1944 if (str[0] ==
'+' || str[0] ==
'-') {
1945 if (badcheck)
goto bad;
1946 return mrb_fixnum_value(0);
1949 if (str[0] ==
'0') {
1967 else if (base < -1) {
1976 if (str[0] ==
'0' && (str[1] ==
'b'||str[1] ==
'B')) {
1983 if (str[0] ==
'0' && (str[1] ==
'o'||str[1] ==
'O')) {
1986 case 4:
case 5:
case 6:
case 7:
1989 if (str[0] ==
'0' && (str[1] ==
'd'||str[1] ==
'D')) {
1992 case 9:
case 11:
case 12:
case 13:
case 14:
case 15:
1995 if (str[0] ==
'0' && (str[1] ==
'x'||str[1] ==
'X')) {
2000 if (base < 2 || 36 < base) {
2007 while ((c = *++str) ==
'0' || c ==
'_') {
2015 if (!(c = *str) ||
ISSPACE(c)) --str;
2019 if (c < 0 || c >= base) {
2020 if (badcheck)
goto bad;
2021 return mrb_fixnum_value(0);
2024 n = strtoul((
char*)str, &end, base);
2030 if (end == str)
goto bad;
2031 while (*end &&
ISSPACE(*end)) end++;
2035 return mrb_fixnum_value(sign ? val : -val);
2039 return mrb_fixnum_value(0);
2048 if (!s || ps->
len != strlen(s)) {
2070 struct RString *temp_str = str_new(mrb, s, len);
2122 #if !defined(DBL_DIG)
/* Upper bound on the width w computed by OutOfRange() below. */
2126 enum {max_width = 20};
/*
 * Computes w = end - p.  If that exceeds max_width, w is clamped to
 * max_width and ellipsis is set to "..."; otherwise w keeps the (int)
 * length and ellipsis is set to "".  The macro takes no arguments: it
 * reads and writes the variables w, end, p and ellipsis, which must be in
 * scope wherever it is expanded.  The value of the expression is the
 * string assigned to ellipsis (right operand of the comma operator).
 */
2127 #define OutOfRange() (((w = end - p) > max_width) ? \
2128 (w = max_width, ellipsis = "...") : \
2129 (w = (int)(end - p), ellipsis = ""))
2134 if (!badcheck && p[0] ==
'0' && (p[1] ==
'x' || p[1] ==
'X')) {
2137 d = strtod(p, &end);
2149 char *e = buf +
sizeof(buf) - 1;
2152 while (p < end && n < e) prev = *n++ = *p++;
2157 if (n == buf || !
ISDIGIT(prev))
goto bad;
2162 while (*++p ==
'_');
2167 if (n < e) *n++ = prev;
2172 if (!badcheck && p[0] ==
'0' && (p[1] ==
'x' || p[1] ==
'X')) {
2176 d = strtod(p, &end);
2178 if (!end || p == end)
goto bad;
2179 while (*end &&
ISSPACE(*end)) end++;
2196 if (badcheck && memchr(s,
'\0', len)) {
2200 struct RString *temp_str = str_new(mrb, s, len);
2270 if (modify)
return str;
2271 return mrb_nil_value();
2291 mrb_str_upcase_bang(mrb, str);
2306 const char *p, *pend;
2313 unsigned char c = *p++;
2315 case '"':
case '\\':
2316 case '\n':
case '\r':
2317 case '\t':
case '\f':
2318 case '\013':
case '\010':
case '\007':
case '\033':
2337 result = str_new(mrb, 0, len);
2344 unsigned char c = *p++;
2394 if (
IS_EVSTR(p, pend)) *q++ =
'\\';
2404 q[2] =
'0' + c % 8; c /= 8;
2405 q[1] =
'0' + c % 8; c /= 8;
2412 return mrb_obj_value(result);
/*
 * NOTE(review): presumably the buffer size reserved for the escaped form of
 * a single character -- the declaration that uses this constant is not
 * visible in this excerpt; confirm before changing the value.
 */
2438 #define CHAR_ESC_LEN 13
2454 const char *p, *pend;
2459 for (;p < pend; p++) {
2463 if (c ==
'"'|| c ==
'\\' || (c ==
'#' &&
IS_EVSTR(p, pend))) {
2464 buf[0] =
'\\'; buf[1] = c;
2474 case '\n': cc =
'n';
break;
2475 case '\r': cc =
'r';
break;
2476 case '\t': cc =
't';
break;
2477 case '\f': cc =
'f';
break;
2478 case '\013': cc =
'v';
break;
2479 case '\010': cc =
'b';
break;
2480 case '\007': cc =
'a';
break;
2481 case 033: cc =
'e';
break;
2482 default: cc = 0;
break;
2492 buf[3] =
'0' + c % 8; c /= 8;
2493 buf[2] =
'0' + c % 8; c /= 8;
2494 buf[1] =
'0' + c % 8;
2518 unsigned char *p = (
unsigned char *)(s->
ptr), *pend = p + s->
len;