Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
sprintf.c
Go to the documentation of this file.
1 /*
2 ** sprintf.c - Kernel.#sprintf
3 **
4 ** See Copyright Notice in mruby.h
5 */
6 
7 #include "mruby.h"
8 
9 #include <limits.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include "mruby/string.h"
13 #include "mruby/hash.h"
14 #include "mruby/numeric.h"
15 #include <math.h>
16 #include <ctype.h>
17 
18 #ifdef HAVE_IEEEFP_H
19 #include <ieeefp.h>
20 #endif
21 
/* Upper bound on the number of decimal digits needed for an N-bit binary exponent. */
22 #define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */
/* Number of bits in an mrb_int. */
23 #define BITSPERDIG (sizeof(mrb_int)*CHAR_BIT)
/*
 * Bits to OR into the leading digit of an l-digit string with n bits per digit;
 * remove_sign_bits() uses it for octal output.
 * NOTE(review): the expression left-shifts ~0 and right-shifts the (negative)
 * result, so it appears to rely on two's-complement / arithmetic-shift
 * behaviour of the compiler -- confirm before porting.
 */
24 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
25 
/* Forward declaration; the definition is at the end of this file. */
26 static void fmt_setup(char*,size_t,int,int,mrb_int,mrb_int);
27 
/*
 * Skip the run of leading "all ones" digits in a digit string.
 *
 * str  - NUL-terminated digit string; for base 8 its first character is
 *        modified in place (sign bits are OR-ed in via EXTENDSIGN).
 * base - 16, 8 or 2; for any other base the string is returned untouched.
 *
 * Returns a pointer into str, positioned at the first character that is not
 * the top digit of the base ('f', '7' or '1').
 */
static char*
remove_sign_bits(char *str, int base)
{
  char *cur = str;
  char lead;

  switch (base) {
  case 16:
    lead = 'f';
    break;
  case 8:
    /* sign-extend the leading octal digit before comparing */
    *cur |= EXTENDSIGN(3, strlen(cur));
    lead = '7';
    break;
  case 2:
    lead = '1';
    break;
  default:
    return cur;
  }

  while (*cur == lead) {
    cur++;
  }
  return cur;
}
53 
/*
 * Return the digit used to pad a negative number printed in complement form.
 *
 * base - numeric base of the conversion.
 * p    - points at the conversion character; only read when base is 16,
 *        where 'X' selects the upper-case digit.
 *
 * Returns 'F'/'f' for base 16, '7' for base 8, '1' for base 2 and '.' for
 * every other base.
 */
static char
sign_bits(int base, const char *p)
{
  if (base == 16) {
    return (*p == 'X') ? 'F' : 'f';
  }
  if (base == 8) {
    return '7';
  }
  if (base == 2) {
    return '1';
  }
  return '.';
}
73 
74 static mrb_value
75 mrb_fix2binstr(mrb_state *mrb, mrb_value x, int base)
76 {
77  char buf[64], *b = buf + sizeof buf;
78  mrb_int num = mrb_fixnum(x);
79  unsigned long val = (unsigned long)num;
80  char d;
81 
82  if (base != 2) {
83  mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid radix %S", mrb_fixnum_value(base));
84  }
85 
86  if (val >= (1 << 10))
87  val &= 0x3ff;
88 
89  if (val == 0) {
90  return mrb_str_new(mrb, "0", 1);
91  }
92  *--b = '\0';
93  do {
94  *--b = mrb_digitmap[(int)(val % base)];
95  } while (val /= base);
96 
97  if (num < 0) {
98  b = remove_sign_bits(b, base);
99  switch (base) {
100  case 16: d = 'f'; break;
101  case 8: d = '7'; break;
102  case 2: d = '1'; break;
103  default: d = 0; break;
104  }
105 
106  if (d && *b != d) {
107  *--b = d;
108  }
109  }
110 
111  return mrb_str_new_cstr(mrb, b);
112 }
113 
/* Flag bits collected while parsing one format directive. */
#define FNONE  0
#define FSHARP 1
#define FMINUS 2
#define FPLUS  4
#define FZERO  8
#define FSPACE 16
#define FWIDTH 32
#define FPREC  64
#define FPREC0 128

/*
 * The macros below expand inside mrb_str_format() and use its locals
 * (mrb, result, buf, blen, bsiz, p, end, t, n, tmp, argc, argv, hash,
 * nextvalue, nextarg, posarg) directly.
 */

/* Grow the result buffer until l more bytes fit after blen; refreshes buf. */
#define CHECK(l) do {\
  while (blen + (l) >= bsiz) {\
    bsiz*=2;\
  }\
  mrb_str_resize(mrb, result, bsiz);\
  buf = RSTRING_PTR(result);\
} while (0)

/* Append l bytes starting at s. */
#define PUSH(s, l) do { \
  CHECK(l);\
  memcpy(&buf[blen], s, l);\
  blen += (l);\
} while (0)

/* Append l copies of the byte c. */
#define FILL(c, l) do { \
  CHECK(l);\
  memset(&buf[blen], c, l);\
  blen += (l);\
} while (0)

/* Next argument for an unnumbered directive (or the value already selected). */
#define GETARG() (!mrb_undef_p(nextvalue) ? nextvalue : \
  posarg == -1 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%S) mixed with numbered", mrb_fixnum_value(nextarg)), mrb_undef_value()) : \
  posarg == -2 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%S) mixed with named", mrb_fixnum_value(nextarg)), mrb_undef_value()) : \
  (posarg = nextarg++, GETNTHARG(posarg)))

/* Argument selected by an explicit "n$" position. */
#define GETPOSARG(n) (posarg > 0 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%S) after unnumbered(%S)", mrb_fixnum_value(n), mrb_fixnum_value(posarg)), mrb_undef_value()) : \
  posarg == -2 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%S) after named", mrb_fixnum_value(n)), mrb_undef_value()) : \
  (((n) < 1) ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid index - %S$", mrb_fixnum_value(n)), mrb_undef_value()) : \
  (posarg = -1, GETNTHARG(n))))

/* argv[nth], raising ArgumentError when nth is out of range. */
#define GETNTHARG(nth) \
  (((nth) >= argc) ? (mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"), mrb_undef_value()) : argv[nth])

/* Value looked up by name in the single hash argument. */
#define GETNAMEARG(id, name, len) ( \
  posarg > 0 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%S after unnumbered(%S)", mrb_str_new(mrb, (name), (len)), mrb_fixnum_value(posarg)), mrb_undef_value()) : \
  posarg == -1 ? \
  (mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%S after numbered", mrb_str_new(mrb, (name), (len))), mrb_undef_value()) : \
  (posarg = -2, mrb_hash_fetch(mrb, get_hash(mrb, &hash, argc, argv), id, mrb_undef_value())))

/*
 * Accumulate a run of decimal digits at p into n.
 * The overflow test is done BEFORE multiplying, so no signed overflow can
 * occur; the accepted range is still limited to what fits in an int.
 * Raises ArgumentError on overflow or when the format ends inside the number.
 */
#define GETNUM(n, val) \
  for (; p < end && ISDIGIT(*p); p++) {\
    int getnum_digit = *p - '0'; \
    if ((n) > (INT_MAX - getnum_digit) / 10) {\
      mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \
    } \
    (n) = 10 * (n) + getnum_digit; \
  } \
  if (p >= end) { \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %*[0-9]"); \
  }

/*
 * Fetch the integer supplied for a '*' width or precision into num.
 * Accepts both "*" (next argument) and "*n$" (numbered argument).
 * The argument must be a Fixnum; anything else raises TypeError instead of
 * reinterpreting the value's payload as an integer.
 */
#define GETASTER(num) do { \
  t = p++; \
  n = 0; \
  GETNUM(n, val); \
  if (*p == '$') { \
    tmp = GETPOSARG(n); \
  } \
  else { \
    tmp = GETARG(); \
    p = t; \
  } \
  if (!mrb_fixnum_p(tmp)) { \
    mrb_raise(mrb, E_TYPE_ERROR, "* requires an Integer argument"); \
  } \
  num = mrb_fixnum(tmp); \
} while (0)
196 
197 static mrb_value
198 get_hash(mrb_state *mrb, mrb_value *hash, int argc, const mrb_value *argv)
199 {
200  mrb_value tmp;
201 
202  if (!mrb_undef_p(*hash)) return *hash;
203  if (argc != 2) {
204  mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
205  }
206  tmp = mrb_check_convert_type(mrb, argv[1], MRB_TT_HASH, "Hash", "to_hash");
207  if (mrb_nil_p(tmp)) {
208  mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
209  }
210  return (*hash = tmp);
211 }
212 
213 /*
214  * call-seq:
215  * format(format_string [, arguments...] ) -> string
216  * sprintf(format_string [, arguments...] ) -> string
217  *
218  * Returns the string resulting from applying <i>format_string</i> to
219  * any additional arguments. Within the format string, any characters
220  * other than format sequences are copied to the result.
221  *
222  * The syntax of a format sequence is as follows.
223  *
224  * %[flags][width][.precision]type
225  *
226  * A format
227  * sequence consists of a percent sign, followed by optional flags,
228  * width, and precision indicators, then terminated with a field type
229  * character. The field type controls how the corresponding
230  * <code>sprintf</code> argument is to be interpreted, while the flags
231  * modify that interpretation.
232  *
233  * The field type characters are:
234  *
235  * Field | Integer Format
236  * ------+--------------------------------------------------------------
237  * b | Convert argument as a binary number.
238  * | Negative numbers will be displayed as a two's complement
239  * | prefixed with `..1'.
240  * B | Equivalent to `b', but uses an uppercase 0B for prefix
241  * | in the alternative format by #.
242  * d | Convert argument as a decimal number.
243  * i | Identical to `d'.
244  * o | Convert argument as an octal number.
245  * | Negative numbers will be displayed as a two's complement
246  * | prefixed with `..7'.
247  * u | Identical to `d'.
248  * x | Convert argument as a hexadecimal number.
249  * | Negative numbers will be displayed as a two's complement
250  * | prefixed with `..f' (representing an infinite string of
251  * | leading 'ff's).
252  * X | Equivalent to `x', but uses uppercase letters.
253  *
254  * Field | Float Format
255  * ------+--------------------------------------------------------------
256  * e | Convert floating point argument into exponential notation
257  * | with one digit before the decimal point as [-]d.dddddde[+-]dd.
258  * | The precision specifies the number of digits after the decimal
259  * | point (defaulting to six).
260  * E | Equivalent to `e', but uses an uppercase E to indicate
261  * | the exponent.
262  * f | Convert floating point argument as [-]ddd.dddddd,
263  * | where the precision specifies the number of digits after
264  * | the decimal point.
265  * g | Convert a floating point number using exponential form
266  * | if the exponent is less than -4 or greater than or
267  * | equal to the precision, or in dd.dddd form otherwise.
268  * | The precision specifies the number of significant digits.
269  * G | Equivalent to `g', but use an uppercase `E' in exponent form.
270  * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
271  * | which consists of an optional sign, "0x", fraction part
272  * | as hexadecimal, "p", and exponential part as decimal.
273  * A | Equivalent to `a', but use uppercase `X' and `P'.
274  *
275  * Field | Other Format
276  * ------+--------------------------------------------------------------
277  * c | Argument is the numeric code for a single character or
278  * | a single character string itself.
279  * p | The value of argument.inspect.
280  * s | Argument is a string to be substituted. If the format
281  * | sequence contains a precision, at most that many characters
282  * | will be copied.
283  * % | A percent sign itself will be displayed. No argument taken.
284  *
285  * The flags modify the behavior of the formats.
286  * The flag characters are:
287  *
288  * Flag | Applies to | Meaning
289  * ---------+---------------+-----------------------------------------
290  * space | bBdiouxX | Leave a space at the start of
291  * | aAeEfgG | non-negative numbers.
292  * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
293  * | | a minus sign with absolute value for
294  * | | negative values.
295  * ---------+---------------+-----------------------------------------
296  * (digit)$ | all | Specifies the absolute argument number
297  * | | for this field. Absolute and relative
298  * | | argument numbers cannot be mixed in a
299  * | | sprintf string.
300  * ---------+---------------+-----------------------------------------
301  * # | bBoxX | Use an alternative format.
302  * | aAeEfgG | For the conversions `o', increase the precision
303  * | | until the first digit will be `0' if
304  * | | it is not formatted as complements.
305  * | | For the conversions `x', `X', `b' and `B'
306  * | | on non-zero, prefix the result with ``0x'',
307  * | | ``0X'', ``0b'' and ``0B'', respectively.
308  * | | For `a', `A', `e', `E', `f', `g', and 'G',
309  * | | force a decimal point to be added,
310  * | | even if no digits follow.
311  * | | For `g' and 'G', do not remove trailing zeros.
312  * ---------+---------------+-----------------------------------------
313  * + | bBdiouxX | Add a leading plus sign to non-negative
314  * | aAeEfgG | numbers.
315  * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
316  * | | a minus sign with absolute value for
317  * | | negative values.
318  * ---------+---------------+-----------------------------------------
319  * - | all | Left-justify the result of this conversion.
320  * ---------+---------------+-----------------------------------------
321  * 0 (zero) | bBdiouxX | Pad with zeros, not spaces.
322  * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1
323  * | (numeric fmt) | is used for negative numbers formatted as
324  * | | complements.
325  * ---------+---------------+-----------------------------------------
326  * * | all | Use the next argument as the field width.
327  * | | If negative, left-justify the result. If the
328  * | | asterisk is followed by a number and a dollar
329  * | | sign, use the indicated argument as the width.
330  *
331  * Examples of flags:
332  *
333  * # `+' and space flag specifies the sign of non-negative numbers.
334  * sprintf("%d", 123) #=> "123"
335  * sprintf("%+d", 123) #=> "+123"
336  * sprintf("% d", 123) #=> " 123"
337  *
338  * # `#' flag for `o' increases number of digits to show `0'.
339  * # `+' and space flag changes format of negative numbers.
340  * sprintf("%o", 123) #=> "173"
341  * sprintf("%#o", 123) #=> "0173"
342  * sprintf("%+o", -123) #=> "-173"
343  * sprintf("%o", -123) #=> "..7605"
344  * sprintf("%#o", -123) #=> "..7605"
345  *
346  * # `#' flag for `x' add a prefix `0x' for non-zero numbers.
347  * # `+' and space flag disables complements for negative numbers.
348  * sprintf("%x", 123) #=> "7b"
349  * sprintf("%#x", 123) #=> "0x7b"
350  * sprintf("%+x", -123) #=> "-7b"
351  * sprintf("%x", -123) #=> "..f85"
352  * sprintf("%#x", -123) #=> "0x..f85"
353  * sprintf("%#x", 0) #=> "0"
354  *
355  * # `#' for `X' uses the prefix `0X'.
356  * sprintf("%X", 123) #=> "7B"
357  * sprintf("%#X", 123) #=> "0X7B"
358  *
359  * # `#' flag for `b' add a prefix `0b' for non-zero numbers.
360  * # `+' and space flag disables complements for negative numbers.
361  * sprintf("%b", 123) #=> "1111011"
362  * sprintf("%#b", 123) #=> "0b1111011"
363  * sprintf("%+b", -123) #=> "-1111011"
364  * sprintf("%b", -123) #=> "..10000101"
365  * sprintf("%#b", -123) #=> "0b..10000101"
366  * sprintf("%#b", 0) #=> "0"
367  *
368  * # `#' for `B' uses the prefix `0B'.
369  * sprintf("%B", 123) #=> "1111011"
370  * sprintf("%#B", 123) #=> "0B1111011"
371  *
372  * # `#' for `e' forces to show the decimal point.
373  * sprintf("%.0e", 1) #=> "1e+00"
374  * sprintf("%#.0e", 1) #=> "1.e+00"
375  *
376  * # `#' for `f' forces to show the decimal point.
377  * sprintf("%.0f", 1234) #=> "1234"
378  * sprintf("%#.0f", 1234) #=> "1234."
379  *
380  * # `#' for `g' forces to show the decimal point.
381  * # It also disables stripping lowest zeros.
382  * sprintf("%g", 123.4) #=> "123.4"
383  * sprintf("%#g", 123.4) #=> "123.400"
384  * sprintf("%g", 123456) #=> "123456"
385  * sprintf("%#g", 123456) #=> "123456."
386  *
387  * The field width is an optional integer, followed optionally by a
388  * period and a precision. The width specifies the minimum number of
389  * characters that will be written to the result for this field.
390  *
391  * Examples of width:
392  *
393  * # padding is done by spaces, width=20
394  * # 0 or radix-1. <------------------>
395  * sprintf("%20d", 123) #=> " 123"
396  * sprintf("%+20d", 123) #=> " +123"
397  * sprintf("%020d", 123) #=> "00000000000000000123"
398  * sprintf("%+020d", 123) #=> "+0000000000000000123"
399  * sprintf("% 020d", 123) #=> " 0000000000000000123"
400  * sprintf("%-20d", 123) #=> "123 "
401  * sprintf("%-+20d", 123) #=> "+123 "
402  * sprintf("%- 20d", 123) #=> " 123 "
403  * sprintf("%020x", -123) #=> "..ffffffffffffffff85"
404  *
405  * For
406  * numeric fields, the precision controls the number of decimal places
407  * displayed. For string fields, the precision determines the maximum
408  * number of characters to be copied from the string. (Thus, the format
409  * sequence <code>%10.10s</code> will always contribute exactly ten
410  * characters to the result.)
411  *
412  * Examples of precisions:
413  *
414  * # precision for `d', 'o', 'x' and 'b' is
415  * # minimum number of digits <------>
416  * sprintf("%20.8d", 123) #=> " 00000123"
417  * sprintf("%20.8o", 123) #=> " 00000173"
418  * sprintf("%20.8x", 123) #=> " 0000007b"
419  * sprintf("%20.8b", 123) #=> " 01111011"
420  * sprintf("%20.8d", -123) #=> " -00000123"
421  * sprintf("%20.8o", -123) #=> " ..777605"
422  * sprintf("%20.8x", -123) #=> " ..ffff85"
423  * sprintf("%20.8b", -11) #=> " ..110101"
424  *
425  * # "0x" and "0b" for `#x' and `#b' is not counted for
426  * # precision but "0" for `#o' is counted. <------>
427  * sprintf("%#20.8d", 123) #=> " 00000123"
428  * sprintf("%#20.8o", 123) #=> " 00000173"
429  * sprintf("%#20.8x", 123) #=> " 0x0000007b"
430  * sprintf("%#20.8b", 123) #=> " 0b01111011"
431  * sprintf("%#20.8d", -123) #=> " -00000123"
432  * sprintf("%#20.8o", -123) #=> " ..777605"
433  * sprintf("%#20.8x", -123) #=> " 0x..ffff85"
434  * sprintf("%#20.8b", -11) #=> " 0b..110101"
435  *
436  * # precision for `e' is number of
437  * # digits after the decimal point <------>
438  * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03"
439  *
440  * # precision for `f' is number of
441  * # digits after the decimal point <------>
442  * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000"
443  *
444  * # precision for `g' is number of
445  * # significant digits <------->
446  * sprintf("%20.8g", 1234.56789) #=> " 1234.5679"
447  *
448  * # <------->
449  * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08"
450  *
451  * # precision for `s' is
452  * # maximum number of characters <------>
453  * sprintf("%20.8s", "string test") #=> " string t"
454  *
455  * Examples:
456  *
457  * sprintf("%d %04x", 123, 123) #=> "123 007b"
458  * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'"
459  * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello"
460  * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8"
461  * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23"
462  * sprintf("%u", -123) #=> "-123"
463  *
464  * For more complex formatting, Ruby supports a reference by name.
465  * %<name>s style uses format style, but %{name} style doesn't.
466  *
467  * Examples:
468  * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
469  * #=> 1 : 2.000000
470  * sprintf("%{foo}f", { :foo => 1 })
471  * # => "1f"
472  */
473 
/*
 * Method body for sprintf/format: collects every call argument, then formats
 * the remaining arguments using the first one as the format string.
 *
 * NOTE(review): the declarator line (function name and parameter list) that
 * should sit between `mrb_value` and `{` is absent from this listing -- the
 * embedded listing numbers jump from 474 to 476. Restore it from the original
 * file before compiling.
 */
474 mrb_value
476 {
477  int argc;
478  mrb_value *argv;
479 
/* "*" gathers all arguments into argv/argc */
480  mrb_get_args(mrb, "*", &argv, &argc);
481 
482  if (argc <= 0) {
483  mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments");
484  return mrb_nil_value();
485  }
486  else {
/* argv[0] is the format string; the rest are the values to format */
487  return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]);
488  }
489 }
490 
491 mrb_value
492 mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
493 {
494  const char *p, *end;
495  char *buf;
496  mrb_int blen;
497  mrb_int bsiz;
498  mrb_value result;
499  mrb_int n;
500  mrb_int width;
501  mrb_int prec;
502  int flags = FNONE;
503  int nextarg = 1;
504  int posarg = 0;
505  mrb_value nextvalue;
506  mrb_value tmp;
507  mrb_value str;
508  mrb_value hash = mrb_undef_value();
509 
510 #define CHECK_FOR_WIDTH(f) \
511  if ((f) & FWIDTH) { \
512  mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice"); \
513  } \
514  if ((f) & FPREC0) { \
515  mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision"); \
516  }
517 #define CHECK_FOR_FLAGS(f) \
518  if ((f) & FWIDTH) { \
519  mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width"); \
520  } \
521  if ((f) & FPREC0) { \
522  mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \
523  }
524 
525  ++argc;
526  --argv;
527  fmt = mrb_str_to_str(mrb, fmt);
528  p = RSTRING_PTR(fmt);
529  end = p + RSTRING_LEN(fmt);
530  blen = 0;
531  bsiz = 120;
532  result = mrb_str_buf_new(mrb, bsiz);
533  buf = RSTRING_PTR(result);
534  memset(buf, 0, bsiz);
535 
536  for (; p < end; p++) {
537  const char *t;
538  mrb_sym id = 0;
539 
540  for (t = p; t < end && *t != '%'; t++) ;
541  PUSH(p, t - p);
542  if (t >= end)
543  goto sprint_exit; /* end of fmt string */
544 
545  p = t + 1; /* skip `%' */
546 
547  width = prec = -1;
548  nextvalue = mrb_undef_value();
549 
550 retry:
551  switch (*p) {
552  default:
553  mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - \\%%S", mrb_str_new(mrb, p, 1));
554  break;
555 
556  case ' ':
557  CHECK_FOR_FLAGS(flags);
558  flags |= FSPACE;
559  p++;
560  goto retry;
561 
562  case '#':
563  CHECK_FOR_FLAGS(flags);
564  flags |= FSHARP;
565  p++;
566  goto retry;
567 
568  case '+':
569  CHECK_FOR_FLAGS(flags);
570  flags |= FPLUS;
571  p++;
572  goto retry;
573 
574  case '-':
575  CHECK_FOR_FLAGS(flags);
576  flags |= FMINUS;
577  p++;
578  goto retry;
579 
580  case '0':
581  CHECK_FOR_FLAGS(flags);
582  flags |= FZERO;
583  p++;
584  goto retry;
585 
586  case '1': case '2': case '3': case '4':
587  case '5': case '6': case '7': case '8': case '9':
588  n = 0;
589  GETNUM(n, width);
590  if (*p == '$') {
591  if (!mrb_undef_p(nextvalue)) {
592  mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %S$", mrb_fixnum_value(n));
593  }
594  nextvalue = GETPOSARG(n);
595  p++;
596  goto retry;
597  }
598  CHECK_FOR_WIDTH(flags);
599  width = n;
600  flags |= FWIDTH;
601  goto retry;
602 
603  case '<':
604  case '{': {
605  const char *start = p;
606  char term = (*p == '<') ? '>' : '}';
607  mrb_value symname;
608 
609  for (; p < end && *p != term; )
610  p++;
611  if (id) {
612  mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%S after <%S>",
613  mrb_str_new(mrb, start, p - start + 1), mrb_sym2str(mrb, id));
614  }
615  symname = mrb_str_new(mrb, start + 1, p - start - 1);
616  id = mrb_intern_str(mrb, symname);
617  nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1));
618  if (mrb_undef_p(nextvalue)) {
619  mrb_raisef(mrb, E_KEY_ERROR, "key%S not found", mrb_str_new(mrb, start, p - start + 1));
620  }
621  if (term == '}') goto format_s;
622  p++;
623  goto retry;
624  }
625 
626  case '*':
627  CHECK_FOR_WIDTH(flags);
628  flags |= FWIDTH;
629  GETASTER(width);
630  if (width < 0) {
631  flags |= FMINUS;
632  width = -width;
633  }
634  p++;
635  goto retry;
636 
637  case '.':
638  if (flags & FPREC0) {
639  mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
640  }
641  flags |= FPREC|FPREC0;
642 
643  prec = 0;
644  p++;
645  if (*p == '*') {
646  GETASTER(prec);
647  if (prec < 0) { /* ignore negative precision */
648  flags &= ~FPREC;
649  }
650  p++;
651  goto retry;
652  }
653 
654  GETNUM(prec, precision);
655  goto retry;
656 
657  case '\n':
658  case '\0':
659  p--;
660 
661  case '%':
662  if (flags != FNONE) {
663  mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %");
664  }
665  PUSH("%", 1);
666  break;
667 
668  case 'c': {
669  mrb_value val = GETARG();
670  mrb_value tmp;
671  unsigned int c;
672 
673  tmp = mrb_check_string_type(mrb, val);
674  if (!mrb_nil_p(tmp)) {
675  if (RSTRING_LEN(tmp) != 1 ) {
676  mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
677  }
678  c = RSTRING_PTR(tmp)[0];
679  n = 1;
680  }
681  else {
682  c = mrb_fixnum(val);
683  n = 1;
684  }
685  if (n <= 0) {
686  mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
687  }
688  if (!(flags & FWIDTH)) {
689  CHECK(n);
690  buf[blen] = c;
691  blen += n;
692  }
693  else if ((flags & FMINUS)) {
694  CHECK(n);
695  buf[blen] = c;
696  blen += n;
697  FILL(' ', width-1);
698  }
699  else {
700  FILL(' ', width-1);
701  CHECK(n);
702  buf[blen] = c;
703  blen += n;
704  }
705  }
706  break;
707 
708  case 's':
709  case 'p':
710  format_s:
711  {
712  mrb_value arg = GETARG();
713  mrb_int len;
714  mrb_int slen;
715 
716  if (*p == 'p') arg = mrb_inspect(mrb, arg);
717  str = mrb_obj_as_string(mrb, arg);
718  len = RSTRING_LEN(str);
719  RSTRING_LEN(result) = blen;
720  if (flags&(FPREC|FWIDTH)) {
721  slen = RSTRING_LEN(str);
722  if (slen < 0) {
723  mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
724  }
725  if ((flags&FPREC) && (prec < slen)) {
726  char *p = RSTRING_PTR(str) + prec;
727  slen = prec;
728  len = p - RSTRING_PTR(str);
729  }
730  /* need to adjust multi-byte string pos */
731  if ((flags&FWIDTH) && (width > slen)) {
732  width -= (int)slen;
733  if (!(flags&FMINUS)) {
734  CHECK(width);
735  while (width--) {
736  buf[blen++] = ' ';
737  }
738  }
739  CHECK(len);
740  memcpy(&buf[blen], RSTRING_PTR(str), len);
741  blen += len;
742  if (flags&FMINUS) {
743  CHECK(width);
744  while (width--) {
745  buf[blen++] = ' ';
746  }
747  }
748  break;
749  }
750  }
751  PUSH(RSTRING_PTR(str), len);
752  }
753  break;
754 
755  case 'd':
756  case 'i':
757  case 'o':
758  case 'x':
759  case 'X':
760  case 'b':
761  case 'B':
762  case 'u': {
763  mrb_value val = GETARG();
764  char fbuf[32], nbuf[64], *s;
765  const char *prefix = NULL;
766  int sign = 0, dots = 0;
767  char sc = 0;
768  mrb_int v = 0, org_v = 0;
769  int base;
770  mrb_int len;
771 
772  switch (*p) {
773  case 'd':
774  case 'i':
775  case 'u':
776  sign = 1; break;
777  case 'o':
778  case 'x':
779  case 'X':
780  case 'b':
781  case 'B':
782  if (flags&(FPLUS|FSPACE)) sign = 1;
783  break;
784  default:
785  break;
786  }
787  if (flags & FSHARP) {
788  switch (*p) {
789  case 'o': prefix = "0"; break;
790  case 'x': prefix = "0x"; break;
791  case 'X': prefix = "0X"; break;
792  case 'b': prefix = "0b"; break;
793  case 'B': prefix = "0B"; break;
794  default: break;
795  }
796  }
797 
798  bin_retry:
799  switch (mrb_type(val)) {
800  case MRB_TT_FLOAT:
801  if (FIXABLE(mrb_float(val))) {
802  val = mrb_fixnum_value((mrb_int)mrb_float(val));
803  goto bin_retry;
804  }
805  val = mrb_flo_to_fixnum(mrb, val);
806  if (mrb_fixnum_p(val)) goto bin_retry;
807  break;
808  case MRB_TT_STRING:
809  val = mrb_str_to_inum(mrb, val, 0, TRUE);
810  goto bin_retry;
811  case MRB_TT_FIXNUM:
812  v = mrb_fixnum(val);
813  break;
814  default:
815  val = mrb_Integer(mrb, val);
816  goto bin_retry;
817  }
818 
819  switch (*p) {
820  case 'o':
821  base = 8; break;
822  case 'x':
823  case 'X':
824  base = 16; break;
825  case 'b':
826  case 'B':
827  base = 2; break;
828  case 'u':
829  case 'd':
830  case 'i':
831  default:
832  base = 10; break;
833  }
834 
835  if (base == 2) {
836  org_v = v;
837  if ( v < 0 && !sign ) {
838  val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
839  dots = 1;
840  }
841  else {
842  val = mrb_fixnum_to_str(mrb, mrb_fixnum_value(v), base);
843  }
844  v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/));
845  }
846  if (sign) {
847  char c = *p;
848  if (c == 'i') c = 'd'; /* %d and %i are identical */
849  if (base == 2) c = 'd';
850  if (v < 0) {
851  v = -v;
852  sc = '-';
853  width--;
854  }
855  else if (flags & FPLUS) {
856  sc = '+';
857  width--;
858  }
859  else if (flags & FSPACE) {
860  sc = ' ';
861  width--;
862  }
863  snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
864  snprintf(nbuf, sizeof(nbuf), fbuf, v);
865  s = nbuf;
866  }
867  else {
868  char c = *p;
869  if (c == 'X') c = 'x';
870  if (base == 2) c = 'd';
871  s = nbuf;
872  if (v < 0) {
873  dots = 1;
874  }
875  snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
876  snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
877  if (v < 0) {
878  char d;
879 
880  s = remove_sign_bits(s, base);
881  switch (base) {
882  case 16: d = 'f'; break;
883  case 8: d = '7'; break;
884  case 2: d = '1'; break;
885  default: d = 0; break;
886  }
887 
888  if (d && *s != d) {
889  *--s = d;
890  }
891  }
892  }
893  {
894  size_t size;
895  size = strlen(s);
896  /* PARANOID: assert(size <= MRB_INT_MAX) */
897  len = (mrb_int)size;
898  }
899 
900  if (dots) {
901  prec -= 2;
902  width -= 2;
903  }
904 
905  if (*p == 'X') {
906  char *pp = s;
907  int c;
908  while ((c = (int)(unsigned char)*pp) != 0) {
909  *pp = toupper(c);
910  pp++;
911  }
912  }
913 
914  if (prefix && !prefix[1]) { /* octal */
915  if (dots) {
916  prefix = NULL;
917  }
918  else if (len == 1 && *s == '0') {
919  len = 0;
920  if (flags & FPREC) prec--;
921  }
922  else if ((flags & FPREC) && (prec > len)) {
923  prefix = NULL;
924  }
925  }
926  else if (len == 1 && *s == '0') {
927  prefix = NULL;
928  }
929 
930  if (prefix) {
931  size_t size;
932  size = strlen(prefix);
933  /* PARANOID: assert(size <= MRB_INT_MAX).
934  * this check is absolutely paranoid. */
935  width -= (mrb_int)size;
936  }
937 
938  if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
939  prec = width;
940  width = 0;
941  }
942  else {
943  if (prec < len) {
944  if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
945  prec = len;
946  }
947  width -= prec;
948  }
949 
950  if (!(flags&FMINUS)) {
951  CHECK(width);
952  while (width-- > 0) {
953  buf[blen++] = ' ';
954  }
955  }
956 
957  if (sc) PUSH(&sc, 1);
958 
959  if (prefix) {
960  int plen = (int)strlen(prefix);
961  PUSH(prefix, plen);
962  }
963  CHECK(prec - len);
964  if (dots) PUSH("..", 2);
965 
966  if (v < 0 || (base == 2 && org_v < 0)) {
967  char c = sign_bits(base, p);
968  while (len < prec--) {
969  buf[blen++] = c;
970  }
971  }
972  else if ((flags & (FMINUS|FPREC)) != FMINUS) {
973  char c = '0';
974  while (len < prec--) {
975  buf[blen++] = c;
976  }
977  }
978 
979  PUSH(s, len);
980  CHECK(width);
981  while (width-- > 0) {
982  buf[blen++] = ' ';
983  }
984  }
985  break;
986 
987  case 'f':
988  case 'g':
989  case 'G':
990  case 'e':
991  case 'E':
992  case 'a':
993  case 'A': {
994  mrb_value val = GETARG();
995  double fval;
996  int i, need = 6;
997  char fbuf[32];
998 
999  fval = mrb_float(mrb_Float(mrb, val));
1000  if (isnan(fval) || isinf(fval)) {
1001  const char *expr;
1002  const int elen = 3;
1003 
1004  if (isnan(fval)) {
1005  expr = "NaN";
1006  }
1007  else {
1008  expr = "Inf";
1009  }
1010  need = elen;
1011  if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
1012  need++;
1013  if ((flags & FWIDTH) && need < width)
1014  need = width;
1015 
1016  CHECK(need + 1);
1017  snprintf(&buf[blen], need + 1, "%*s", need, "");
1018  if (flags & FMINUS) {
1019  if (!isnan(fval) && fval < 0.0)
1020  buf[blen++] = '-';
1021  else if (flags & FPLUS)
1022  buf[blen++] = '+';
1023  else if (flags & FSPACE)
1024  blen++;
1025  memcpy(&buf[blen], expr, elen);
1026  }
1027  else {
1028  if (!isnan(fval) && fval < 0.0)
1029  buf[blen + need - elen - 1] = '-';
1030  else if (flags & FPLUS)
1031  buf[blen + need - elen - 1] = '+';
1032  else if ((flags & FSPACE) && need > width)
1033  blen++;
1034  memcpy(&buf[blen + need - elen], expr, elen);
1035  }
1036  blen += strlen(&buf[blen]);
1037  break;
1038  }
1039 
1040  fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
1041  need = 0;
1042  if (*p != 'e' && *p != 'E') {
1043  i = INT_MIN;
1044  frexp(fval, &i);
1045  if (i > 0)
1046  need = BIT_DIGITS(i);
1047  }
1048  need += (flags&FPREC) ? prec : 6;
1049  if ((flags&FWIDTH) && need < width)
1050  need = width;
1051  need += 20;
1052 
1053  CHECK(need);
1054  n = snprintf(&buf[blen], need, fbuf, fval);
1055  blen += n;
1056  }
1057  break;
1058  }
1059  flags = FNONE;
1060  }
1061 
1062  sprint_exit:
1063 #if 0
1064  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
1065  */
1066  if (posarg >= 0 && nextarg < argc) {
1067  const char *mesg = "too many arguments for format string";
1068  if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg);
1069  if (mrb_test(ruby_verbose)) mrb_warn(mrb, "%S", mrb_str_new_cstr(mrb, mesg));
1070  }
1071 #endif
1072  mrb_str_resize(mrb, result, blen);
1073 
1074  return result;
1075 }
1076 
1077 static void
1078 fmt_setup(char *buf, size_t size, int c, int flags, mrb_int width, mrb_int prec)
1079 {
1080  char *end = buf + size;
1081  int n;
1082 
1083  *buf++ = '%';
1084  if (flags & FSHARP) *buf++ = '#';
1085  if (flags & FPLUS) *buf++ = '+';
1086  if (flags & FMINUS) *buf++ = '-';
1087  if (flags & FZERO) *buf++ = '0';
1088  if (flags & FSPACE) *buf++ = ' ';
1089 
1090  if (flags & FWIDTH) {
1091  n = snprintf(buf, end - buf, "%d", (int)width);
1092  buf += n;
1093  }
1094 
1095  if (flags & FPREC) {
1096  n = snprintf(buf, end - buf, ".%d", (int)prec);
1097  buf += n;
1098  }
1099 
1100  *buf++ = c;
1101  *buf = '\0';
1102 }