Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
symbol.c
Go to the documentation of this file.
1 /*
2 ** symbol.c - Symbol class
3 **
4 ** See Copyright Notice in mruby.h
5 */
6 
7 #include <ctype.h>
8 #include <limits.h>
9 #include <string.h>
10 #include "mruby.h"
11 #include "mruby/khash.h"
12 #include "mruby/string.h"
13 
14 /* ------------------------------------------------------ */
/*
 * Key type of the name-to-symbol hash table: a byte string given as an
 * explicit length plus a pointer, so names may contain embedded NUL bytes.
 */
15 typedef struct symbol_name {
/* number of bytes in `name` (a terminating NUL, if any, is not counted) */
16  size_t len;
/* first byte of the name; mrb_intern2() stores a private NUL-terminated
   copy here, while lookups point it at the caller's buffer */
17  const char *name;
18 } symbol_name;
19 
20 static inline khint_t
21 sym_hash_func(mrb_state *mrb, const symbol_name s)
22 {
23  khint_t h = 0;
24  size_t i;
25  const char *p = s.name;
26 
27  for (i=0; i<s.len; i++) {
28  h = (h << 5) - h + *p++;
29  }
30  return h;
31 }
/*
 * Key equality for the n2s table: two names match when they have the same
 * length and identical bytes.  `mrb` is accepted but unused.
 * Arguments are parenthesised so that any expression yielding a
 * symbol_name (e.g. `*p` or `tbl[i]`) expands correctly.  `a` and `b` are
 * evaluated more than once, so they must be free of side effects.
 */
#define sym_hash_equal(mrb,a, b) ((a).len == (b).len && memcmp((a).name, (b).name, (a).len) == 0)
33 
35 KHASH_DEFINE (n2s, symbol_name, mrb_sym, 1, sym_hash_func, sym_hash_equal)
36 /* ------------------------------------------------------ */
37 mrb_sym
38 mrb_intern2(mrb_state *mrb, const char *name, size_t len)
39 {
40  khash_t(n2s) *h = mrb->name2sym;
41  symbol_name sname;
42  khiter_t k;
43  mrb_sym sym;
44  char *p;
45 
46  sname.len = len;
47  sname.name = name;
48  k = kh_get(n2s, h, sname);
49  if (k != kh_end(h))
50  return kh_value(h, k);
51 
52  sym = ++mrb->symidx;
53  p = (char *)mrb_malloc(mrb, len+1);
54  memcpy(p, name, len);
55  p[len] = 0;
56  sname.name = (const char*)p;
57  k = kh_put(n2s, h, sname);
58  kh_value(h, k) = sym;
59 
60  return sym;
61 }
62 
63 mrb_sym
64 mrb_intern_cstr(mrb_state *mrb, const char *name)
65 {
66  return mrb_intern2(mrb, name, strlen(name));
67 }
68 
69 mrb_sym
71 {
72  return mrb_intern2(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
73 }
74 
76 mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
77 {
78  khash_t(n2s) *h = mrb->name2sym;
79  symbol_name sname;
80  khiter_t k;
81 
82  sname.len = len;
83  sname.name = name;
84 
85  k = kh_get(n2s, h, sname);
86  if (k != kh_end(h)) {
87  return mrb_symbol_value(kh_value(h, k));
88  }
89  return mrb_nil_value();
90 }
91 
93 mrb_check_intern_cstr(mrb_state *mrb, const char *name)
94 {
95  return mrb_check_intern(mrb, name, strlen(name));
96 }
97 
100 {
101  return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
102 }
103 
104 /* lenp must be a pointer to a size_t variable */
105 const char*
106 mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, size_t *lenp)
107 {
108  khash_t(n2s) *h = mrb->name2sym;
109  khiter_t k;
110  symbol_name sname;
111 
112  for (k = kh_begin(h); k != kh_end(h); k++) {
113  if (kh_exist(h, k)) {
114  if (kh_value(h, k) == sym) {
115  sname = kh_key(h, k);
116  *lenp = sname.len;
117  return sname.name;
118  }
119  }
120  }
121  *lenp = 0;
122  return NULL; /* missing */
123 }
124 
125 void
127 {
128  khash_t(n2s) *h = mrb->name2sym;
129  khiter_t k;
130 
131  for (k = kh_begin(h); k != kh_end(h); k++)
132  if (kh_exist(h, k)) mrb_free(mrb, (char*)kh_key(h, k).name);
133  kh_destroy(n2s,mrb->name2sym);
134 }
135 
136 void
138 {
139  mrb->name2sym = kh_init(n2s, mrb);
140 }
141 
142 /**********************************************************************
143  * Document-class: Symbol
144  *
145  * <code>Symbol</code> objects represent names and some strings
146  * inside the Ruby
147  * interpreter. They are generated using the <code>:name</code> and
148  * <code>:"string"</code> literals
149  * syntax, and by the various <code>to_sym</code> methods. The same
150  * <code>Symbol</code> object will be created for a given name or string
151  * for the duration of a program's execution, regardless of the context
152  * or meaning of that name. Thus if <code>Fred</code> is a constant in
153  * one context, a method in another, and a class in a third, the
154  * <code>Symbol</code> <code>:Fred</code> will be the same object in
155  * all three contexts.
156  *
157  * module One
158  * class Fred
159  * end
160  * $f1 = :Fred
161  * end
162  * module Two
163  * Fred = 1
164  * $f2 = :Fred
165  * end
166  * def Fred()
167  * end
168  * $f3 = :Fred
169  * $f1.object_id #=> 2514190
170  * $f2.object_id #=> 2514190
171  * $f3.object_id #=> 2514190
172  *
173  */
174 
175 
176 /* 15.2.11.3.1 */
177 /*
178  * call-seq:
179  * sym == obj -> true or false
180  *
181  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
182  * symbol, returns <code>true</code>.
183  */
184 
185 static mrb_value
186 sym_equal(mrb_state *mrb, mrb_value sym1)
187 {
188  mrb_value sym2;
189  mrb_bool equal_p;
190 
191  mrb_get_args(mrb, "o", &sym2);
192  equal_p = mrb_obj_equal(mrb, sym1, sym2);
193 
194  return mrb_bool_value(equal_p);
195 }
196 
197 /* 15.2.11.3.2 */
198 /* 15.2.11.3.3 */
199 /*
200  * call-seq:
201  * sym.id2name -> string
202  * sym.to_s -> string
203  *
204  * Returns the name or string corresponding to <i>sym</i>.
205  *
206  * :fred.id2name #=> "fred"
207  */
208 mrb_value
210 {
211  mrb_sym id = mrb_symbol(sym);
212  const char *p;
213  size_t len;
214 
215  p = mrb_sym2name_len(mrb, id, &len);
216  return mrb_str_new_static(mrb, p, len);
217 }
218 
219 /* 15.2.11.3.4 */
220 /*
221  * call-seq:
222  * sym.to_sym -> sym
223  * sym.intern -> sym
224  *
225  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
226  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
227  * in this case.
228  */
229 
230 static mrb_value
231 sym_to_sym(mrb_state *mrb, mrb_value sym)
232 {
233  return sym;
234 }
235 
236 /* 15.2.11.3.5(x) */
237 /*
238  * call-seq:
239  * sym.inspect -> string
240  *
241  * Returns the representation of <i>sym</i> as a symbol literal.
242  *
243  * :fred.inspect #=> ":fred"
244  */
245 
/*
 * SIGN_EXTEND_CHAR(c): the value of c reinterpreted as a signed 8-bit
 * quantity.  Standard C compilers use a cast; the fallback computes the
 * same result arithmetically for compilers without `signed char`.
 */
246 #if __STDC__
247 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
248 #else /* not __STDC__ */
249 /* As in Harbison and Steele. */
250 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
251 #endif
/*
 * is_identchar(c): true when c is an alphanumeric character (per ISALNUM,
 * defined elsewhere) or '_', and its sign-extended value is not -1.
 * The argument is expanded several times, so it must have no side effects.
 */
252 #define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
253 
254 static mrb_bool
255 is_special_global_name(const char* m)
256 {
257  switch (*m) {
258  case '~': case '*': case '$': case '?': case '!': case '@':
259  case '/': case '\\': case ';': case ',': case '.': case '=':
260  case ':': case '<': case '>': case '\"':
261  case '&': case '`': case '\'': case '+':
262  case '0':
263  ++m;
264  break;
265  case '-':
266  ++m;
267  if (is_identchar(*m)) m += 1;
268  break;
269  default:
270  if (!ISDIGIT(*m)) return FALSE;
271  do ++m; while (ISDIGIT(*m));
272  break;
273  }
274  return !*m;
275 }
276 
277 static mrb_bool
278 symname_p(const char *name)
279 {
280  const char *m = name;
281  int localid = FALSE;
282 
283  if (!m) return FALSE;
284  switch (*m) {
285  case '\0':
286  return FALSE;
287 
288  case '$':
289  if (is_special_global_name(++m)) return TRUE;
290  goto id;
291 
292  case '@':
293  if (*++m == '@') ++m;
294  goto id;
295 
296  case '<':
297  switch (*++m) {
298  case '<': ++m; break;
299  case '=': if (*++m == '>') ++m; break;
300  default: break;
301  }
302  break;
303 
304  case '>':
305  switch (*++m) {
306  case '>': case '=': ++m; break;
307  default: break;
308  }
309  break;
310 
311  case '=':
312  switch (*++m) {
313  case '~': ++m; break;
314  case '=': if (*++m == '=') ++m; break;
315  default: return FALSE;
316  }
317  break;
318 
319  case '*':
320  if (*++m == '*') ++m;
321  break;
322  case '!':
323  if (*++m == '=') ++m;
324  break;
325  case '+': case '-':
326  if (*++m == '@') ++m;
327  break;
328  case '|':
329  if (*++m == '|') ++m;
330  break;
331  case '&':
332  if (*++m == '&') ++m;
333  break;
334 
335  case '^': case '/': case '%': case '~': case '`':
336  ++m;
337  break;
338 
339  case '[':
340  if (*++m != ']') return FALSE;
341  if (*++m == '=') ++m;
342  break;
343 
344  default:
345  localid = !ISUPPER(*m);
346 id:
347  if (*m != '_' && !ISALPHA(*m)) return FALSE;
348  while (is_identchar(*m)) m += 1;
349  if (localid) {
350  switch (*m) {
351  case '!': case '?': case '=': ++m;
352  default: break;
353  }
354  }
355  break;
356  }
357  return *m ? FALSE : TRUE;
358 }
359 
360 static mrb_value
361 sym_inspect(mrb_state *mrb, mrb_value sym)
362 {
363  mrb_value str;
364  const char *name;
365  size_t len;
366  mrb_sym id = mrb_symbol(sym);
367 
368  name = mrb_sym2name_len(mrb, id, &len);
369  str = mrb_str_new(mrb, 0, len+1);
370  RSTRING(str)->ptr[0] = ':';
371  memcpy(RSTRING(str)->ptr+1, name, len);
372  if (!symname_p(name) || strlen(name) != len) {
373  str = mrb_str_dump(mrb, str);
374  memcpy(RSTRING(str)->ptr, ":\"", 2);
375  }
376  return str;
377 }
378 
379 mrb_value
381 {
382  size_t len;
383  const char *name = mrb_sym2name_len(mrb, sym, &len);
384  mrb_value str;
385 
386  if (!name) return mrb_undef_value(); /* can't happen */
387  str = mrb_str_new_static(mrb, name, len);
388  if (symname_p(name) && strlen(name) == len) {
389  return str;
390  }
391  return mrb_str_dump(mrb, str);
392 }
393 
394 const char*
396 {
397  size_t len;
398  const char *name = mrb_sym2name_len(mrb, sym, &len);
399 
400  if (!name) return NULL;
401  if (symname_p(name) && strlen(name) == len) {
402  return name;
403  }
404  else {
405  mrb_value str = mrb_str_dump(mrb, mrb_str_new_static(mrb, name, len));
406  return RSTRING(str)->ptr;
407  }
408 }
409 
410 #define lesser(a,b) (((a)>(b))?(b):(a))
411 
412 static mrb_value
413 sym_cmp(mrb_state *mrb, mrb_value s1)
414 {
415  mrb_value s2;
416  mrb_sym sym1, sym2;
417 
418  mrb_get_args(mrb, "o", &s2);
419  if (mrb_type(s2) != MRB_TT_SYMBOL) return mrb_nil_value();
420  sym1 = mrb_symbol(s1);
421  sym2 = mrb_symbol(s2);
422  if (sym1 == sym2) return mrb_fixnum_value(0);
423  else {
424  const char *p1, *p2;
425  int retval;
426  size_t len, len1, len2;
427 
428  p1 = mrb_sym2name_len(mrb, sym1, &len1);
429  p2 = mrb_sym2name_len(mrb, sym2, &len2);
430  len = lesser(len1, len2);
431  retval = memcmp(p1, p2, len);
432  if (retval == 0) {
433  if (len1 == len2) return mrb_fixnum_value(0);
434  if (len1 > len2) return mrb_fixnum_value(1);
435  return mrb_fixnum_value(-1);
436  }
437  if (retval > 0) return mrb_fixnum_value(1);
438  return mrb_fixnum_value(-1);
439  }
440 }
441 
442 void
444 {
445  struct RClass *sym;
446 
447  sym = mrb->symbol_class = mrb_define_class(mrb, "Symbol", mrb->object_class);
448 
449  mrb_define_method(mrb, sym, "===", sym_equal, MRB_ARGS_REQ(1)); /* 15.2.11.3.1 */
450  mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
451  mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
452  mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
453  mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
454  mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
455 }