/*
 * Groonga 3.0.9 Source Code Document
 * File: test-string.c
 */
1 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
2 /*
3  Copyright (C) 2008-2012 Kouhei Sutou <kou@clear-code.com>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License version 2.1 as published by the Free Software Foundation.
8 
9  This library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public
15  License along with this library; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #include <groonga_in.h>
20 #include <groonga.h>
21 
22 #include <stdlib.h>
23 #include <str.h>
24 
25 #include <gcutter.h>
26 #include <glib/gstdio.h>
27 
28 #include "../lib/grn-assertions.h"
29 
30 void data_normalize(void);
31 void test_normalize(gconstpointer data);
32 void data_normalize_broken(void);
33 void test_normalize_broken(gconstpointer data);
35 void test_remove_tokenized_delimiter(gconstpointer data);
36 void data_charlen_broken(void);
37 void test_charlen_broken(gconstpointer data);
38 void data_urlenc(void);
39 void test_urlenc(gconstpointer data);
40 void data_urldec(void);
41 void test_urldec(gconstpointer data);
42 void data_cgidec(void);
43 void test_cgidec(gconstpointer data);
44 void data_url_path_normalize(void);
45 void test_url_path_normalize(gconstpointer data);
47 void test_url_path_normalize_invalid(gconstpointer data);
48 void data_text_otoj(void);
49 void test_text_otoj(gconstpointer data);
50 void data_str_len(void);
51 void test_str_len(gconstpointer data);
52 void data_aton(void);
53 void test_aton(gconstpointer data);
54 void data_itoh(void);
55 void test_itoh(gconstpointer data);
56 
57 static grn_ctx context;
58 static grn_obj *database;
59 static grn_obj buffer;
60 
61 static const gchar text_ja_utf8[] =
62  "Groongaは組み込み型の全文検索エンジンです。DBMSやスクリプト言語処理系等に\n"
63  "組み込むことによって、その全文検索機能を強化することができます。n-gram\n"
64  "インデックスと単語インデックスの特徴を兼ね備えた、高速かつ高精度な転置\n"
65  "インデックスタイプのエンジンです。コンパクトな実装ですが、大規模な文書\n"
66  "量と検索要求を処理できるように設計されています。また、純粋なn-gramイン\n"
67  "デックスの作成も可能です。";
68 
69 static const gchar normalized_text_ja_utf8[] =
70  "groongaは組み込み型の全文検索エンジンです。dbmsやスクリプト言語処理系等に"
71  "組み込むことによって、その全文検索機能を強化することができます。n-gram"
72  "インデックスと単語インデックスの特徴を兼ね備えた、高速かつ高精度な転置"
73  "インデックスタイプのエンジンです。コンパクトな実装ですが、大規模な文書"
74  "量と検索要求を処理できるように設計されています。また、純粋なn-gramイン"
75  "デックスの作成も可能です。";
76 
77 void
78 setup (void)
79 {
80  grn_ctx_init(&context, 0);
81  database = grn_db_create(&context, NULL, NULL);
82  GRN_VOID_INIT(&buffer);
83 }
84 
85 void
86 teardown (void)
87 {
88  GRN_OBJ_FIN(&context, &buffer);
89  grn_obj_close(&context, database);
90  grn_ctx_fin(&context);
91 }
92 
93 void
94 data_normalize(void)
95 {
96 #define ADD_DATUM_WITH_ENCODING(label, expected, input, encoding) \
97  gcut_add_datum(label, \
98  "expected", G_TYPE_STRING, expected, \
99  "input", G_TYPE_STRING, input, \
100  "encoding", G_TYPE_INT, encoding, \
101  NULL)
102 #define ADD_DATUM_JAPANESE(label, expected, input) \
103  ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">", \
104  expected, input, GRN_ENC_UTF8); \
105  ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">", \
106  expected, input, GRN_ENC_EUC_JP); \
107  ADD_DATUM_WITH_ENCODING("Japanese (Shift_JIS): " label " <" input ">",\
108  expected, input, GRN_ENC_SJIS)
109 
110 #define ADD_DATUM_JAPANESE_NO_SJIS(label, expected, input) \
111  ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">",\
112  expected, input, GRN_ENC_UTF8); \
113  ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">", \
114  expected, input, GRN_ENC_EUC_JP)
115 
116 #define ADD_DATUM_JAPANESE_UTF_8(label, expected, input) \
117  ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">", \
118  expected, input, GRN_ENC_UTF8)
119 
120  ADD_DATUM_JAPANESE("with newlines",
121  "groongaは組み込み型の全文検索エンジンです。"
122  "dbmsやスクリプト言語処理系等に組み込むこと"
123  "によって、その全文検索機能を強化することが"
124  "できます。n-gramインデックスと単語インデッ"
125  "クスの特徴を兼ね備えた、高速かつ高精度な転"
126  "置インデックスタイプのエンジンです。コンパ"
127  "クトな実装ですが、大規模な文書量と検索要求"
128  "を処理できるように設計されています。また、"
129  "純粋なn-gramインデックスの作成も可能です。",
130 
131  "groongaは組み込み型の全文検索エンジンです。\n"
132  "DBMSやスクリプト言語処理系等に組み込むこと\n"
133  "によって、その全文検索機能を強化することが\n"
134  "できます。n-gramインデックスと単語インデッ\n"
135  "クスの特徴を兼ね備えた、高速かつ高精度な転\n"
136  "置インデックスタイプのエンジンです。コンパ\n"
137  "クトな実装ですが、大規模な文書量と検索要求\n"
138  "を処理できるように設計されています。また、\n"
139  "純粋なn-gramインデックスの作成も可能です。");
140 
141  ADD_DATUM_JAPANESE_UTF_8("large normalization",
142  "キロメートルキロメートルキロメートルキロメートル",
143  "㌖㌖㌖㌖");
144 
145  ADD_DATUM_JAPANESE_UTF_8("tilde and fullwidth tilde and wave dash",
146  "~~~",
147  "~~〜");
148 
149 #undef ADD_DATUM_JAPANESE_NO_SJIS
150 #undef ADD_DATUM_JAPANESE
151 #undef ADD_DATUM_WITH_ENCODING
152 }
153 
154 static const gchar *
155 convert_encoding(const gchar *utf8, grn_encoding encoding)
156 {
157  const gchar *encoded = NULL;
158  GError *error = NULL;
159 
160  switch (encoding) {
161  case GRN_ENC_DEFAULT:
162  case GRN_ENC_NONE:
163  case GRN_ENC_UTF8:
164  encoded = utf8;
165  break;
166  case GRN_ENC_EUC_JP:
167  encoded = cut_take_string(g_convert(utf8, -1, "eucJP", "UTF-8",
168  NULL, NULL, &error));
169  break;
170  case GRN_ENC_SJIS:
171  encoded = cut_take_string(g_convert(utf8, -1, "CP932", "UTF-8",
172  NULL, NULL, &error));
173  break;
174  case GRN_ENC_LATIN1:
175  encoded = cut_take_string(g_convert(utf8, -1, "CP1252", "UTF-8",
176  NULL, NULL, &error));
177  break;
178  case GRN_ENC_KOI8R:
179  encoded = cut_take_string(g_convert(utf8, -1, "KOI8-R", "UTF-8",
180  NULL, NULL, &error));
181  break;
182  }
183  gcut_assert_error(error);
184 
185  return encoded;
186 }
187 
188 void
189 test_normalize(gconstpointer data)
190 {
191  const gchar *utf8_expected, *encoded_expected;
192  const gchar *utf8_input, *encoded_input;
193  grn_obj *string;
194  const gchar *normalized_text;
195  guint normalized_text_length;
196  guint normalized_text_n_characters;
197  int flags;
198  grn_encoding encoding;
199 
200  encoding = gcut_data_get_int(data, "encoding");
201  GRN_CTX_SET_ENCODING(&context, encoding);
203  utf8_input = gcut_data_get_string(data, "input");
204  encoded_input = convert_encoding(utf8_input, encoding);
205  string = grn_string_open(&context,
206  encoded_input,
207  strlen(encoded_input),
209  flags);
210  grn_string_get_normalized(&context, string,
211  &normalized_text,
212  &normalized_text_length,
213  &normalized_text_n_characters);
214  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
215  grn_obj_unlink(&context, string);
216 
217  utf8_expected = gcut_data_get_string(data, "expected");
218  encoded_expected = convert_encoding(utf8_expected, encoding);
219  cut_assert_equal_string(encoded_expected, normalized_text);
220  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
221  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
222  normalized_text_n_characters);
223 }
224 
225 void
227 {
228 #define ADD_DATUM(label, input, input_encoding, input_length, \
229  context_encoding) \
230  gcut_add_datum(label, \
231  "input", G_TYPE_STRING, input, \
232  "input-encoding", G_TYPE_INT, input_encoding, \
233  "input-length", G_TYPE_INT, input_length, \
234  "context-encoding", G_TYPE_INT, context_encoding, \
235  NULL)
236 
237  ADD_DATUM("short", "あ", GRN_ENC_UTF8, 1, GRN_ENC_UTF8);
238  ADD_DATUM("NULL", "\0", GRN_ENC_UTF8, 1, GRN_ENC_UTF8);
239 
240 #define ADD_DATUM_JAPANESE_NON_UTF8(label, input, input_length) \
241  ADD_DATUM("eucJP with UTF-8 context: " label " <" input ">", \
242  input, GRN_ENC_EUC_JP, input_length, GRN_ENC_UTF8); \
243  ADD_DATUM("ShiftJIS with UTF-8 context : " label " <" input ">", \
244  input, GRN_ENC_SJIS, input_length, GRN_ENC_UTF8);
245 
246  ADD_DATUM_JAPANESE_NON_UTF8("different encoding", "日本語", -1);
247 
248 #undef ADD_DATUM_JAPANESE_NON_UTF8
249 
250 #undef ADD_DATUM
251 }
252 
253 void
254 test_normalize_broken(gconstpointer data)
255 {
256  grn_obj *string;
257  const gchar *input, *encoded_input;
258  const gchar *normalized_text;
259  grn_encoding input_encoding, context_encoding;
260  gint input_length;
261  guint normalized_text_length, normalized_text_n_characters;
263 
264  context_encoding = gcut_data_get_int(data, "context-encoding");
265  GRN_CTX_SET_ENCODING(&context, context_encoding);
266 
267  input = gcut_data_get_string(data, "input");
268  input_encoding = gcut_data_get_int(data, "input-encoding");
269  input_length = gcut_data_get_int(data, "input-length");
270  encoded_input = convert_encoding(input, input_encoding);
271  if (input_length < 0) {
272  input_length = strlen(encoded_input);
273  }
274  string = grn_string_open(&context, encoded_input, input_length,
275  GRN_NORMALIZER_AUTO, flags);
276  grn_string_get_normalized(&context, string,
277  &normalized_text,
278  &normalized_text_length,
279  &normalized_text_n_characters);
280  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
281  grn_obj_unlink(&context, string);
282 
283  cut_assert_equal_string("", normalized_text);
284  cut_assert_equal_int(0, normalized_text_length);
285  cut_assert_equal_int(0, normalized_text_n_characters);
286 }
287 
288 void
290 {
291 #define ADD_DATUM(label, expected, input, flags) \
292  gcut_add_datum(label, \
293  "expected", G_TYPE_STRING, expected, \
294  "input", G_TYPE_STRING, input, \
295  "flags", G_TYPE_INT, flags, \
296  NULL)
297 
298 #define UFFFE_IN_UTF8 "\xef\xbf\xbe"
299 
300  ADD_DATUM("normalize",
301  "abあい",
302  UFFFE_IN_UTF8 "A"
303  UFFFE_IN_UTF8 "B"
304  UFFFE_IN_UTF8 "あ"
305  UFFFE_IN_UTF8 "い"
308  ADD_DATUM("not normalize",
309  "ABあい",
310  UFFFE_IN_UTF8 "A"
311  UFFFE_IN_UTF8 "B"
312  UFFFE_IN_UTF8 "あ"
313  UFFFE_IN_UTF8 "い"
314  UFFFE_IN_UTF8,
315  0);
316 
317 #undef UFFFE_IN_UTF8
318 
319 #undef ADD_DATUM
320 }
321 
322 void
324 {
325  grn_obj *string;
326  grn_obj *normalizer = NULL;
327  const gchar *expected;
328  const gchar *input;
329  const gchar *normalized;
330  unsigned int length_in_bytes;
332 
334 
335  input = gcut_data_get_string(data, "input");
336  flags |= gcut_data_get_int(data, "flags");
337  if (flags & GRN_OBJ_KEY_NORMALIZE) {
338  normalizer = GRN_NORMALIZER_AUTO;
339  }
340 
341  string = grn_string_open(&context, input, strlen(input), normalizer, flags);
342  grn_string_get_normalized(&context, string,
343  &normalized, &length_in_bytes, NULL);
344  normalized = cut_take_strndup(normalized, length_in_bytes);
345  grn_obj_unlink(&context, string);
346 
347  expected = gcut_data_get_string(data, "expected");
348  cut_assert_equal_string(expected, normalized);
349 }
350 
351 void
353 {
354 #define ADD_DATUM_WITH_ENCODING(label, input, input_length, encoding) \
355  gcut_add_datum(label, \
356  "input", G_TYPE_STRING, input, \
357  "input-length", G_TYPE_INT, input_length, \
358  "encoding", G_TYPE_INT, encoding, \
359  NULL)
360 
361 #define ADD_DATUM(label, input, input_length) \
362  ADD_DATUM_WITH_ENCODING("(None): " label " <" input ">", \
363  input, input_length, GRN_ENC_NONE); \
364  ADD_DATUM_WITH_ENCODING("(UTF-8): " label " <" input ">", \
365  input, input_length, GRN_ENC_UTF8); \
366  ADD_DATUM_WITH_ENCODING("(eucJP): " label " <" input ">", \
367  input, input_length, GRN_ENC_EUC_JP); \
368  ADD_DATUM_WITH_ENCODING("(Shift_JIS): " label " <" input ">", \
369  input, input_length, GRN_ENC_SJIS); \
370  ADD_DATUM_WITH_ENCODING("(Latin1): " label " <" input ">", \
371  input, input_length, GRN_ENC_LATIN1); \
372  ADD_DATUM_WITH_ENCODING("(KOI8R): " label " <" input ">", \
373  input, input_length, GRN_ENC_KOI8R)
374 
375 #define ADD_DATUM_JAPANESE(label, input, input_length) \
376  ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">", \
377  input, input_length, GRN_ENC_UTF8); \
378  ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">", \
379  input, input_length, GRN_ENC_EUC_JP); \
380  ADD_DATUM_WITH_ENCODING("Japanese (Shift_JIS): " label " <" input ">",\
381  input, input_length, GRN_ENC_SJIS)
382 
383  ADD_DATUM_JAPANESE("short length", "あ", 1);
384 
385  ADD_DATUM_WITH_ENCODING("NULL", "\0", 1, GRN_ENC_UTF8);
386 
387 #undef ADD_DATUM_JAPANESE
388 #undef ADD_DATUM
389 #undef ADD_DATUM_WITH_ENCODING
390 }
391 
392 void
393 test_charlen_broken(gconstpointer data)
394 {
395  const gchar *input, *encoded_input, *encoded_input_end;
396  grn_encoding encoding;
397  gint input_length;
398 
399  encoding = gcut_data_get_int(data, "encoding");
400  GRN_CTX_SET_ENCODING(&context, encoding);
401 
402  input = gcut_data_get_string(data, "input");
403  input_length = gcut_data_get_int(data, "input-length");
404  encoded_input = convert_encoding(input, encoding);
405  if (input_length < 0) {
406  input_length = strlen(encoded_input);
407  }
408  encoded_input_end = encoded_input + input_length;
409  cut_assert_equal_uint(0, grn_charlen(&context,
410  encoded_input,
411  encoded_input_end));
412 }
413 
414 void
416 {
417 #define ADD_DATUM(label, expected, input, input_length) \
418  gcut_add_datum(label, \
419  "expected", G_TYPE_STRING, expected, \
420  "input", G_TYPE_STRING, input, \
421  "input-length", G_TYPE_INT, input_length, \
422  NULL)
423 
424  ADD_DATUM("Japanese",
425  "%20%E6%97%A5%E6%9C%AC%E8%AA%9E%E3%81%A7%E3%81%99%E3%80%82%20",
426  " 日本語です。 ",
427  -1);
428  ADD_DATUM("percent", "%251%252%253", "%1%2%3", -1);
429  ADD_DATUM("plus", "%2B%20%2B", "+ +", -1);
430 
431 #undef ADD_DATUM
432 }
433 
434 void
435 test_urlenc(gconstpointer data)
436 {
437  grn_obj buffer;
438  const gchar *expected, *input;
439  gint input_length;
440 
441  expected = gcut_data_get_string(data, "expected");
442  input = gcut_data_get_string(data, "input");
443  input_length = gcut_data_get_int(data, "input-length");
444 
445  if (input_length < 0) {
446  input_length = strchr(input, '\0') - input;
447  }
448 
449  GRN_TEXT_INIT(&buffer, 0);
450  grn_text_urlenc(&context, &buffer, input, input_length);
451  cut_assert_equal_substring(expected,
452  GRN_TEXT_VALUE(&buffer),
453  GRN_TEXT_LEN(&buffer));
454  GRN_OBJ_FIN(&context, &buffer);
455 }
456 
457 void
459 {
460 #define ADD_DATUM(label, expected, input, input_length, end_char) \
461  gcut_add_datum(label, \
462  "expected", G_TYPE_STRING, expected, \
463  "input", G_TYPE_STRING, input, \
464  "input-length", G_TYPE_INT, input_length, \
465  "end-char", G_TYPE_CHAR, end_char, \
466  NULL)
467 
468  ADD_DATUM("Japanese",
469  "+日本語です。+",
470  "+%e6%97%a5%e6%9c%ac%e8%aa%9e%e3%81%a7%e3%81%99%e3%80%82+$yo",
471  -1,
472  '$');
473  ADD_DATUM("invalid", "%1%2%3", "%1%2%3", -1, '\0');
474 
475 #undef ADD_DATUM
476 }
477 
478 void
479 test_urldec(gconstpointer data)
480 {
481  grn_obj buffer;
482  const gchar *expected, *input;
483  gint input_length;
484  gchar end_char;
485 
486  expected = gcut_data_get_string(data, "expected");
487  input = gcut_data_get_string(data, "input");
488  input_length = gcut_data_get_int(data, "input-length");
489  end_char = gcut_data_get_char(data, "end-char");
490 
491  if (input_length < 0) {
492  input_length = strchr(input, '\0') - input;
493  }
494 
495  GRN_TEXT_INIT(&buffer, 0);
496  grn_text_urldec(&context, &buffer, input, input + input_length, end_char);
497  cut_assert_equal_substring(expected,
498  GRN_TEXT_VALUE(&buffer),
499  GRN_TEXT_LEN(&buffer));
500  grn_obj_unlink(&context, &buffer);
501 }
502 
503 void
505 {
506 #define ADD_DATUM(label, expected, input, input_length, delimiter) \
507  gcut_add_datum(label, \
508  "expected", G_TYPE_STRING, expected, \
509  "input", G_TYPE_STRING, input, \
510  "input-length", G_TYPE_INT, input_length, \
511  "delimiter", G_TYPE_STRING, delimiter, \
512  NULL)
513 
514  ADD_DATUM("?", "/d/select?table=users&limit=10", "/d/select", -1, "?");
515  ADD_DATUM("&", "table=users&limit=10", "table=users", -1, "&;");
516  ADD_DATUM(";", "table=users;limit=10", "table=users", -1, "&;");
517  ADD_DATUM("Japanese",
518  " 日本語です。 ",
519  "+%e6%97%a5%e6%9c%ac%e8%aa%9e%e3%81%a7%e3%81%99%e3%80%82+$yo",
520  -1,
521  "$");
522  ADD_DATUM("invalid", "%1%2%3", "%1%2%3", -1, "");
523 
524 #undef ADD_DATUM
525 }
526 
527 void
528 test_cgidec(gconstpointer data)
529 {
530  grn_obj buffer;
531  const gchar *expected, *input;
532  gint input_length;
533  const gchar *delimiter;
534 
535  expected = gcut_data_get_string(data, "expected");
536  input = gcut_data_get_string(data, "input");
537  input_length = gcut_data_get_int(data, "input-length");
538  delimiter = gcut_data_get_string(data, "delimiter");
539 
540  if (input_length < 0) {
541  input_length = strchr(input, '\0') - input;
542  }
543 
544  GRN_TEXT_INIT(&buffer, 0);
545  grn_text_cgidec(&context, &buffer, input, input + input_length, delimiter);
546  cut_assert_equal_substring(expected,
547  GRN_TEXT_VALUE(&buffer),
548  GRN_TEXT_LEN(&buffer));
549  grn_obj_unlink(&context, &buffer);
550 }
551 
552 void
554 {
555 #define ADD_DATUM(label, expected, input) \
556  gcut_add_datum(label, \
557  "expected", G_TYPE_STRING, expected, \
558  "input", G_TYPE_STRING, input, \
559  NULL)
560 
561  ADD_DATUM("no '.' and '..'",
562  "/a/b/c/",
563  "/a/b/c/");
564  ADD_DATUM("with '.' and '..'",
565  "/a/c/d/e",
566  "/a/b/../c/d/././e");
567 
568 #undef ADD_DATUM
569 }
570 
571 void
572 test_url_path_normalize(gconstpointer data)
573 {
574 #define BUFFER_SIZE 1024
575  gchar buffer[BUFFER_SIZE];
576  const gchar *expected, *input;
577 
578  expected = gcut_data_get_string(data, "expected");
579  input = gcut_data_get_string(data, "input");
580 
581  grn_str_url_path_normalize(&context, input, strlen(input),
582  buffer, BUFFER_SIZE);
583  cut_assert_equal_string(expected, buffer);
584 #undef BUFFER_SIZE
585 }
586 
587 void
589 {
590 #define ADD_DATUM(label, error_message, input) \
591  gcut_add_datum(label, \
592  "error-message", G_TYPE_STRING, error_message, \
593  "input", G_TYPE_STRING, input, \
594  NULL)
595 
596  ADD_DATUM("too many '..'",
597  "parent path doesn't exist.",
598  "/a/../../b");
599 
600 #undef ADD_DATUM
601 }
602 
603 void
605 {
606 #define BUFFER_SIZE 1024
607  gchar buffer[BUFFER_SIZE];
608  const gchar *error_message, *input;
609 
610  error_message = gcut_data_get_string(data, "error-message");
611  input = gcut_data_get_string(data, "input");
612 
613  grn_str_url_path_normalize(&context, input, strlen(input),
614  buffer, BUFFER_SIZE);
615  grn_test_assert_error(GRN_INVALID_ARGUMENT, error_message, &context);
616 #undef BUFFER_SIZE
617 }
618 
619 void
621 {
622 #define ADD_DATUM(label, expected, type, ...) \
623  gcut_add_datum(label, \
624  "expected", G_TYPE_STRING, expected, \
625  "type", G_TYPE_INT, type, \
626  __VA_ARGS__);
627 
628  ADD_DATUM("Void", "", GRN_DB_VOID, NULL);
629  ADD_DATUM("Bool", "true", GRN_DB_BOOL,
630  "value", G_TYPE_BOOLEAN, TRUE,
631  NULL);
632  ADD_DATUM("Bool", "false", GRN_DB_BOOL,
633  "value", G_TYPE_BOOLEAN, FALSE,
634  NULL);
635  ADD_DATUM("Int8 (min)", cut_take_printf("%d", INT8_MIN), GRN_DB_INT8,
636  "value", G_TYPE_INT, INT8_MIN,
637  NULL);
638  ADD_DATUM("Int8 (max)", cut_take_printf("%d", INT8_MAX), GRN_DB_INT8,
639  "value", G_TYPE_INT, INT8_MAX,
640  NULL);
641  ADD_DATUM("UInt8 (min)", "0", GRN_DB_UINT8,
642  "value", G_TYPE_UINT, 0,
643  NULL);
644  ADD_DATUM("UInt8 (max)", cut_take_printf("%u", UINT8_MAX), GRN_DB_UINT8,
645  "value", G_TYPE_UINT, UINT8_MAX,
646  NULL);
647  ADD_DATUM("Int16 (min)", cut_take_printf("%d", INT16_MIN), GRN_DB_INT16,
648  "value", G_TYPE_INT, INT16_MIN,
649  NULL);
650  ADD_DATUM("Int16 (max)", cut_take_printf("%d", INT16_MAX), GRN_DB_INT16,
651  "value", G_TYPE_INT, INT16_MAX,
652  NULL);
653  ADD_DATUM("UInt16 (min)", "0", GRN_DB_UINT16,
654  "value", G_TYPE_UINT, 0,
655  NULL);
656  ADD_DATUM("UInt16 (max)", cut_take_printf("%u", UINT16_MAX), GRN_DB_UINT16,
657  "value", G_TYPE_UINT, UINT16_MAX,
658  NULL);
659  ADD_DATUM("Int32 (min)", cut_take_printf("%d", INT32_MIN), GRN_DB_INT32,
660  "value", G_TYPE_INT, INT32_MIN,
661  NULL);
662  ADD_DATUM("Int32 (max)", cut_take_printf("%d", INT32_MAX), GRN_DB_INT32,
663  "value", G_TYPE_INT, INT32_MAX,
664  NULL);
665  ADD_DATUM("UInt32 (min)", "0", GRN_DB_UINT32,
666  "value", G_TYPE_UINT, 0,
667  NULL);
668  ADD_DATUM("UInt32 (max)", cut_take_printf("%u", UINT32_MAX), GRN_DB_UINT32,
669  "value", G_TYPE_UINT, UINT32_MAX,
670  NULL);
671  ADD_DATUM("Int64 (min)",
672  cut_take_printf("%" G_GINT64_FORMAT, INT64_MIN), GRN_DB_INT64,
673  "value", G_TYPE_INT64, INT64_MIN,
674  NULL);
675  ADD_DATUM("Int64 (max)",
676  cut_take_printf("%" G_GINT64_FORMAT, INT64_MAX), GRN_DB_INT64,
677  "value", G_TYPE_INT64, INT64_MAX,
678  NULL);
679  ADD_DATUM("UInt64 (min)", "0", GRN_DB_UINT64,
680  "value", G_TYPE_UINT64, G_GUINT64_CONSTANT(0),
681  NULL);
682  ADD_DATUM("UInt64 (max)",
683  cut_take_printf("%" G_GUINT64_FORMAT, UINT64_MAX), GRN_DB_UINT64,
684  "value", G_TYPE_UINT64, UINT64_MAX,
685  NULL);
686  ADD_DATUM("Float", cut_take_printf("%g", 2.9), GRN_DB_FLOAT,
687  "value", G_TYPE_DOUBLE, 2.9,
688  NULL);
689  ADD_DATUM("Time", "1271053050.21148", GRN_DB_TIME,
690  "value", G_TYPE_INT64, GRN_TIME_PACK(1271053050, 211479),
691  NULL);
692  ADD_DATUM("ShortText",
693  "\"\\\"'\\\\aAzZ09 \\n\\t\\r日本語\"", GRN_DB_SHORT_TEXT,
694  "value", G_TYPE_STRING, "\"'\\aAzZ09 \n\t\r日本語",
695  NULL);
696  ADD_DATUM("Text",
697  "\"\\\"'\\\\aAzZ09 \\n\\t\\r日本語\"", GRN_DB_TEXT,
698  "value", G_TYPE_STRING, "\"'\\aAzZ09 \n\t\r日本語",
699  NULL);
700  ADD_DATUM("LongText",
701  "\"\\\"'\\\\aAzZ09 \\n\\t\\r日本語\"", GRN_DB_LONG_TEXT,
702  "value", G_TYPE_STRING, "\"'\\aAzZ09 \n\t\r日本語",
703  NULL);
704  ADD_DATUM("TokyoGeoPoint", "\"35681396x139766049\"", GRN_DB_TOKYO_GEO_POINT,
705  "latitude", G_TYPE_INT, 35681396,
706  "longitude", G_TYPE_INT, 139766049,
707  NULL);
708  ADD_DATUM("WGS84GeoPoint", "\"36032548x140164867\"", GRN_DB_WGS84_GEO_POINT,
709  "latitude", G_TYPE_INT, 36032548,
710  "longitude", G_TYPE_INT, 140164867,
711  NULL);
712 
713  /* FIXME* unknown bulk */
714  /* FIXME: GRN_UVECTOR */
715  /* FIXME: GRN_VECTOR */
716  /* FIXME: table with format */
717  /* FIXME: table without format */
718  /* FIXME: grn_text_atoj */
719 
720 #undef ADD_DATUM
721 }
722 
723 static void
724 construct_object(gconstpointer data, grn_builtin_type type, grn_obj *object)
725 {
726  switch (type) {
727  case GRN_DB_VOID:
728  GRN_VOID_INIT(object);
729  break;
730  case GRN_DB_BOOL:
731  GRN_BOOL_INIT(object, 0);
732  GRN_BOOL_SET(&context, object, gcut_data_get_boolean(data, "value"));
733  break;
734  case GRN_DB_INT8:
735  GRN_INT8_INIT(object, 0);
736  GRN_INT8_SET(&context, object, gcut_data_get_int(data, "value"));
737  break;
738  case GRN_DB_UINT8:
739  GRN_UINT8_INIT(object, 0);
740  GRN_UINT8_SET(&context, object, gcut_data_get_uint(data, "value"));
741  break;
742  case GRN_DB_INT16:
743  GRN_INT16_INIT(object, 0);
744  GRN_INT16_SET(&context, object, gcut_data_get_int(data, "value"));
745  break;
746  case GRN_DB_UINT16:
747  GRN_UINT16_INIT(object, 0);
748  GRN_UINT16_SET(&context, object, gcut_data_get_uint(data, "value"));
749  break;
750  case GRN_DB_INT32:
751  GRN_INT32_INIT(object, 0);
752  GRN_INT32_SET(&context, object, gcut_data_get_int(data, "value"));
753  break;
754  case GRN_DB_UINT32:
755  GRN_UINT32_INIT(object, 0);
756  GRN_UINT32_SET(&context, object, gcut_data_get_uint(data, "value"));
757  break;
758  case GRN_DB_INT64:
759  GRN_INT64_INIT(object, 0);
760  GRN_INT64_SET(&context, object, gcut_data_get_int64(data, "value"));
761  break;
762  case GRN_DB_UINT64:
763  GRN_UINT64_INIT(object, 0);
764  GRN_UINT64_SET(&context, object, gcut_data_get_uint64(data, "value"));
765  break;
766  case GRN_DB_FLOAT:
767  GRN_FLOAT_INIT(object, 0);
768  GRN_FLOAT_SET(&context, object, gcut_data_get_double(data, "value"));
769  break;
770  case GRN_DB_TIME:
771  GRN_TIME_INIT(object, 0);
772  GRN_TIME_SET(&context, object, gcut_data_get_int64(data, "value"));
773  break;
774  case GRN_DB_SHORT_TEXT:
775  GRN_SHORT_TEXT_INIT(object, 0);
776  GRN_TEXT_SETS(&context, object, gcut_data_get_string(data, "value"));
777  break;
778  case GRN_DB_TEXT:
779  GRN_TEXT_INIT(object, 0);
780  GRN_TEXT_SETS(&context, object, gcut_data_get_string(data, "value"));
781  break;
782  case GRN_DB_LONG_TEXT:
783  GRN_LONG_TEXT_INIT(object, 0);
784  GRN_TEXT_SETS(&context, object, gcut_data_get_string(data, "value"));
785  break;
787  GRN_TOKYO_GEO_POINT_INIT(object, 0);
788  GRN_GEO_POINT_SET(&context, object,
789  gcut_data_get_int(data, "latitude"),
790  gcut_data_get_int(data, "longitude"));
791  break;
793  GRN_WGS84_GEO_POINT_INIT(object, 0);
794  GRN_GEO_POINT_SET(&context, object,
795  gcut_data_get_int(data, "latitude"),
796  gcut_data_get_int(data, "longitude"));
797  break;
798  default:
799  cut_fail("unknown type: %d", type);
800  break;
801  }
802 }
803 
804 void
805 test_text_otoj(gconstpointer data)
806 {
807  grn_obj object, json;
809  const gchar *expected, *actual;
810 
811  GRN_TEXT_INIT(&json, 0);
812 
813  expected = gcut_data_get_string(data, "expected");
814  type = gcut_data_get_int(data, "type");
815  cut_trace(construct_object(data, type, &object));
816  grn_text_otoj(&context, &json, &object, NULL);
817  grn_obj_unlink(&context, &object);
818  actual = cut_take_printf("%.*s",
819  (int)GRN_TEXT_LEN(&json), GRN_TEXT_VALUE(&json));
820  grn_obj_unlink(&context, &json);
821  cut_assert_equal_string(expected, actual);
822 }
823 
824 void
826 {
827 #define ADD_DATUM(label, expected, input, encoding) \
828  gcut_add_datum(label, \
829  "expected", GCUT_TYPE_SIZE, expected, \
830  "input", G_TYPE_STRING, input, \
831  "encoding", G_TYPE_INT, encoding, \
832  NULL)
833 
834 #define ADD_DATUM_ALL_ENCODING(label, expected, input) \
835  ADD_DATUM(label " (none) <" input ">", \
836  expected, input, GRN_ENC_NONE); \
837  ADD_DATUM(label " (EUC-JP) <" input ">", \
838  expected, input, GRN_ENC_EUC_JP); \
839  ADD_DATUM(label " (UTF-8) <" input ">", \
840  expected, input, GRN_ENC_UTF8); \
841  ADD_DATUM(label " (Shift_JIS) <" input ">", \
842  expected, input, GRN_ENC_SJIS); \
843  ADD_DATUM(label " (Latin1) <" input ">", \
844  expected, input, GRN_ENC_LATIN1); \
845  ADD_DATUM(label " (KOI8R) <" input ">", \
846  expected, input, GRN_ENC_KOI8R);
847 
848 #define ADD_DATUM_JAPANESE(label, expected, input) \
849  ADD_DATUM("Japanese: " label " (EUC-JP) <" input ">", \
850  expected, cut_take_convert(input, "eucJP", "UTF-8"), \
851  GRN_ENC_EUC_JP); \
852  ADD_DATUM("Japanese: " label " (UTF-8) <" input ">", \
853  expected, input, GRN_ENC_UTF8); \
854  ADD_DATUM("Japanese: " label " (Shift_JIS) <" input ">", \
855  expected, cut_take_convert(input, "CP932", "UTF-8"), \
856  GRN_ENC_SJIS);
857 
858  ADD_DATUM_ALL_ENCODING("half width", 11, "ABC! & ABC!");
859 
860  ADD_DATUM_JAPANESE("with newlines",
861  209,
862  "groongaは組み込み型の全文検索エンジンです。\n"
863  "DBMSやスクリプト言語処理系等に組み込むこと\n"
864  "によって、その全文検索機能を強化することが\n"
865  "できます。n-gramインデックスと単語インデッ\n"
866  "クスの特徴を兼ね備えた、高速かつ高精度な転\n"
867  "置インデックスタイプのエンジンです。コンパ\n"
868  "クトな実装ですが、大規模な文書量と検索要求\n"
869  "を処理できるように設計されています。また、\n"
870  "純粋なn-gramインデックスの作成も可能です。");
871 
872 #undef ADD_DATUM_JAPANESE
873 #undef ADD_DATUM_ALL_ENCODING
874 #undef ADD_DATUM
875 }
876 
877 void
878 test_str_len(gconstpointer data)
879 {
880  size_t result, expected;
881  const gchar *input;
882  const char *input_end;
883  grn_encoding encoding;
884 
885  input = gcut_data_get_string(data, "input");
886  input_end = strchr(input, '\0');
887  encoding = gcut_data_get_int(data, "encoding");
888  result = grn_str_len(&context, input, encoding, &input_end);
889  expected = gcut_data_get_size(data, "expected");
890  cut_assert_equal_size(expected, result);
891 }
892 
893 void
895 {
896 #define ADD_DATUM(label, grn_type, g_type, expected, input) \
897  gcut_add_datum(label " - " #expected, \
898  "type", G_TYPE_INT, grn_type, \
899  "expected", g_type, expected, \
900  "input", G_TYPE_STRING, input, \
901  NULL)
902 
903  ADD_DATUM("int32",
904  GRN_DB_INT32,
905  G_TYPE_INT, 344494643,
906  "+344494643");
907  ADD_DATUM("int32 - negative",
908  GRN_DB_INT32,
909  G_TYPE_INT, -344494643,
910  "-344494643");
911  ADD_DATUM("uint32",
913  G_TYPE_UINT, (guint32)G_GUINT64_CONSTANT(2147483648),
914  "2147483648");
915  ADD_DATUM("int64",
916  GRN_DB_INT64,
917  G_TYPE_INT64, G_GINT64_CONSTANT(344494643000000),
918  "344494643000000");
919  ADD_DATUM("int64 - negative",
920  GRN_DB_INT64,
921  G_TYPE_INT64, G_GINT64_CONSTANT(-344494643000000),
922  "-344494643000000");
923  /* TODO: support uint64.
924  ADD_DATUM("uint64",
925  GRN_DB_UINT64,
926  G_TYPE_UINT64, G_GUINT64_CONSTANT(9223372036854775808),
927  "9223372036854775808");
928  */
929  ADD_DATUM("float",
930  GRN_DB_FLOAT,
931  G_TYPE_DOUBLE, 3.44494643e14,
932  "3.44494643e14");
933 
934 #undef ADD_DATUM
935 }
936 
937 void
938 test_aton(gconstpointer data)
939 {
940  const gchar *input, *input_end, *rest;
942  grn_rc rc;
943 
944  type = gcut_data_get_int(data, "type");
945  input = gcut_data_get_string(data, "input");
946  input_end = strchr(input, '\0');
947  rc = grn_aton(&context, input, input_end, &rest, &buffer);
948  grn_test_assert(rc);
949  cut_assert_equal_string(input_end, rest);
950  cut_assert_equal_int(type, buffer.header.domain);
951  switch (type) {
952  case GRN_DB_INT32 :
953  cut_assert_equal_int(gcut_data_get_int(data, "expected"),
954  GRN_INT32_VALUE(&buffer));
955  break;
956  case GRN_DB_UINT32 :
957  cut_assert_equal_uint(gcut_data_get_uint(data, "expected"),
958  GRN_UINT32_VALUE(&buffer));
959  break;
960  case GRN_DB_INT64 :
961  gcut_assert_equal_int64(gcut_data_get_int64(data, "expected"),
962  GRN_INT64_VALUE(&buffer));
963  break;
964  case GRN_DB_UINT64 :
965  gcut_assert_equal_uint64(gcut_data_get_uint64(data, "expected"),
966  GRN_UINT64_VALUE(&buffer));
967  break;
968  case GRN_DB_FLOAT :
969  cut_assert_equal_double(gcut_data_get_double(data, "expected"),
970  0.000001,
971  GRN_FLOAT_VALUE(&buffer));
972  break;
973  default :
974  cut_error("unknown type: %d", type);
975  break;
976  }
977 }
978 
979 void
981 {
982 #define ADD_DATUM(label, expected, input, length) \
983  gcut_add_datum(label " - " #input " [" #length "] (" expected ")", \
984  "expected", G_TYPE_STRING, expected, \
985  "input", G_TYPE_UINT, input, \
986  "length", G_TYPE_UINT, length, \
987  NULL)
988 
989  ADD_DATUM("no alphabet",
990  "9", 9, 1);
991  ADD_DATUM("alphabet",
992  "A", 10, 1);
993  ADD_DATUM("many number of digits",
994  "0013579BDF", 324508639, 10);
995 
996 #undef ADD_DATUM
997 }
998 
999 void
1000 test_itoh(gconstpointer data)
1001 {
1002  const gchar *expected;
1003  gchar *actual;
1004  guint input;
1005  guint length;
1006 
1007  input = gcut_data_get_uint(data, "input");
1008  length = gcut_data_get_uint(data, "length");
1009  expected = gcut_data_get_string(data, "expected");
1010 
1011  actual = g_new0(gchar, length);
1012  cut_take(actual, g_free);
1013  grn_itoh(input, actual, length);
1014  cut_assert_equal_substring(expected, actual, length);
1015 }