26 #include <glib/gstdio.h>
28 #include "../lib/grn-assertions.h"
61 static const gchar text_ja_utf8[] =
62 "Groongaは組み込み型の全文検索エンジンです。DBMSやスクリプト言語処理系等に\n"
63 "組み込むことによって、その全文検索機能を強化することができます。n-gram\n"
64 "インデックスと単語インデックスの特徴を兼ね備えた、高速かつ高精度な転置\n"
65 "インデックスタイプのエンジンです。コンパクトな実装ですが、大規模な文書\n"
66 "量と検索要求を処理できるように設計されています。また、純粋なn-gramイン\n"
69 static const gchar normalized_text_ja_utf8[] =
70 "groongaは組み込み型の全文検索エンジンです。dbmsやスクリプト言語処理系等に"
71 "組み込むことによって、その全文検索機能を強化することができます。n-gram"
72 "インデックスと単語インデックスの特徴を兼ね備えた、高速かつ高精度な転置"
73 "インデックスタイプのエンジンです。コンパクトな実装ですが、大規模な文書"
74 "量と検索要求を処理できるように設計されています。また、純粋なn-gramイン"
96 #define ADD_DATUM_WITH_ENCODING(label, expected, input, encoding) \
97 gcut_add_datum(label, \
98 "expected", G_TYPE_STRING, expected, \
99 "input", G_TYPE_STRING, input, \
100 "encoding", G_TYPE_INT, encoding, \
/*
 * Registers the same expected/input pair once per Japanese-capable
 * encoding: UTF-8, eucJP and Shift_JIS.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation; expected is passed
 * through unchanged.  input is expanded once per registration.
 *
 * The registrations are wrapped in do { ... } while (0) so the macro
 * expands to exactly one statement and stays correct under an unbraced
 * if/else or loop.  Invoke it as a statement terminated by ';'.
 */
#define ADD_DATUM_JAPANESE(label, expected, input)                         \
  do {                                                                     \
    ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">",     \
                            expected, input, GRN_ENC_UTF8);                \
    ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">",     \
                            expected, input, GRN_ENC_EUC_JP);              \
    ADD_DATUM_WITH_ENCODING("Japanese (Shift_JIS): " label " <" input ">", \
                            expected, input, GRN_ENC_SJIS);                \
  } while (0)
/*
 * Registers the same expected/input pair for UTF-8 and eucJP only; no
 * Shift_JIS datum is added.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation.
 *
 * Wrapped in do { ... } while (0) so both registrations form a single
 * statement.  Invoke it as a statement terminated by ';'.
 */
#define ADD_DATUM_JAPANESE_NO_SJIS(label, expected, input)                 \
  do {                                                                     \
    ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">",     \
                            expected, input, GRN_ENC_UTF8);                \
    ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">",     \
                            expected, input, GRN_ENC_EUC_JP);              \
  } while (0)
/*
 * Registers a single datum for the UTF-8 encoding only.
 * label and input must be string literals: they are concatenated with
 * the "Japanese (UTF-8): " prefix to build the datum name.
 */
116 #define ADD_DATUM_JAPANESE_UTF_8(label, expected, input) \
117 ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">", \
118 expected, input, GRN_ENC_UTF8)
121 "groongaは組み込み型の全文検索エンジンです。"
122 "dbmsやスクリプト言語処理系等に組み込むこと"
123 "によって、その全文検索機能を強化することが"
124 "できます。n-gramインデックスと単語インデッ"
125 "クスの特徴を兼ね備えた、高速かつ高精度な転"
126 "置インデックスタイプのエンジンです。コンパ"
127 "クトな実装ですが、大規模な文書量と検索要求"
128 "を処理できるように設計されています。また、"
129 "純粋なn-gramインデックスの作成も可能です。",
131 "groongaは組み込み型の全文検索エンジンです。\n"
132 "DBMSやスクリプト言語処理系等に組み込むこと\n"
133 "によって、その全文検索機能を強化することが\n"
134 "できます。n-gramインデックスと単語インデッ\n"
135 "クスの特徴を兼ね備えた、高速かつ高精度な転\n"
136 "置インデックスタイプのエンジンです。コンパ\n"
137 "クトな実装ですが、大規模な文書量と検索要求\n"
138 "を処理できるように設計されています。また、\n"
139 "純粋なn-gramインデックスの作成も可能です。");
142 "キロメートルキロメートルキロメートルキロメートル",
149 #undef ADD_DATUM_JAPANESE_NO_SJIS
150 #undef ADD_DATUM_JAPANESE
151 #undef ADD_DATUM_WITH_ENCODING
155 convert_encoding(
const gchar *utf8,
grn_encoding encoding)
157 const gchar *encoded = NULL;
158 GError *
error = NULL;
167 encoded = cut_take_string(g_convert(utf8, -1,
"eucJP",
"UTF-8",
168 NULL, NULL, &error));
171 encoded = cut_take_string(g_convert(utf8, -1,
"CP932",
"UTF-8",
172 NULL, NULL, &error));
175 encoded = cut_take_string(g_convert(utf8, -1,
"CP1252",
"UTF-8",
176 NULL, NULL, &error));
179 encoded = cut_take_string(g_convert(utf8, -1,
"KOI8-R",
"UTF-8",
180 NULL, NULL, &error));
183 gcut_assert_error(error);
191 const gchar *utf8_expected, *encoded_expected;
192 const gchar *utf8_input, *encoded_input;
194 const gchar *normalized_text;
195 guint normalized_text_length;
196 guint normalized_text_n_characters;
200 encoding = gcut_data_get_int(data,
"encoding");
203 utf8_input = gcut_data_get_string(data,
"input");
204 encoded_input = convert_encoding(utf8_input, encoding);
207 strlen(encoded_input),
212 &normalized_text_length,
213 &normalized_text_n_characters);
214 normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
217 utf8_expected = gcut_data_get_string(data,
"expected");
218 encoded_expected = convert_encoding(utf8_expected, encoding);
219 cut_assert_equal_string(encoded_expected, normalized_text);
220 cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
221 cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
222 normalized_text_n_characters);
228 #define ADD_DATUM(label, input, input_encoding, input_length, \
230 gcut_add_datum(label, \
231 "input", G_TYPE_STRING, input, \
232 "input-encoding", G_TYPE_INT, input_encoding, \
233 "input-length", G_TYPE_INT, input_length, \
234 "context-encoding", G_TYPE_INT, context_encoding, \
/*
 * Registers two data for the same input: one with eucJP as the input
 * encoding and one with Shift_JIS, both paired with a UTF-8 context
 * encoding.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation.
 *
 * The expansion is a single do { ... } while (0) statement and no longer
 * ends with its own ';'.  The caller supplies the terminating ';', which
 * keeps the macro usable in if/else without a stray empty statement.
 */
#define ADD_DATUM_JAPANESE_NON_UTF8(label, input, input_length)            \
  do {                                                                     \
    ADD_DATUM("eucJP with UTF-8 context: " label " <" input ">",           \
              input, GRN_ENC_EUC_JP, input_length, GRN_ENC_UTF8);          \
    ADD_DATUM("ShiftJIS with UTF-8 context : " label " <" input ">",       \
              input, GRN_ENC_SJIS, input_length, GRN_ENC_UTF8);            \
  } while (0)
248 #undef ADD_DATUM_JAPANESE_NON_UTF8
257 const gchar *input, *encoded_input;
258 const gchar *normalized_text;
261 guint normalized_text_length, normalized_text_n_characters;
264 context_encoding = gcut_data_get_int(data,
"context-encoding");
267 input = gcut_data_get_string(data,
"input");
268 input_encoding = gcut_data_get_int(data,
"input-encoding");
269 input_length = gcut_data_get_int(data,
"input-length");
270 encoded_input = convert_encoding(input, input_encoding);
271 if (input_length < 0) {
272 input_length = strlen(encoded_input);
278 &normalized_text_length,
279 &normalized_text_n_characters);
280 normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
283 cut_assert_equal_string(
"", normalized_text);
284 cut_assert_equal_int(0, normalized_text_length);
285 cut_assert_equal_int(0, normalized_text_n_characters);
291 #define ADD_DATUM(label, expected, input, flags) \
292 gcut_add_datum(label, \
293 "expected", G_TYPE_STRING, expected, \
294 "input", G_TYPE_STRING, input, \
295 "flags", G_TYPE_INT, flags, \
/* The three bytes EF BF BE, i.e. the UTF-8 encoding of code point U+FFFE. */
298 #define UFFFE_IN_UTF8 "\xef\xbf\xbe"
327 const gchar *expected;
329 const gchar *normalized;
330 unsigned int length_in_bytes;
335 input = gcut_data_get_string(data,
"input");
336 flags |= gcut_data_get_int(data,
"flags");
341 string =
grn_string_open(&context, input, strlen(input), normalizer, flags);
343 &normalized, &length_in_bytes, NULL);
344 normalized = cut_take_strndup(normalized, length_in_bytes);
347 expected = gcut_data_get_string(data,
"expected");
348 cut_assert_equal_string(expected, normalized);
354 #define ADD_DATUM_WITH_ENCODING(label, input, input_length, encoding) \
355 gcut_add_datum(label, \
356 "input", G_TYPE_STRING, input, \
357 "input-length", G_TYPE_INT, input_length, \
358 "encoding", G_TYPE_INT, encoding, \
/*
 * Registers the same input/input_length pair once for each of six
 * encodings: none, UTF-8, eucJP, Shift_JIS, Latin1 and KOI8R.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation.
 *
 * Wrapped in do { ... } while (0) so the six registrations form a single
 * statement.  Invoke it as a statement terminated by ';'.
 */
#define ADD_DATUM(label, input, input_length)                              \
  do {                                                                     \
    ADD_DATUM_WITH_ENCODING("(None): " label " <" input ">",               \
                            input, input_length, GRN_ENC_NONE);            \
    ADD_DATUM_WITH_ENCODING("(UTF-8): " label " <" input ">",              \
                            input, input_length, GRN_ENC_UTF8);            \
    ADD_DATUM_WITH_ENCODING("(eucJP): " label " <" input ">",              \
                            input, input_length, GRN_ENC_EUC_JP);          \
    ADD_DATUM_WITH_ENCODING("(Shift_JIS): " label " <" input ">",          \
                            input, input_length, GRN_ENC_SJIS);            \
    ADD_DATUM_WITH_ENCODING("(Latin1): " label " <" input ">",             \
                            input, input_length, GRN_ENC_LATIN1);          \
    ADD_DATUM_WITH_ENCODING("(KOI8R): " label " <" input ">",              \
                            input, input_length, GRN_ENC_KOI8R);           \
  } while (0)
/*
 * Registers the same input/input_length pair once per Japanese-capable
 * encoding: UTF-8, eucJP and Shift_JIS.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation.
 *
 * Wrapped in do { ... } while (0) so the three registrations form a
 * single statement.  Invoke it as a statement terminated by ';'.
 */
#define ADD_DATUM_JAPANESE(label, input, input_length)                     \
  do {                                                                     \
    ADD_DATUM_WITH_ENCODING("Japanese (UTF-8): " label " <" input ">",     \
                            input, input_length, GRN_ENC_UTF8);            \
    ADD_DATUM_WITH_ENCODING("Japanese (eucJP): " label " <" input ">",     \
                            input, input_length, GRN_ENC_EUC_JP);          \
    ADD_DATUM_WITH_ENCODING("Japanese (Shift_JIS): " label " <" input ">", \
                            input, input_length, GRN_ENC_SJIS);            \
  } while (0)
387 #undef ADD_DATUM_JAPANESE
389 #undef ADD_DATUM_WITH_ENCODING
395 const gchar *input, *encoded_input, *encoded_input_end;
399 encoding = gcut_data_get_int(data,
"encoding");
402 input = gcut_data_get_string(data,
"input");
403 input_length = gcut_data_get_int(data,
"input-length");
404 encoded_input = convert_encoding(input, encoding);
405 if (input_length < 0) {
406 input_length = strlen(encoded_input);
408 encoded_input_end = encoded_input + input_length;
417 #define ADD_DATUM(label, expected, input, input_length) \
418 gcut_add_datum(label, \
419 "expected", G_TYPE_STRING, expected, \
420 "input", G_TYPE_STRING, input, \
421 "input-length", G_TYPE_INT, input_length, \
425 "%20%E6%97%A5%E6%9C%AC%E8%AA%9E%E3%81%A7%E3%81%99%E3%80%82%20",
428 ADD_DATUM(
"percent",
"%251%252%253",
"%1%2%3", -1);
429 ADD_DATUM(
"plus",
"%2B%20%2B",
"+ +", -1);
438 const gchar *expected, *input;
441 expected = gcut_data_get_string(data,
"expected");
442 input = gcut_data_get_string(data,
"input");
443 input_length = gcut_data_get_int(data,
"input-length");
445 if (input_length < 0) {
446 input_length = strchr(input,
'\0') - input;
451 cut_assert_equal_substring(expected,
460 #define ADD_DATUM(label, expected, input, input_length, end_char) \
461 gcut_add_datum(label, \
462 "expected", G_TYPE_STRING, expected, \
463 "input", G_TYPE_STRING, input, \
464 "input-length", G_TYPE_INT, input_length, \
465 "end-char", G_TYPE_CHAR, end_char, \
470 "+%e6%97%a5%e6%9c%ac%e8%aa%9e%e3%81%a7%e3%81%99%e3%80%82+$yo",
473 ADD_DATUM(
"invalid",
"%1%2%3",
"%1%2%3", -1,
'\0');
482 const gchar *expected, *input;
486 expected = gcut_data_get_string(data,
"expected");
487 input = gcut_data_get_string(data,
"input");
488 input_length = gcut_data_get_int(data,
"input-length");
489 end_char = gcut_data_get_char(data,
"end-char");
491 if (input_length < 0) {
492 input_length = strchr(input,
'\0') - input;
496 grn_text_urldec(&context, &buffer, input, input + input_length, end_char);
497 cut_assert_equal_substring(expected,
506 #define ADD_DATUM(label, expected, input, input_length, delimiter) \
507 gcut_add_datum(label, \
508 "expected", G_TYPE_STRING, expected, \
509 "input", G_TYPE_STRING, input, \
510 "input-length", G_TYPE_INT, input_length, \
511 "delimiter", G_TYPE_STRING, delimiter, \
514 ADD_DATUM(
"?",
"/d/select?table=users&limit=10",
"/d/select", -1,
"?");
515 ADD_DATUM(
"&",
"table=users&limit=10",
"table=users", -1,
"&;");
516 ADD_DATUM(
";",
"table=users;limit=10",
"table=users", -1,
"&;");
519 "+%e6%97%a5%e6%9c%ac%e8%aa%9e%e3%81%a7%e3%81%99%e3%80%82+$yo",
522 ADD_DATUM(
"invalid",
"%1%2%3",
"%1%2%3", -1,
"");
531 const gchar *expected, *input;
533 const gchar *delimiter;
535 expected = gcut_data_get_string(data,
"expected");
536 input = gcut_data_get_string(data,
"input");
537 input_length = gcut_data_get_int(data,
"input-length");
538 delimiter = gcut_data_get_string(data,
"delimiter");
540 if (input_length < 0) {
541 input_length = strchr(input,
'\0') - input;
545 grn_text_cgidec(&context, &buffer, input, input + input_length, delimiter);
546 cut_assert_equal_substring(expected,
555 #define ADD_DATUM(label, expected, input) \
556 gcut_add_datum(label, \
557 "expected", G_TYPE_STRING, expected, \
558 "input", G_TYPE_STRING, input, \
566 "/a/b/../c/d/././e");
574 #define BUFFER_SIZE 1024
576 const gchar *expected, *input;
578 expected = gcut_data_get_string(data,
"expected");
579 input = gcut_data_get_string(data,
"input");
583 cut_assert_equal_string(expected, buffer);
590 #define ADD_DATUM(label, error_message, input) \
591 gcut_add_datum(label, \
592 "error-message", G_TYPE_STRING, error_message, \
593 "input", G_TYPE_STRING, input, \
597 "parent path doesn't exist.",
606 #define BUFFER_SIZE 1024
608 const gchar *error_message, *input;
610 error_message = gcut_data_get_string(data,
"error-message");
611 input = gcut_data_get_string(data,
"input");
622 #define ADD_DATUM(label, expected, type, ...) \
623 gcut_add_datum(label, \
624 "expected", G_TYPE_STRING, expected, \
625 "type", G_TYPE_INT, type, \
630 "value", G_TYPE_BOOLEAN,
TRUE,
633 "value", G_TYPE_BOOLEAN,
FALSE,
642 "value", G_TYPE_UINT, 0,
645 "value", G_TYPE_UINT, UINT8_MAX,
654 "value", G_TYPE_UINT, 0,
657 "value", G_TYPE_UINT, UINT16_MAX,
666 "value", G_TYPE_UINT, 0,
680 "value", G_TYPE_UINT64, G_GUINT64_CONSTANT(0),
683 cut_take_printf(
"%" G_GUINT64_FORMAT, UINT64_MAX),
GRN_DB_UINT64,
684 "value", G_TYPE_UINT64, UINT64_MAX,
687 "value", G_TYPE_DOUBLE, 2.9,
694 "value", G_TYPE_STRING,
"\"'\\aAzZ09 \n\t\r日本語",
698 "value", G_TYPE_STRING,
"\"'\\aAzZ09 \n\t\r日本語",
702 "value", G_TYPE_STRING,
"\"'\\aAzZ09 \n\t\r日本語",
705 "latitude", G_TYPE_INT, 35681396,
706 "longitude", G_TYPE_INT, 139766049,
709 "latitude", G_TYPE_INT, 36032548,
710 "longitude", G_TYPE_INT, 140164867,
732 GRN_BOOL_SET(&context,
object, gcut_data_get_boolean(data,
"value"));
736 GRN_INT8_SET(&context,
object, gcut_data_get_int(data,
"value"));
740 GRN_UINT8_SET(&context,
object, gcut_data_get_uint(data,
"value"));
744 GRN_INT16_SET(&context,
object, gcut_data_get_int(data,
"value"));
748 GRN_UINT16_SET(&context,
object, gcut_data_get_uint(data,
"value"));
752 GRN_INT32_SET(&context,
object, gcut_data_get_int(data,
"value"));
756 GRN_UINT32_SET(&context,
object, gcut_data_get_uint(data,
"value"));
760 GRN_INT64_SET(&context,
object, gcut_data_get_int64(data,
"value"));
764 GRN_UINT64_SET(&context,
object, gcut_data_get_uint64(data,
"value"));
768 GRN_FLOAT_SET(&context,
object, gcut_data_get_double(data,
"value"));
772 GRN_TIME_SET(&context,
object, gcut_data_get_int64(data,
"value"));
776 GRN_TEXT_SETS(&context,
object, gcut_data_get_string(data,
"value"));
780 GRN_TEXT_SETS(&context,
object, gcut_data_get_string(data,
"value"));
784 GRN_TEXT_SETS(&context,
object, gcut_data_get_string(data,
"value"));
789 gcut_data_get_int(data,
"latitude"),
790 gcut_data_get_int(data,
"longitude"));
795 gcut_data_get_int(data,
"latitude"),
796 gcut_data_get_int(data,
"longitude"));
799 cut_fail(
"unknown type: %d", type);
809 const gchar *expected, *actual;
813 expected = gcut_data_get_string(data,
"expected");
814 type = gcut_data_get_int(data,
"type");
815 cut_trace(construct_object(data, type, &
object));
818 actual = cut_take_printf(
"%.*s",
821 cut_assert_equal_string(expected, actual);
827 #define ADD_DATUM(label, expected, input, encoding) \
828 gcut_add_datum(label, \
829 "expected", GCUT_TYPE_SIZE, expected, \
830 "input", G_TYPE_STRING, input, \
831 "encoding", G_TYPE_INT, encoding, \
/*
 * Registers the same expected/input pair once for each of six encodings,
 * in this order: none, EUC-JP, UTF-8, Shift_JIS, Latin1, KOI8R.
 *
 * label and input must be string literals because they are pasted into
 * the datum name by adjacent-literal concatenation.
 *
 * The expansion is a single do { ... } while (0) statement and no longer
 * ends with its own ';'.  The caller supplies the terminating ';', which
 * keeps the macro usable in if/else without a stray empty statement.
 */
#define ADD_DATUM_ALL_ENCODING(label, expected, input)                     \
  do {                                                                     \
    ADD_DATUM(label " (none) <" input ">",                                 \
              expected, input, GRN_ENC_NONE);                              \
    ADD_DATUM(label " (EUC-JP) <" input ">",                               \
              expected, input, GRN_ENC_EUC_JP);                            \
    ADD_DATUM(label " (UTF-8) <" input ">",                                \
              expected, input, GRN_ENC_UTF8);                              \
    ADD_DATUM(label " (Shift_JIS) <" input ">",                            \
              expected, input, GRN_ENC_SJIS);                              \
    ADD_DATUM(label " (Latin1) <" input ">",                               \
              expected, input, GRN_ENC_LATIN1);                            \
    ADD_DATUM(label " (KOI8R) <" input ">",                                \
              expected, input, GRN_ENC_KOI8R);                             \
  } while (0)
848 #define ADD_DATUM_JAPANESE(label, expected, input) \
849 ADD_DATUM("Japanese: " label " (EUC-JP) <" input ">", \
850 expected, cut_take_convert(input, "eucJP", "UTF-8"), \
852 ADD_DATUM("Japanese: " label " (UTF-8) <" input ">", \
853 expected, input, GRN_ENC_UTF8); \
854 ADD_DATUM("Japanese: " label " (Shift_JIS) <" input ">", \
855 expected, cut_take_convert(input, "CP932", "UTF-8"), \
862 "groongaは組み込み型の全文検索エンジンです。\n"
863 "DBMSやスクリプト言語処理系等に組み込むこと\n"
864 "によって、その全文検索機能を強化することが\n"
865 "できます。n-gramインデックスと単語インデッ\n"
866 "クスの特徴を兼ね備えた、高速かつ高精度な転\n"
867 "置インデックスタイプのエンジンです。コンパ\n"
868 "クトな実装ですが、大規模な文書量と検索要求\n"
869 "を処理できるように設計されています。また、\n"
870 "純粋なn-gramインデックスの作成も可能です。");
872 #undef ADD_DATUM_JAPANESE
873 #undef ADD_DATUM_ALL_ENCODING
880 size_t result, expected;
882 const char *input_end;
885 input = gcut_data_get_string(data,
"input");
886 input_end = strchr(input,
'\0');
887 encoding = gcut_data_get_int(data,
"encoding");
888 result =
grn_str_len(&context, input, encoding, &input_end);
889 expected = gcut_data_get_size(data,
"expected");
890 cut_assert_equal_size(expected, result);
896 #define ADD_DATUM(label, grn_type, g_type, expected, input) \
897 gcut_add_datum(label " - " #expected, \
898 "type", G_TYPE_INT, grn_type, \
899 "expected", g_type, expected, \
900 "input", G_TYPE_STRING, input, \
905 G_TYPE_INT, 344494643,
909 G_TYPE_INT, -344494643,
913 G_TYPE_UINT, (guint32)G_GUINT64_CONSTANT(2147483648),
917 G_TYPE_INT64, G_GINT64_CONSTANT(344494643000000),
921 G_TYPE_INT64, G_GINT64_CONSTANT(-344494643000000),
931 G_TYPE_DOUBLE, 3.44494643e14,
940 const gchar *input, *input_end, *rest;
944 type = gcut_data_get_int(data,
"type");
945 input = gcut_data_get_string(data,
"input");
946 input_end = strchr(input,
'\0');
947 rc =
grn_aton(&context, input, input_end, &rest, &buffer);
949 cut_assert_equal_string(input_end, rest);
953 cut_assert_equal_int(gcut_data_get_int(data,
"expected"),
957 cut_assert_equal_uint(gcut_data_get_uint(data,
"expected"),
961 gcut_assert_equal_int64(gcut_data_get_int64(data,
"expected"),
965 gcut_assert_equal_uint64(gcut_data_get_uint64(data,
"expected"),
969 cut_assert_equal_double(gcut_data_get_double(data,
"expected"),
974 cut_error(
"unknown type: %d", type);
982 #define ADD_DATUM(label, expected, input, length) \
983 gcut_add_datum(label " - " #input " [" #length "] (" expected ")", \
984 "expected", G_TYPE_STRING, expected, \
985 "input", G_TYPE_UINT, input, \
986 "length", G_TYPE_UINT, length, \
994 "0013579BDF", 324508639, 10);
1002 const gchar *expected;
1007 input = gcut_data_get_uint(data,
"input");
1008 length = gcut_data_get_uint(data,
"length");
1009 expected = gcut_data_get_string(data,
"expected");
1011 actual = g_new0(gchar, length);
1012 cut_take(actual, g_free);
1014 cut_assert_equal_substring(expected, actual, length);