22 #include <glib/gstdio.h>
24 #include "../lib/grn-assertions.h"
51 static gchar *keyword;
55 static int default_flags;
56 static unsigned int default_width;
57 static unsigned int default_max_results;
58 static gchar *default_open_tag;
59 static unsigned int default_open_tag_len;
60 static gchar *default_close_tag;
61 static unsigned int default_close_tag_len;
64 static const gchar text[] =
65 "Groonga is an embeddable fulltext search engine, which you can use in\n"
66 "conjunction with various scripting languages and databases. Groonga is\n"
67 "an inverted index based engine, & combines the best of n-gram\n"
68 "indexing and word indexing to achieve fast, precise searches. While\n"
69 "groonga codebase is rather compact it is scalable enough to handle large\n"
70 "amounts of data and queries.";
71 static const gchar text_ja_utf8[] =
72 "Groongaは組み込み型の全文検索エンジンです。DBMSやスクリプト言語処理系等に\n"
73 "組み込むことによって、その全文検索機能を強化することができます。n-gram\n"
74 "インデックスと単語インデックスの特徴を兼ね備えた、高速かつ高精度な転置\n"
75 "インデックスタイプのエンジンです。コンパクトな実装ですが、大規模な文書\n"
76 "量と検索要求を処理できるように設計されています。また、純粋なn-gramイン\n"
78 static gchar *text_ja_euc;
79 static gchar *text_ja_sjis;
80 static const gchar html_text[] =
81 "<div class=\"day\">\n"
82 " <h2 id=\"Requirements\">Requirements</h2>\n"
83 " <div class=\"body\">\n"
84 " <div class=\"section\">\n"
88 " <li>Linux, FreeBSD, MacOS X</li>\n"
90 " <li>Requirements</li>\n"
92 " <li>MeCab-0.80 or later "
93 "(for japanese-word indexing. normally not required.)</li>\n"
94 " <li>Ruby 1.8.1 or later (for Ruby binding.)"
95 "<a class=\"external\" href=\"http://www.ruby-lang.org/\">"
96 "http://www.ruby-lang.org/"
101 " </div><!--section-->\n"
102 " </div><!--body-->\n"
103 "</div><!--day-->\n";
106 convert(
const gchar *
string,
const gchar *from,
const gchar *to, GError **
error)
108 return g_convert(
string, -1, to, from, NULL, NULL, error);
112 utf8_to_euc_jp(
const gchar *utf8, GError **
error)
114 return convert(utf8,
"utf-8",
"eucJP", error);
118 euc_jp_to_utf8(
const gchar *euc_jp, GError **
error)
120 return convert(euc_jp,
"eucJP",
"utf-8", error);
124 utf8_to_sjis(
const gchar *utf8, GError **
error)
126 return convert(utf8,
"utf-8",
"CP932", error);
130 sjis_to_utf8(
const gchar *sjis, GError **
error)
132 return convert(sjis,
"CP932",
"utf-8", error);
136 take_euc_jp_to_utf8(
const gchar *euc_jp)
139 GError *
error = NULL;
141 utf8 = euc_jp_to_utf8(euc_jp, &error);
142 cut_assert_g_error(error);
143 return cut_take_string(utf8);
147 take_sjis_to_utf8(
const gchar *sjis)
150 GError *
error = NULL;
152 utf8 = sjis_to_utf8(sjis, &error);
153 cut_assert_g_error(error);
154 return cut_take_string(utf8);
157 #define cut_check_g_error(error) do \
159 GError *_error = (error); \
161 const gchar *message; \
162 message = cut_take_printf("%s: %d: %s", \
163 g_quark_to_string(_error->domain), \
166 g_error_free(_error); \
167 cut_error("%s", message); \
174 GError *
error = NULL;
179 text_ja_euc = utf8_to_euc_jp(text_ja_utf8, &error);
182 text_ja_sjis = utf8_to_sjis(text_ja_utf8, &error);
194 g_free(text_ja_sjis);
211 default_max_results = 10;
212 default_open_tag = g_strdup(
"[[");
213 default_open_tag_len = strlen(default_open_tag);
214 default_close_tag = g_strdup(
"]]");
215 default_close_tag_len = strlen(default_close_tag);
216 default_mapping = NULL;
232 if (default_open_tag) {
233 g_free(default_open_tag);
235 if (default_close_tag) {
236 g_free(default_close_tag);
251 default_width, default_max_results,
252 default_open_tag, default_open_tag_len,
253 default_close_tag, default_close_tag_len,
258 #define cut_assert_open_snip() do \
267 unsigned int n_results;
268 unsigned int max_tagged_len;
269 unsigned int result_len;
270 const gchar keyword[] =
"Groonga";
277 &n_results, &max_tagged_len));
278 cut_assert_equal_uint(2, n_results);
279 cut_assert_equal_uint(105, max_tagged_len);
280 result = g_new(gchar, max_tagged_len);
283 cut_assert_equal_string(
"[[Groonga]] is an embeddable fulltext search engine, "
284 "which you can use in\n"
285 "conjunction with various scrip",
287 cut_assert_equal_uint(104, result_len);
290 cut_assert_equal_string(
"ting languages and databases. [[Groonga]] is\n"
291 "an inverted index based engine, & combines "
294 cut_assert_equal_uint(104, result_len);
300 GError *
error = NULL;
301 unsigned int n_results;
302 unsigned int max_tagged_len;
303 unsigned int result_len;
305 keyword = utf8_to_euc_jp(
"検索", &error);
306 cut_assert_g_error(error);
315 text_ja_euc, strlen(text_ja_euc),
316 &n_results, &max_tagged_len));
317 cut_assert_equal_uint(2, n_results);
318 cut_assert_equal_uint(108, max_tagged_len);
319 result = g_new(gchar, max_tagged_len);
322 cut_assert_equal_string(
"型の全文[[検索]]エンジンです。"
323 "DBMSやスクリプト言語処理系等に\n"
324 "組み込むことによって、その全文[[検索]]機能を強",
325 take_euc_jp_to_utf8(result));
326 cut_assert_equal_uint(107, result_len);
329 cut_assert_equal_string(
"です。コンパクトな実装ですが、大規模な文書\n"
330 "量と[[検索]]要求を処理できるように設計されて"
332 take_euc_jp_to_utf8(result));
333 cut_assert_equal_uint(103, result_len);
339 GError *
error = NULL;
340 unsigned int n_results;
341 unsigned int max_tagged_len;
342 unsigned int result_len;
344 keyword = utf8_to_sjis(
"処理", &error);
345 cut_assert_g_error(error);
354 text_ja_sjis, strlen(text_ja_sjis),
355 &n_results, &max_tagged_len));
356 cut_assert_equal_uint(2, n_results);
357 cut_assert_equal_uint(104, max_tagged_len);
358 result = g_new(gchar, max_tagged_len);
361 cut_assert_equal_string(
"み型の全文検索エンジンです。"
362 "DBMSやスクリプト言語[[処理]]系等に\n"
363 "組み込むことによって、その全文検索機能を",
364 take_sjis_to_utf8(result));
365 cut_assert_equal_uint(103, result_len);
368 cut_assert_equal_string(
"パクトな実装ですが、大規模な文書\n"
369 "量と検索要求を[[処理]]できるように設計"
370 "されています。また、純粋なn-gram",
371 take_sjis_to_utf8(result));
372 cut_assert_equal_uint(103, result_len);
378 unsigned int n_results;
379 unsigned int max_tagged_len;
380 unsigned int result_len;
381 const gchar keyword[] =
"エンジン";
390 text_ja_utf8, strlen(text_ja_utf8),
391 &n_results, &max_tagged_len));
392 cut_assert_equal_uint(2, n_results);
393 cut_assert_equal_uint(105, max_tagged_len);
394 result = g_new(gchar, max_tagged_len);
397 cut_assert_equal_string(
"Groongaは組み込み型の全文検索[[エンジン]]です。"
400 cut_assert_equal_uint(102, result_len);
403 cut_assert_equal_string(
"度な転置\n"
404 "インデックスタイプの[[エンジン]]です。"
407 cut_assert_equal_uint(104, result_len);
413 unsigned int n_results;
414 unsigned int max_tagged_len;
419 &n_results, &max_tagged_len));
420 cut_assert_equal_uint(0, n_results);
426 unsigned int n_results;
427 unsigned int max_tagged_len;
432 &n_results, &max_tagged_len));
436 &n_results, &max_tagged_len));
439 &n_results, &max_tagged_len));
442 NULL, &max_tagged_len));
451 unsigned int n_results;
452 unsigned int max_tagged_len;
453 unsigned int result_len;
454 const gchar text[] =
"Ⅶ¨abcde";
455 const gchar keyword[] =
"ab";
466 &n_results, &max_tagged_len));
467 cut_assert_equal_uint(1, n_results);
468 cut_assert_equal_uint(15, max_tagged_len);
469 result = g_new(gchar, max_tagged_len);
472 cut_assert_equal_string(
"Ⅶ¨[[ab]]cde",
474 cut_assert_equal_uint(14, result_len);
480 unsigned int n_results;
481 unsigned int max_tagged_len;
482 unsigned int result_len;
483 const gchar keyword[] =
"転置インデックス";
492 text_ja_utf8, strlen(text_ja_utf8),
493 &n_results, &max_tagged_len));
494 cut_assert_equal_uint(0, n_results);
507 text_ja_utf8, strlen(text_ja_utf8),
508 &n_results, &max_tagged_len));
509 cut_assert_equal_uint(1, n_results);
510 cut_assert_equal_uint(105, max_tagged_len);
511 result = g_new(gchar, max_tagged_len);
514 cut_assert_equal_string(
"備えた、高速かつ高精度な[[転置\n"
515 "インデックス]]タイプのエンジンです。コン",
517 cut_assert_equal_uint(104, result_len);
523 #define ADD_DATUM(label, expected, flags) \
524 gcut_add_datum(label, \
525 "expected", G_TYPE_STRING, (expected),\
526 "flags", G_TYPE_INT, (flags), \
528 const gchar tag_after_space[] =
529 "Groonga is an [[embeddable]] fulltext search engine, which you can use in\n"
530 "conjunction with various scrip";
531 const gchar tag_before_space[] =
532 "Groonga is an[[ embeddable]] fulltext search engine, which you can use in\n"
533 "conjunction with various scrip";
539 ADD_DATUM(
"skip_spaces", tag_after_space,
541 ADD_DATUM(
"copy_tag and skip_spaces", tag_after_space,
545 ADD_DATUM(
"normalize and copy_tag", tag_before_space,
547 ADD_DATUM(
"normalize and skip_spaces", tag_after_space,
549 ADD_DATUM(
"normalize, copy_tag and skip_spaces", tag_after_space,
557 unsigned int n_results;
558 unsigned int max_tagged_len;
559 const gchar keyword[] =
"embeddable";
560 const gchar *expected;
562 unsigned int text_len, keyword_len, result_len, expected_len;
565 default_flags = gcut_data_get_int(data,
"flags");
567 text_len = strlen(text);
568 keyword_len = strlen(keyword);
569 expected = gcut_data_get_string(data,
"expected");
570 expected_len = strlen(expected);
578 cut_assert_equal_uint(1, n_results);
579 cut_assert_equal_uint(expected_len + 1, max_tagged_len);
580 result = g_new(gchar, max_tagged_len);
583 cut_assert_equal_string(expected, result);
584 cut_assert_equal_uint(expected_len, result_len);
590 unsigned int n_results;
591 unsigned int max_tagged_len;
592 unsigned int result_len;
593 const gchar keyword[] =
"x";
601 &n_results, &max_tagged_len));
602 cut_assert_equal_uint(2, n_results);
603 cut_assert_equal_uint(113, max_tagged_len);
604 result = g_new(gchar, max_tagged_len);
607 cut_assert_equal_string(
"Groonga is an embeddable fullte[[x]]t search "
608 "engine, which you can use in\n"
609 "conjunction with various scrip",
611 cut_assert_equal_uint(104, result_len);
614 cut_assert_equal_string(
"an inverted inde[[x]] based engine, & "
615 "combines the best of n-gram\n"
616 "inde[[x]]ing and word inde[[x]]ing to achieve ",
618 cut_assert_equal_uint(112, result_len);
624 const gchar open_tag[] =
"((*";
625 const gchar close_tag[] =
"*))";
626 unsigned int n_results;
627 unsigned int max_tagged_len;
628 unsigned int result_len;
629 const gchar keyword[] =
"engine";
633 open_tag, strlen(open_tag),
634 close_tag, strlen(close_tag)));
637 &n_results, &max_tagged_len));
638 cut_assert_equal_uint(2, n_results);
639 cut_assert_equal_uint(107, max_tagged_len);
640 result = g_new(gchar, max_tagged_len);
643 cut_assert_equal_string(
"Groonga is an embeddable fulltext search "
644 "((*engine*)), which you can use in\n"
645 "conjunction with various scrip",
647 cut_assert_equal_uint(106, result_len);
650 cut_assert_equal_string(
" databases. Groonga is\n"
651 "an inverted index based ((*engine*)), "
652 "& combines the best of n-gram\n"
655 cut_assert_equal_uint(106, result_len);
661 const gchar open_tag[] =
"((*";
662 const gchar close_tag[] =
"*))";
663 unsigned int n_results;
664 unsigned int max_tagged_len;
665 unsigned int result_len;
666 const gchar keyword1[] =
"fulltext";
667 const gchar keyword2[] =
"groonga";
671 open_tag, strlen(open_tag),
672 close_tag, strlen(close_tag)));
677 &n_results, &max_tagged_len));
678 cut_assert_equal_uint(2, n_results);
679 cut_assert_equal_uint(107, max_tagged_len);
680 result = g_new(gchar, max_tagged_len);
683 cut_assert_equal_string(
"Groonga is an embeddable ((*fulltext*)) search "
684 "engine, which you can use in\n"
685 "conjunction with various scrip",
687 cut_assert_equal_uint(106, result_len);
690 cut_assert_equal_string(
"exing to achieve fast, precise searches. While\n"
691 "[[groonga]] codebase is rather compact it is "
694 cut_assert_equal_uint(104, result_len);
700 unsigned int n_results;
701 unsigned int max_tagged_len;
702 unsigned int result_len;
703 const gchar keyword[] =
"index";
710 &n_results, &max_tagged_len));
711 cut_assert_equal_uint(1, n_results);
712 cut_assert_equal_uint(113, max_tagged_len);
713 result = g_new(gchar, max_tagged_len);
718 result, &result_len));
724 const gchar open_tag[] =
"<<";
725 unsigned int n_results;
726 unsigned int max_tagged_len;
727 unsigned int result_len;
728 const gchar keyword[] =
"indexing";
733 open_tag, strlen(open_tag), NULL, 0));
736 &n_results, &max_tagged_len));
737 cut_assert_equal_uint(1, n_results);
738 cut_assert_equal_uint(113, max_tagged_len);
739 result = g_new(gchar, max_tagged_len);
742 cut_assert_equal_string(
"ngine, & combines the best of n-gram\n"
743 "<<indexing]] and word <<indexing]] to achieve fast, "
744 "precise searches. W",
746 cut_assert_equal_uint(112, result_len);
752 const gchar close_tag[] =
">&>";
753 unsigned int n_results;
754 unsigned int max_tagged_len;
755 unsigned int result_len;
756 const gchar keyword[] =
"Ruby";
757 const gchar expected[] =
758 "y not required.)</li>\n"
759 " <li>[[Ruby>&> 1.8.1 or later "
760 "(for [[Ruby>&> binding.)"
761 "<a class="external" "
767 NULL, 0, close_tag, strlen(close_tag)));
770 &n_results, &max_tagged_len));
771 cut_assert_equal_uint(1, n_results);
772 cut_assert_equal_uint(strlen(expected) + 1, max_tagged_len);
773 result = g_new(gchar, max_tagged_len);
776 cut_assert_equal_string(expected, result);
777 cut_assert_equal_uint(strlen(expected), result_len);
789 default_max_results = -1;
790 cut_assert_null(open_snip());
792 default_max_results = 0;
793 cut_assert_null(open_snip());
795 default_max_results = 16;
796 cut_assert_not_null(open_snip());
798 default_max_results = 17;
799 cut_assert_null(open_snip());
805 const gchar *original_default_open_tag, *original_default_close_tag;
806 unsigned int original_default_open_tag_len, original_default_close_tag_len;
810 cut_assert_not_null(open_snip());
812 original_default_open_tag = cut_take_string(default_open_tag);
813 original_default_open_tag_len = default_open_tag_len;
814 original_default_close_tag = cut_take_string(default_close_tag);
815 original_default_close_tag_len = default_close_tag_len;
817 default_open_tag = NULL;
818 default_open_tag_len = 0;
819 default_close_tag = NULL;
820 default_close_tag_len = 0;
821 cut_assert_not_null(open_snip());
823 default_open_tag = g_strdup(original_default_open_tag);
824 default_open_tag_len = original_default_open_tag_len;
825 cut_assert_not_null(open_snip());
826 g_free(default_open_tag);
827 default_open_tag = NULL;
828 default_open_tag_len = 0;
/* Re-install a copy of the close tag saved earlier in this test, so that only
 * the open tag is missing for the following open_snip() call. */
830 default_close_tag = g_strdup(original_default_close_tag);
831 default_close_tag_len = original_default_close_tag_len;
832 cut_assert_not_null(open_snip());
838 unsigned int n_conds = 0, max_n_conds = 32U;
839 const gchar keyword[] =
"Groonga";
840 unsigned int keyword_len;
841 const gchar open_tag[] =
"<<";
842 const gchar close_tag[] =
">>";
843 unsigned int open_tag_len, close_tag_len;
845 keyword_len = strlen(keyword);
846 open_tag_len = strlen(open_tag);
847 close_tag_len = strlen(close_tag);
852 keyword, keyword_len,
853 open_tag, open_tag_len,
854 close_tag, close_tag_len));
859 keyword, keyword_len,
860 open_tag, open_tag_len,
861 close_tag, close_tag_len));
865 open_tag, open_tag_len,
866 close_tag, close_tag_len));
870 open_tag, open_tag_len,
871 close_tag, close_tag_len));
/* n_conds is an unsigned int, so the failure messages format it with %u. */
873 while (n_conds < max_n_conds) {
875 keyword, keyword_len,
876 open_tag, open_tag_len,
877 close_tag, close_tag_len),
878 cut_message("cond #%u", n_conds));
884 keyword, keyword_len,
885 open_tag, open_tag_len,
886 close_tag, close_tag_len),
887 cut_message("cond #%u", n_conds));
893 const gchar *sub_text;
897 cut_assert_operator_int(strlen(text), >, default_width);
903 sub_text = text + strlen(text) - default_width;
905 sub_text, strlen(sub_text),
911 sub_text, strlen(sub_text),
918 const gchar keyword[] =
"Groonga";
919 unsigned int keyword_len;
920 const gchar open_tag[] =
"<<";
921 const gchar close_tag[] =
">>";
922 unsigned int open_tag_len, close_tag_len;
924 keyword_len = strlen(keyword);
925 open_tag_len = strlen(open_tag);
926 close_tag_len = strlen(close_tag);
933 keyword, keyword_len,
934 open_tag, open_tag_len,
935 close_tag, close_tag_len));
937 keyword, keyword_len,
938 open_tag, open_tag_len,
941 keyword, keyword_len,
943 close_tag, close_tag_len));
945 keyword, keyword_len,