26 #ifndef HAVE_STRNCASECMP
27 # ifdef HAVE__STRNICMP
28 # define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n)
/* Shorthand for GRN_PROC_GET_VAR_BY_OFFSET; used in this file as VAR(0),
 * VAR(3), VAR(4), VAR(5), VAR(10), VAR(11) to pick command arguments by
 * position. */
32 #define VAR GRN_PROC_GET_VAR_BY_OFFSET
/* Expands to TWO comma-separated expressions, `x` and a length, so it fills a
 * (pointer, length) argument pair in a call.
 * - The length is `sizeof(x) - 1` when x is non-null, otherwise 0.
 * - `sizeof(x) - 1` is the string length only when x is an array such as a
 *   string literal; for a plain pointer it would be the pointer size minus one.
 * - Neither x nor the expansion is parenthesized, and x appears twice outside
 *   sizeof, so pass only simple, side-effect-free expressions. */
33 #define CONST_STR_LEN(x) x, x ? sizeof(x) - 1 : 0
/* Expands to TWO comma-separated expressions, GRN_TEXT_VALUE(x) and
 * GRN_TEXT_LEN(x), so it also fills a (pointer, length) argument pair.
 * x appears twice in the expansion.
 * NOTE(review): presumably x is a grn_obj text bulk; confirm against callers. */
34 #define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x)
/* 60 multiplied by GRN_TIME_USEC_PER_SEC.
 * NOTE(review): presumably 60 seconds in microseconds, serving as a lower
 * bound for the learner's learn distance; the use site is not visible here,
 * so confirm. */
36 #define MIN_LEARN_DISTANCE (60 * GRN_TIME_USEC_PER_SEC)
93 grn_parse_suggest_types(
grn_obj *text)
104 const char string[] =
"complete";
105 size_t length =
sizeof(string) - 1;
106 if (nptr + length <= end && memcmp(nptr,
string, length) == 0) {
113 const char string[] =
"correct";
114 size_t length =
sizeof(string) - 1;
115 if (nptr + length <= end && memcmp(nptr,
string, length) == 0) {
122 const char string[] =
"suggest";
123 size_t length =
sizeof(string) - 1;
124 if (nptr + length <= end && memcmp(nptr,
string, length) == 0) {
137 grn_obj *res,
int query_type,
int frequency_threshold,
138 double conditional_probability_threshold)
140 int32_t max_score = 0;
148 switch (query_type) {
162 ((
grn_ii *)co)->n_elements - 1, 0))) {
164 grn_obj post, pair_freq, item_freq, item_freq2, item_boost;
172 int pfreq, ifreq, ifreq2, boost;
173 double conditional_probability;
189 conditional_probability = (double)pfreq / (
double)ifreq2;
191 conditional_probability = 0.0;
194 if (pfreq >= frequency_threshold && ifreq >= frequency_threshold &&
195 conditional_probability >= conditional_probability_threshold &&
199 int32_t score = pfreq;
201 if (max_score < score + boost) { max_score = score + boost; }
204 &post_id,
sizeof(
grn_id), &value, &added)) {
/* Fallback values for the suggest output and search parameters.
 * NOTE(review): the code that applies these defaults is only partly visible
 * in this excerpt; the per-macro notes below are assumptions to confirm. */
/* NOTE(review): presumably the maximum number of records output when the
 * caller gives no limit. */
224 #define DEFAULT_LIMIT 10
/* Sort-key string.
 * NOTE(review): presumably used when the sortby argument is missing or empty
 * (see the `!sortby_val || !sortby_len` check), and presumably the leading
 * '-' means descending order by _score. */
225 #define DEFAULT_SORTBY "-_score"
/* Output-column list.
 * NOTE(review): presumably used when the output_columns argument is missing
 * or empty (see the `!oc_val || !oc_len` check). */
226 #define DEFAULT_OUTPUT_COLUMNS "_key,_score"
/* NOTE(review): presumably the default for `frequency_threshold`, which this
 * file compares against item/pair frequencies and scores with `>=`. */
227 #define DEFAULT_FREQUENCY_THRESHOLD 100
/* NOTE(review): presumably the default for
 * `conditional_probability_threshold`, which this file compares against
 * pfreq / ifreq2 with `>=`. */
228 #define DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD 0.2
243 if (!sortby_val || !sortby_len) {
247 if (!oc_val || !oc_len) {
254 ":",
"sort(%d)", limit);
284 if (score >= frequency_threshold) {
299 grn_obj *output_columns,
int offset,
int limit,
300 int frequency_threshold,
double conditional_probability_threshold,
318 const char *normalized;
319 unsigned int normalized_length_in_bytes;
322 &normalized_length_in_bytes,
328 normalized_length_in_bytes,
338 complete_add_item(ctx, p->
rid, res, frequency_threshold,
339 items_freq, items_boost,
340 &item_freq, &item_boost);
352 cooccurrence_search(ctx, items, items_boost, tid, res,
COMPLETE,
354 conditional_probability_threshold);
360 normalized_length_in_bytes,
364 complete_add_item(ctx,
id, res, frequency_threshold,
365 items_freq, items_boost, &item_freq, &item_boost);
371 output(ctx, items, res, tid, sortby, output_columns, offset, limit);
383 grn_obj *output_columns,
int offset,
int limit,
384 int frequency_threshold,
double conditional_probability_threshold,
389 grn_obj item_freq2, item_boost;
396 max_score = cooccurrence_search(ctx, items, items_boost, tid, res,
CORRECT,
398 conditional_probability_threshold);
400 ":",
"cooccur(%d)", max_score);
404 max_score < frequency_threshold))) {
420 0, NULL, 0, 0, -1, 0);
439 if (score >= frequency_threshold) {
continue; }
494 "error on building expr. for calicurating edit distance");
501 output(ctx, items, res, tid, sortby, output_columns, offset, limit);
513 grn_obj *output_columns,
int offset,
int limit,
514 int frequency_threshold,
double conditional_probability_threshold)
520 cooccurrence_search(ctx, items, items_boost, tid, res,
SUGGEST,
521 frequency_threshold, conditional_probability_threshold);
522 output(ctx, items, res, tid, sortby, output_columns, offset, limit);
536 if (mode_length == 3 &&
539 }
else if (mode_length == 2 &&
552 grn_obj *items, *col, *items_boost;
557 double conditional_probability_threshold =
562 types = grn_parse_suggest_types(
VAR(0));
577 prefix_search_mode = parse_search_mode(ctx,
VAR(10));
578 similar_search_mode = parse_search_mode(ctx,
VAR(11));
586 complete(ctx, items, items_boost, col,
VAR(3),
VAR(4),
587 VAR(5), offset, limit,
588 frequency_threshold, conditional_probability_threshold,
596 correct(ctx, items, items_boost,
VAR(3),
VAR(4),
597 VAR(5), offset, limit,
598 frequency_threshold, conditional_probability_threshold,
599 similar_search_mode);
603 suggest(ctx, items, items_boost,
VAR(3),
VAR(4),
604 VAR(5), offset, limit,
605 frequency_threshold, conditional_probability_threshold);
640 learner->
pairs = pairs;
644 learner_init_values(ctx, learner);
650 grn_id events_id, event_types_id;
651 grn_obj *seqs, *events, *post_item, *items, *pairs;
671 pairs = learner->
pairs;
708 unsigned int weight = 1;
738 unsigned int events_name_size;
739 unsigned int events_name_prefix_size;
744 events_name_prefix_size = strlen(
"event_");
745 if (events_name_size > events_name_prefix_size) {
748 events_name + events_name_prefix_size,
749 events_name_size - events_name_prefix_size);
776 learner_init_weight(ctx, learner);
825 learner_increment(ctx, learner, column, learner->
post_item_id);
836 learner_learn_for_complete_and_correcnt(
grn_ctx *ctx,
839 grn_obj *pre_item, *post_item, *pre_events;
843 int64_t post_time_value;
856 int64_t learn_distance;
880 learner_increment(ctx, learner, learner->
pairs_freq1, pair_id);
883 learner_increment(ctx, learner, learner->
pairs_freq0, pair_id);
896 unsigned int token_flags = 0;
926 learner_increment(ctx, learner, learner->
pairs_freq2, pair_id);
948 if (learner_is_valid_input(ctx, learner)) {
949 learner_init_columns(ctx, learner);
950 learner_init_dataset_name(ctx, learner);
951 learner_init_configuration(ctx, learner);
952 learner_init_buffers(ctx, learner);
953 learner_increment_item_freq(ctx, learner, learner->
items_freq);
954 learner_set_last_post_time(ctx, learner);
956 learner_init_submit_learn(ctx, learner);
957 learner_increment_item_freq(ctx, learner, learner->
items_freq2);
958 learner_learn_for_complete_and_correcnt(ctx, learner);
959 learner_learn_for_suggest(ctx, learner);
960 learner_fin_submit_learn(ctx, learner);
962 learner_append_post_event(ctx, learner);
963 learner_fin_buffers(ctx, learner);
964 learner_fin_configuration(ctx, learner);
965 learner_fin_dataset_name(ctx, learner);
966 learner_fin_columns(ctx, learner);
973 int learn_distance_in_seconds = 0;
983 learner_init(ctx, &learner,
984 post_event, post_type, post_item, seq, post_time, pairs);
985 learner_learn(ctx, &learner);
1030 command_suggest, NULL, NULL, 12, vars);
1033 func_suggest_preparer, NULL, NULL, 0, NULL);