Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
snip.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 2 -*- */
2 /* Copyright(C) 2009-2012 Brazil
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Lesser General Public
6  License version 2.1 as published by the Free Software Foundation.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Lesser General Public License for more details.
12 
13  You should have received a copy of the GNU Lesser General Public
14  License along with this library; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 #include "groonga_in.h"
18 #include <string.h>
19 #include <stddef.h>
20 #include "snip.h"
21 #include "ctx.h"
22 
/* MAX(a, b): the larger of a and b. Only defined when no MAX macro exists yet.
 * Both arguments appear twice in the expansion, so do not pass expressions
 * with side effects. */
#if !defined MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* MIN(a, b): the smaller of a and b. Same double-evaluation caveat as MAX. */
#if !defined MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
30 
/*
 * Parity test used to decide whether byte offset y of the EUC text x can be
 * the start of a character.
 *
 * Counts the run of bytes >= 0x80 that ends immediately before offset y and
 * returns 1 when that run has even length (zero included), 0 when it is odd.
 *
 * The scan is index based: the previous pointer loop stepped to x - 1 when
 * every scanned byte was >= 0x80 (and always for y == 0), which is undefined
 * behaviour. Results are unchanged; y == 0 yields 1.
 */
static int
grn_bm_check_euc(const unsigned char *x, const size_t y)
{
  size_t high_run = 0;

  while (high_run < y && x[y - 1 - high_run] >= 0x80U) {
    high_run++;
  }
  return (int) ((high_run + 1) & 1);
}
38 
/*
 * Parity test used to decide whether byte offset y of the Shift_JIS text x can
 * be the start of a character.
 *
 * Counts the run of bytes in 0x81..0x9f or 0xe0..0xfc that ends immediately
 * before offset y and returns 1 when that run has even length (zero
 * included), 0 when it is odd.
 *
 * The scan is index based so that no pointer before x is ever formed; the
 * previous pointer loop did that whenever the run reached the start of the
 * buffer (and always for y == 0). Results are unchanged; y == 0 yields 1.
 */
static int
grn_bm_check_sjis(const unsigned char *x, const size_t y)
{
  size_t lead_run = 0;

  while (lead_run < y) {
    const unsigned char c = x[y - 1 - lead_run];
    if ((c < 0x81U) || (c > 0x9fU && c < 0xe0U) || (c > 0xfcU)) {
      break;
    }
    lead_run++;
  }
  return (int) ((lead_run + 1) & 1);
}
48 
49 /*
50 static void
51 grn_bm_suffixes(const unsigned char *x, size_t m, size_t *suff)
52 {
53  size_t f, g;
54  intptr_t i;
55  f = 0;
56  suff[m - 1] = m;
57  g = m - 1;
58  for (i = m - 2; i >= 0; --i) {
59  if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g)
60  suff[i] = suff[i + m - 1 - f];
61  else {
62  if (i < (intptr_t) g)
63  g = i;
64  f = i;
65  while (g > 0 && x[g] == x[g + m - 1 - f])
66  --g;
67  suff[i] = f - g;
68  }
69  }
70 }
71 */
72 
73 static void
74 grn_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc)
75 {
76  size_t i;
77  for (i = 0; i < ASIZE; ++i) {
78  bmBc[i] = m;
79  }
80  for (i = 0; i < m - 1; ++i) {
81  bmBc[(unsigned int) x[i]] = m - (i + 1);
82  }
83 }
84 
/*
 * Records a keyword match located at position `found` of the normalized
 * string and returns from the enclosing function.
 *
 * The macro is not self-contained: it reads and writes these names from the
 * scope it is expanded in: cond, found, shift, m, i, flags, string_checks,
 * string_original, string_original_length_in_bytes and string_encoding.
 *
 * When string_checks[found] is 0 the macro does nothing and execution
 * continues after it. Otherwise it:
 *  - adds up the positive string_checks entries between cond->last_found and
 *    found, starting from cond->last_offset, to obtain the match offset used
 *    to index string_original;
 *  - if string_checks[found] is negative, backs the offset up by the entry of
 *    the last positive position seen (found_alpha_head) and resumes from there
 *    next time;
 *  - stores the result in cond->start_offset / cond->last_offset, optionally
 *    advancing start_offset past leading spaces (GRN_SNIP_SKIP_LEADING_SPACES);
 *  - adds up the positive entries up to found + m to obtain cond->end_offset;
 *  - sets cond->found to found + shift, the position the next search starts
 *    from.
 */
#define GRN_BM_COMPARE do { \
  if (string_checks[found]) { \
    size_t offset = cond->last_offset, found_alpha_head = cond->found_alpha_head; \
    /* calc real offset */\
    for (i = cond->last_found; i < found; i++) { \
      if (string_checks[i] > 0) { \
        found_alpha_head = i; \
        offset += string_checks[i]; \
      } \
    } \
    /* if real offset is in a character, move it to the head of the character */ \
    if (string_checks[found] < 0) { \
      offset -= string_checks[found_alpha_head]; \
      cond->last_found = found_alpha_head; \
    } else { \
      cond->last_found = found; \
    } \
    cond->start_offset = cond->last_offset = offset; \
    if (flags & GRN_SNIP_SKIP_LEADING_SPACES) { \
      while (cond->start_offset < string_original_length_in_bytes && \
             (i = grn_isspace(string_original + cond->start_offset, \
                              string_encoding))) { cond->start_offset += i; } \
    } \
    for (i = cond->last_found; i < found + m; i++) { \
      if (string_checks[i] > 0) { \
        offset += string_checks[i]; \
      } \
    } \
    cond->end_offset = offset; \
    cond->found = found + shift; \
    cond->found_alpha_head = found_alpha_head; \
    /* printf("bm: cond:%p found:%zd last_found:%zd st_off:%zd ed_off:%zd\n", cond, cond->found,cond->last_found,cond->start_offset,cond->end_offset); */ \
    return; \
  } \
} while (0)
120 
/*
 * Verifies a candidate match ending just before p and, when the whole pattern
 * matches, hands over to GRN_BM_COMPARE.
 *
 * Uses names from the scope it is expanded in: p (text cursor), cp (one past
 * the end of the pattern), ck (the pattern byte at cp[-2]), m (pattern
 * length), y (start of the text), i and found.
 *
 * Only p[-2] and the bytes before it are compared here; p[-1] is not looked
 * at. Since GRN_BM_COMPARE may return from the enclosing function, so may
 * this macro.
 */
#define GRN_BM_BM_COMPARE do { \
  if (p[-2] == ck) { \
    for (i = 3; i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]; ++i) { \
    } \
    if (i > m) { \
      found = p - y - m; \
      GRN_BM_COMPARE; \
    } \
  } \
} while (0)
131 
132 void
133 grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags)
134 {
135  register unsigned char *limit, ck;
136  register const unsigned char *p, *cp;
137  register size_t *bmBc, delta1, i;
138 
139  const unsigned char *x;
140  unsigned char *y;
141  size_t shift, found;
142 
143  const char *string_original;
144  unsigned int string_original_length_in_bytes;
145  const short *string_checks;
146  grn_encoding string_encoding;
147  const char *string_norm, *keyword_norm;
148  unsigned int n, m;
149 
150  grn_string_get_original(ctx, string,
151  &string_original, &string_original_length_in_bytes);
152  string_checks = grn_string_get_checks(ctx, string);
153  string_encoding = grn_string_get_encoding(ctx, string);
154  grn_string_get_normalized(ctx, string, &string_norm, &n, NULL);
155  grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL);
156 
157  y = (unsigned char *)string_norm;
158  if (m == 1) {
159  if (n > cond->found) {
160  shift = 1;
161  p = memchr(y + cond->found, keyword_norm[0], n - cond->found);
162  if (p != NULL) {
163  found = p - y;
165  }
166  }
167  cond->stopflag = SNIPCOND_STOP;
168  return;
169  }
170 
171  x = (unsigned char *)keyword_norm;
172  bmBc = cond->bmBc;
173  shift = cond->shift;
174 
175  /* Restart */
176  p = y + m + cond->found;
177  cp = x + m;
178  ck = cp[-2];
179 
180  /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
181  if (n - cond->found > 12 * m) {
182  limit = y + n - 11 * m;
183  while (p <= limit) {
184  p += bmBc[p[-1]];
185  if(!(delta1 = bmBc[p[-1]])) {
186  goto check;
187  }
188  p += delta1;
189  p += bmBc[p[-1]];
190  p += bmBc[p[-1]];
191  if(!(delta1 = bmBc[p[-1]])) {
192  goto check;
193  }
194  p += delta1;
195  p += bmBc[p[-1]];
196  p += bmBc[p[-1]];
197  if(!(delta1 = bmBc[p[-1]])) {
198  goto check;
199  }
200  p += delta1;
201  p += bmBc[p[-1]];
202  p += bmBc[p[-1]];
203  continue;
204  check:
206  p += shift;
207  }
208  }
209  /* limit check + search */
210  limit = y + n;
211  while(p <= limit) {
212  if (!(delta1 = bmBc[p[-1]])) {
214  p += shift;
215  }
216  p += delta1;
217  }
218  cond->stopflag = SNIPCOND_STOP;
219 }
220 
/*
 * Returns how many bytes the range [str, end) occupies once '<', '>', '&'
 * and '"' are replaced by their HTML entities.
 */
static size_t
count_mapped_chars(const char *str, const char *end)
{
  size_t mapped_len = 0;
  const char *cur = str;

  while (cur != end) {
    const char c = *cur++;
    if (c == '<' || c == '>') {
      mapped_len += 4; /* &lt; or &gt; */
    } else if (c == '&') {
      mapped_len += 5; /* &amp; */
    } else if (c == '"') {
      mapped_len += 6; /* &quot; */
    } else {
      mapped_len += 1;
    }
  }
  return mapped_len;
}
247 
248 grn_rc
250 {
251  if (!cond) {
252  return GRN_INVALID_ARGUMENT;
253  }
254  if (cond->keyword) {
255  grn_obj_close(ctx, cond->keyword);
256  }
257  return GRN_SUCCESS;
258 }
259 
260 grn_rc
261 grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len,
262  grn_encoding enc, int flags)
263 {
264  const char *norm;
265  unsigned int norm_blen;
266  grn_obj *normalizer = NULL;
267  int f = GRN_STR_REMOVEBLANK;
268  memset(sc, 0, sizeof(snip_cond));
269  if (flags & GRN_SNIP_NORMALIZE) { normalizer = GRN_NORMALIZER_AUTO; }
270  if (!(sc->keyword = grn_string_open(ctx, keyword, keyword_len,
271  normalizer, f))) {
272  GRN_LOG(ctx, GRN_LOG_ALERT,
273  "grn_string_open on snip_cond_init failed!");
275  }
276  grn_string_get_normalized(ctx, sc->keyword, &norm, &norm_blen, NULL);
277  if (!norm_blen) {
278  grn_snip_cond_close(ctx, sc);
279  return GRN_INVALID_ARGUMENT;
280  }
281  if (norm_blen != 1) {
282  grn_bm_preBmBc((unsigned char *)norm, norm_blen, sc->bmBc);
283  sc->shift = sc->bmBc[(unsigned char)norm[norm_blen - 1]];
284  sc->bmBc[(unsigned char)norm[norm_blen - 1]] = 0;
285  }
286  return GRN_SUCCESS;
287 }
288 
289 void
291 {
292  cond->found = 0;
293  cond->last_found = 0;
294  cond->last_offset = 0;
295  cond->start_offset = 0;
296  cond->end_offset = 0;
297 
298  cond->count = 0;
299  cond->stopflag = SNIPCOND_NONSTOP;
300 }
301 
302 inline static char *
303 grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len)
304 {
305  char *copied_string;
306 
307  copied_string = GRN_MALLOC(string_len + 1);
308  if (!copied_string) {
309  return NULL;
310  }
311  memcpy(copied_string, string, string_len);
312  copied_string[string_len]= '\0'; /* not required, but for ql use */
313  return copied_string;
314 }
315 
316 inline static grn_rc
317 grn_snip_cond_set_tag(grn_ctx *ctx,
318  const char **dest_tag, size_t *dest_tag_len,
319  const char *tag, unsigned int tag_len,
320  const char *default_tag, unsigned int default_tag_len,
321  int copy_tag)
322 {
323  if (tag) {
324  if (copy_tag) {
325  char *copied_tag;
326  copied_tag = grn_snip_strndup(ctx, tag, tag_len);
327  if (!copied_tag) {
329  }
330  *dest_tag = copied_tag;
331  } else {
332  *dest_tag = tag;
333  }
334  *dest_tag_len = tag_len;
335  } else {
336  *dest_tag = default_tag;
337  *dest_tag_len = default_tag_len;
338  }
339  return GRN_SUCCESS;
340 }
341 
342 grn_rc
344  const char *keyword, unsigned int keyword_len,
345  const char *opentag, unsigned int opentag_len,
346  const char *closetag, unsigned int closetag_len)
347 {
348  grn_rc rc;
349  int copy_tag;
350  snip_cond *cond;
351  unsigned int norm_blen;
352 
353  if (!snip || !keyword || !keyword_len || snip->cond_len >= MAX_SNIP_COND_COUNT) {
354  return GRN_INVALID_ARGUMENT;
355  }
356  cond = snip->cond + snip->cond_len;
357  if ((rc = grn_snip_cond_init(ctx, cond, keyword, keyword_len,
358  snip->encoding, snip->flags))) {
359  return rc;
360  }
361  grn_string_get_normalized(ctx, cond->keyword, NULL, &norm_blen, NULL);
362  if (norm_blen > snip->width) {
363  grn_snip_cond_close(ctx, cond);
364  return GRN_INVALID_ARGUMENT;
365  }
366 
367  copy_tag = snip->flags & GRN_SNIP_COPY_TAG;
368  rc = grn_snip_cond_set_tag(ctx,
369  &(cond->opentag), &(cond->opentag_len),
370  opentag, opentag_len,
371  snip->defaultopentag, snip->defaultopentag_len,
372  copy_tag);
373  if (rc) {
374  grn_snip_cond_close(ctx, cond);
375  return rc;
376  }
377 
378  rc = grn_snip_cond_set_tag(ctx,
379  &(cond->closetag), &(cond->closetag_len),
380  closetag, closetag_len,
382  copy_tag);
383  if (rc) {
384  if (opentag && copy_tag) {
385  GRN_FREE((void *)cond->opentag);
386  }
387  grn_snip_cond_close(ctx, cond);
388  return rc;
389  }
390 
391  snip->cond_len++;
392  return GRN_SUCCESS;
393 }
394 
395 static size_t
396 grn_snip_find_firstbyte(const char *string, grn_encoding encoding, size_t offset,
397  size_t doffset)
398 {
399  switch (encoding) {
400  case GRN_ENC_EUC_JP:
401  while (!(grn_bm_check_euc((unsigned char *) string, offset)))
402  offset += doffset;
403  break;
404  case GRN_ENC_SJIS:
405  if (!(grn_bm_check_sjis((unsigned char *) string, offset)))
406  offset += doffset;
407  break;
408  case GRN_ENC_UTF8:
409  while (string[offset] <= (char)0xc0)
410  offset += doffset;
411  break;
412  default:
413  break;
414  }
415  return offset;
416 }
417 
418 inline static grn_rc
419 grn_snip_set_default_tag(grn_ctx *ctx,
420  const char **dest_tag, size_t *dest_tag_len,
421  const char *tag, unsigned int tag_len,
422  int copy_tag)
423 {
424  if (copy_tag && tag) {
425  char *copied_tag;
426  copied_tag = grn_snip_strndup(ctx, tag, tag_len);
427  if (!copied_tag) {
429  }
430  *dest_tag = copied_tag;
431  } else {
432  *dest_tag = tag;
433  }
434  *dest_tag_len = tag_len;
435  return GRN_SUCCESS;
436 }
437 
438 grn_snip *
439 grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
440  unsigned int max_results,
441  const char *defaultopentag, unsigned int defaultopentag_len,
442  const char *defaultclosetag, unsigned int defaultclosetag_len,
443  grn_snip_mapping *mapping)
444 {
445  int copy_tag;
446  grn_snip *ret = NULL;
447  if (!(ret = GRN_MALLOC(sizeof(grn_snip)))) {
448  GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
449  return NULL;
450  }
451  if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
452  GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
453  GRN_FREE(ret);
454  return NULL;
455  }
457  ret->encoding = ctx->encoding;
458  ret->flags = flags;
459  ret->width = width;
460  ret->max_results = max_results;
461  ret->defaultopentag = NULL;
462  ret->defaultclosetag = NULL;
463 
464  copy_tag = flags & GRN_SNIP_COPY_TAG;
465  if (grn_snip_set_default_tag(ctx,
466  &(ret->defaultopentag),
467  &(ret->defaultopentag_len),
468  defaultopentag, defaultopentag_len,
469  copy_tag)) {
470  GRN_FREE(ret);
471  GRN_API_RETURN(NULL);
472  }
473 
474  if (grn_snip_set_default_tag(ctx,
475  &(ret->defaultclosetag),
476  &(ret->defaultclosetag_len),
477  defaultclosetag, defaultclosetag_len,
478  copy_tag)) {
479  if (copy_tag && ret->defaultopentag) {
480  GRN_FREE((void *)ret->defaultopentag);
481  }
482  GRN_FREE(ret);
483  GRN_API_RETURN(NULL);
484  }
485 
486  ret->cond_len = 0;
487  ret->mapping = mapping;
488  ret->nstr = NULL;
489  ret->tag_count = 0;
490  ret->snip_count = 0;
491 
493  {
494  grn_obj *db;
495  grn_id id;
496  db = grn_ctx_db(ctx);
497  id = grn_obj_register(ctx, db, NULL, 0);
498  DB_OBJ(ret)->header.domain = GRN_ID_NIL;
499  DB_OBJ(ret)->range = GRN_ID_NIL;
500  grn_db_obj_init(ctx, db, id, DB_OBJ(ret));
501  }
502 
503  GRN_API_RETURN(ret);
504 }
505 
506 static grn_rc
507 exec_clean(grn_ctx *ctx, grn_snip *snip)
508 {
509  snip_cond *cond, *cond_end;
510  if (snip->nstr) {
511  grn_obj_close(ctx, snip->nstr);
512  snip->nstr = NULL;
513  }
514  snip->tag_count = 0;
515  snip->snip_count = 0;
516  for (cond = snip->cond, cond_end = cond + snip->cond_len;
517  cond < cond_end; cond++) {
518  grn_snip_cond_reinit(cond);
519  }
520  return GRN_SUCCESS;
521 }
522 
523 /* It should be renamed to grn_snip_close() and marked as internal.
524  * TODO: 3.0 */
525 grn_rc
527 {
528  snip_cond *cond, *cond_end;
529  if (!snip) { return GRN_INVALID_ARGUMENT; }
531  if (snip->flags & GRN_SNIP_COPY_TAG) {
532  int i;
533  snip_cond *sc;
534  const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag;
535  for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) {
536  if (sc->opentag != dot) { GRN_FREE((void *)sc->opentag); }
537  if (sc->closetag != dct) { GRN_FREE((void *)sc->closetag); }
538  }
539  if (dot) { GRN_FREE((void *)dot); }
540  if (dct) { GRN_FREE((void *)dct); }
541  }
542  if (snip->nstr) {
543  grn_obj_close(ctx, snip->nstr);
544  }
545  for (cond = snip->cond, cond_end = cond + snip->cond_len;
546  cond < cond_end; cond++) {
547  grn_snip_cond_close(ctx, cond);
548  }
549  GRN_FREE(snip);
551 }
552 
/* Kept only for backward compatibility. It should be replaced with
 * grn_snip_close_real() in groonga 3.0.
 * TODO: 3.0 */
556 grn_rc
558 {
559  return grn_obj_close(ctx, (grn_obj *)snip);
560 }
561 
562 
563 grn_rc
564 grn_snip_exec(grn_ctx *ctx, grn_snip *snip, const char *string, unsigned int string_len,
565  unsigned int *nresults, unsigned int *max_tagged_len)
566 {
567  size_t i;
568  grn_obj *normalizer = NULL;
570  if (!snip || !string || !nresults || !max_tagged_len) {
571  return GRN_INVALID_ARGUMENT;
572  }
574  exec_clean(ctx, snip);
575  *nresults = 0;
576  if (snip->flags & GRN_SNIP_NORMALIZE) { normalizer = GRN_NORMALIZER_AUTO; }
577  snip->nstr = grn_string_open(ctx, string, string_len, normalizer, f);
578  if (!snip->nstr) {
579  exec_clean(ctx, snip);
580  GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !");
581  GRN_API_RETURN(ctx->rc);
582  }
583  for (i = 0; i < snip->cond_len; i++) {
584  grn_bm_tunedbm(ctx, snip->cond + i, snip->nstr, snip->flags);
585  }
586 
587  {
588  _snip_tag_result *tag_result = snip->tag_result;
589  _snip_result *snip_result = snip->snip_result;
590  size_t last_end_offset = 0, last_last_end_offset = 0;
591  unsigned int unfound_cond_count = snip->cond_len;
592 
593  *max_tagged_len = 0;
594  while (1) {
595  size_t tagged_len = 0, last_tag_end = 0;
596  int_least8_t all_stop = 1, found_cond = 0;
597  snip_result->tag_count = 0;
598 
599  while (1) {
600  size_t min_start_offset = (size_t) -1;
601  size_t max_end_offset = 0;
602  snip_cond *cond = NULL;
603 
604  /* get condition which have minimum offset and is not stopped */
605  for (i = 0; i < snip->cond_len; i++) {
606  if (snip->cond[i].stopflag == SNIPCOND_NONSTOP &&
607  (min_start_offset > snip->cond[i].start_offset ||
608  (min_start_offset == snip->cond[i].start_offset &&
609  max_end_offset < snip->cond[i].end_offset))) {
610  min_start_offset = snip->cond[i].start_offset;
611  max_end_offset = snip->cond[i].end_offset;
612  cond = &snip->cond[i];
613  }
614  }
615  if (!cond) {
616  break;
617  }
618  /* check whether condtion is the first condition in snippet */
619  if (snip_result->tag_count == 0) {
620  /* skip condition if the number of rest snippet field is smaller than */
621  /* the number of unfound keywords. */
622  if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
623  int_least8_t exclude_other_cond = 1;
624  for (i = 0; i < snip->cond_len; i++) {
625  if ((snip->cond + i) != cond
626  && snip->cond[i].end_offset <= cond->start_offset + snip->width
627  && snip->cond[i].count == 0) {
628  exclude_other_cond = 0;
629  }
630  }
631  if (exclude_other_cond) {
632  grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
633  continue;
634  }
635  }
636  snip_result->start_offset = cond->start_offset;
637  snip_result->first_tag_result_idx = snip->tag_count;
638  } else {
639  if (cond->start_offset >= snip_result->start_offset + snip->width) {
640  break;
641  }
642  /* check nesting to make valid HTML */
643  /* ToDo: allow <test><te>te</te><st>st</st></test> */
644  if (cond->start_offset < last_tag_end) {
645  grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
646  continue;
647  }
648  }
649  if (cond->end_offset > snip_result->start_offset + snip->width) {
650  /* If a keyword gets across a snippet, */
651  /* it was skipped and never to be tagged. */
652  cond->stopflag = SNIPCOND_ACROSS;
653  grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
654  } else {
655  found_cond = 1;
656  if (cond->count == 0) {
657  unfound_cond_count--;
658  }
659  cond->count++;
660  last_end_offset = cond->end_offset;
661 
662  tag_result->cond = cond;
663  tag_result->start_offset = cond->start_offset;
664  tag_result->end_offset = last_tag_end = cond->end_offset;
665 
666  snip_result->tag_count++;
667  tag_result++;
668  tagged_len += cond->opentag_len + cond->closetag_len;
669  if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) {
670  break;
671  }
672  grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
673  }
674  }
675  if (!found_cond) {
676  break;
677  }
678  if (snip_result->start_offset + last_end_offset < snip->width) {
679  snip_result->start_offset = 0;
680  } else {
681  snip_result->start_offset =
682  MAX(MIN
683  ((snip_result->start_offset + last_end_offset - snip->width) / 2,
684  string_len - snip->width), last_last_end_offset);
685  }
686  snip_result->start_offset =
687  grn_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1);
688 
689  snip_result->end_offset = snip_result->start_offset + snip->width;
690  if (snip_result->end_offset < string_len) {
691  snip_result->end_offset =
692  grn_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1);
693  } else {
694  snip_result->end_offset = string_len;
695  }
696  last_last_end_offset = snip_result->end_offset;
697 
698  if (snip->mapping == (grn_snip_mapping *) -1) {
699  tagged_len +=
700  count_mapped_chars(&string[snip_result->start_offset],
701  &string[snip_result->end_offset]) + 1;
702  } else {
703  tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
704  }
705 
706  *max_tagged_len = MAX(*max_tagged_len, tagged_len);
707 
708  snip_result->last_tag_result_idx = snip->tag_count - 1;
709  (*nresults)++;
710  snip_result++;
711 
712  if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) {
713  break;
714  }
715  for (i = 0; i < snip->cond_len; i++) {
716  if (snip->cond[i].stopflag != SNIPCOND_STOP) {
717  all_stop = 0;
718  snip->cond[i].stopflag = SNIPCOND_NONSTOP;
719  }
720  }
721  if (all_stop) {
722  break;
723  }
724  }
725  }
726  snip->snip_count = *nresults;
727  snip->string = string;
728 
729  snip->max_tagged_len = *max_tagged_len;
730 
731  GRN_API_RETURN(ctx->rc);
732 }
733 
734 grn_rc
735 grn_snip_get_result(grn_ctx *ctx, grn_snip *snip, const unsigned int index, char *result, unsigned int *result_len)
736 {
737  char *p;
738  size_t i, j, k;
739  _snip_result *sres;
740 
741  if (snip->snip_count <= index || !snip->nstr) {
742  return GRN_INVALID_ARGUMENT;
743  }
744 
745  GRN_ASSERT(snip->snip_count != 0 && snip->tag_count != 0);
746 
748  sres = &snip->snip_result[index];
749  j = sres->first_tag_result_idx;
750  for (p = result, i = sres->start_offset; i < sres->end_offset; i++) {
751  for (; j <= sres->last_tag_result_idx && snip->tag_result[j].start_offset == i; j++) {
752  if (snip->tag_result[j].end_offset > sres->end_offset) {
753  continue;
754  }
755  memcpy(p, snip->tag_result[j].cond->opentag, snip->tag_result[j].cond->opentag_len);
756  p += snip->tag_result[j].cond->opentag_len;
757  }
758 
759  if (snip->mapping == GRN_SNIP_MAPPING_HTML_ESCAPE) {
760  switch (snip->string[i]) {
761  case '<':
762  *p++ = '&';
763  *p++ = 'l';
764  *p++ = 't';
765  *p++ = ';';
766  break;
767  case '>':
768  *p++ = '&';
769  *p++ = 'g';
770  *p++ = 't';
771  *p++ = ';';
772  break;
773  case '&':
774  *p++ = '&';
775  *p++ = 'a';
776  *p++ = 'm';
777  *p++ = 'p';
778  *p++ = ';';
779  break;
780  case '"':
781  *p++ = '&';
782  *p++ = 'q';
783  *p++ = 'u';
784  *p++ = 'o';
785  *p++ = 't';
786  *p++ = ';';
787  break;
788  default:
789  *p++ = snip->string[i];
790  break;
791  }
792  } else {
793  *p++ = snip->string[i];
794  }
795 
796  for (k = sres->last_tag_result_idx;
797  snip->tag_result[k].end_offset <= sres->end_offset; k--) {
798  /* TODO: avoid all loop */
799  if (snip->tag_result[k].end_offset == i + 1) {
800  memcpy(p, snip->tag_result[k].cond->closetag,
801  snip->tag_result[k].cond->closetag_len);
802  p += snip->tag_result[k].cond->closetag_len;
803  }
804  if (k <= sres->first_tag_result_idx) {
805  break;
806  }
807  };
808  }
809  *p = '\0';
810 
811  if(result_len) { *result_len = (unsigned int)(p - result); }
812  GRN_ASSERT((unsigned int)(p - result) <= snip->max_tagged_len);
813 
814  GRN_API_RETURN(ctx->rc);
815 }