/* MAX(a, b): yields the larger of a and b.
 * This is a function-like macro, so the argument that is selected is
 * evaluated twice; do not pass expressions with side effects (e.g. i++). */
24 #define MAX(a, b) ((a) > (b) ? (a) : (b))
/* MIN(a, b): yields the smaller of a and b.
 * This is a function-like macro, so the argument that is selected is
 * evaluated twice; do not pass expressions with side effects (e.g. i++). */
28 #define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
 * grn_bm_check_euc: parity test on the run of high bytes that ends the
 * y-byte buffer x.
 *
 * The scan starts at the last byte (x + y - 1) and moves backwards while
 * the byte is >= 0x80. With p the position where the scan stopped, the
 * result is the low bit of (x + y - p): 1 when the number of trailing
 * bytes >= 0x80 is even (including zero), 0 when it is odd.
 *
 * Presumably used to tell whether offset y falls on an EUC character
 * boundary -- TODO confirm against the callers.
 *
 * NOTE(review): when y == 0, or when every byte is >= 0x80, p ends up at
 * x - 1. Forming a pointer before the start of an array is undefined
 * behavior in ISO C even if it is never dereferenced; an index-based scan
 * would avoid that.
 */
32 grn_bm_check_euc(
const unsigned char *x,
const size_t y)
34 const unsigned char *p;
/* The loop body is intentionally empty: only the final p matters. */
35 for (p = x + y - 1; p >= x && *p >= 0x80U; p--);
36 return (
int) ((x + y - p) & 1);
/*
 * grn_bm_check_sjis: Shift_JIS counterpart of the EUC parity test.
 *
 * Walks backwards from the last byte of the y-byte buffer x (x + y - 1)
 * and tests each byte against the ranges 0x81..0x9f and 0xe0..0xfc, the
 * ranges conventionally used for Shift_JIS lead bytes. The result is the
 * low bit of (x + y - p), where p is the position at which the walk ended.
 *
 * NOTE(review): the walk presumably stops at the first byte for which the
 * test below is true -- TODO confirm the statement controlled by the if.
 *
 * NOTE(review): if the walk is not stopped early, p is decremented to
 * x - 1, which is undefined behavior in ISO C (pointer before the start of
 * the array), even though it is not dereferenced.
 */
40 grn_bm_check_sjis(
const unsigned char *x,
const size_t y)
42 const unsigned char *p;
43 for (p = x + y - 1; p >= x; p--)
/* True for bytes outside both 0x81..0x9f and 0xe0..0xfc. */
44 if ((*p < 0x81U) || (*p > 0x9fU && *p < 0xe0U) || (*p > 0xfcU))
46 return (
int) ((x + y - p) & 1);
/*
 * grn_bm_preBmBc: fills the per-byte shift table bmBc for the m-byte
 * pattern x.
 *
 * x    : pattern bytes
 * m    : pattern length in bytes
 * bmBc : output table, indexed by byte value; it must have room for every
 *        unsigned char value (presumably ASIZE entries -- TODO confirm)
 *
 * The first loop visits every index below ASIZE (presumably to store the
 * default shift -- TODO confirm). The second loop then records, for every
 * pattern byte except the last one, its distance to the last pattern
 * position; a later occurrence of the same byte overwrites an earlier one,
 * so the rightmost occurrence wins.
 *
 * NOTE(review): m is size_t, so "m - 1" wraps to SIZE_MAX when m == 0 and
 * the second loop would then read far beyond x. Callers must guarantee
 * m >= 1 -- TODO confirm that they do.
 */
74 grn_bm_preBmBc(
const unsigned char *x,
size_t m,
size_t *bmBc)
77 for (i = 0; i <
ASIZE; ++
i) {
80 for (i = 0; i < m - 1; ++
i) {
81 bmBc[(
unsigned int) x[i]] = m - (i + 1);
/*
 * GRN_BM_COMPARE: statement macro (do { ... } form) that records a hit at
 * index `found` into `cond`.
 *
 * It is not self-contained. It reads and writes names taken from the
 * expansion site: cond, found, m, shift, flags, i, string_checks,
 * string_original, string_original_length_in_bytes and string_encoding.
 *
 * What the statements do:
 *  - when string_checks[found] is non-zero, the positive string_checks[]
 *    entries from cond->last_found up to (not including) found are added
 *    to cond->last_offset, and the index of the last positive entry is
 *    remembered in found_alpha_head;
 *  - when string_checks[found] is negative, the offset is moved back by
 *    string_checks[found_alpha_head] and cond->last_found becomes that
 *    head index; cond->last_found is otherwise set to found;
 *  - the offset is stored in cond->start_offset and cond->last_offset;
 *    with GRN_SNIP_SKIP_LEADING_SPACES set in flags, start_offset is then
 *    advanced past each run that grn_isspace() reports, bounded by
 *    string_original_length_in_bytes;
 *  - positive entries from cond->last_found up to found + m are added to
 *    obtain cond->end_offset;
 *  - cond->found is advanced to found + shift and found_alpha_head is
 *    saved back into cond.
 *
 * NOTE(review): string_checks presumably maps bytes of the normalized
 * string to byte lengths in the original string (positive at the first
 * byte of a character) -- TODO confirm with the normalizer documentation.
 * NOTE(review): the exact if/else nesting should be confirmed when
 * editing; the list above is a summary of the assignments, not of the
 * branch structure.
 */
85 #define GRN_BM_COMPARE do { \
86 if (string_checks[found]) { \
87 size_t offset = cond->last_offset, found_alpha_head = cond->found_alpha_head; \
89 for (i = cond->last_found; i < found; i++) { \
90 if (string_checks[i] > 0) { \
91 found_alpha_head = i; \
92 offset += string_checks[i]; \
96 if (string_checks[found] < 0) { \
97 offset -= string_checks[found_alpha_head]; \
98 cond->last_found = found_alpha_head; \
100 cond->last_found = found; \
102 cond->start_offset = cond->last_offset = offset; \
103 if (flags & GRN_SNIP_SKIP_LEADING_SPACES) { \
104 while (cond->start_offset < string_original_length_in_bytes && \
105 (i = grn_isspace(string_original + cond->start_offset, \
106 string_encoding))) { cond->start_offset += i; } \
108 for (i = cond->last_found; i < found + m; i++) { \
109 if (string_checks[i] > 0) { \
110 offset += string_checks[i]; \
113 cond->end_offset = offset; \
114 cond->found = found + shift; \
115 cond->found_alpha_head = found_alpha_head; \
/*
 * GRN_BM_BM_COMPARE: statement macro (do { ... } form) that extends a
 * partial match backwards. It uses p, cp, m and i from the expansion site
 * and compares p[-i] with cp[-i] for i = 3, 4, ... while i <= m and the
 * bytes are equal.
 *
 * The index is cast to intptr_t before negation so that the negative
 * offset is computed in a signed type; negating an unsigned index would
 * wrap around instead.
 *
 * NOTE(review): starting at i = 3 presumably relies on the bytes at
 * offsets -1 and -2 having been compared before the macro is expanded --
 * TODO confirm at the expansion sites.
 */
121 #define GRN_BM_BM_COMPARE do { \
123 for (i = 3; i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]; ++i) { \
135 register unsigned char *limit, ck;
136 register const unsigned char *p, *cp;
137 register size_t *bmBc, delta1,
i;
139 const unsigned char *x;
143 const char *string_original;
144 unsigned int string_original_length_in_bytes;
145 const short *string_checks;
147 const char *string_norm, *keyword_norm;
151 &string_original, &string_original_length_in_bytes);
157 y = (
unsigned char *)string_norm;
159 if (n > cond->
found) {
161 p = memchr(y + cond->
found, keyword_norm[0], n - cond->
found);
171 x = (
unsigned char *)keyword_norm;
176 p = y + m + cond->
found;
181 if (n - cond->
found > 12 * m) {
182 limit = y + n - 11 * m;
185 if(!(delta1 = bmBc[p[-1]])) {
191 if(!(delta1 = bmBc[p[-1]])) {
197 if(!(delta1 = bmBc[p[-1]])) {
212 if (!(delta1 = bmBc[p[-1]])) {
/*
 * count_mapped_chars: walks the bytes from str up to (not including) end,
 * one char per iteration.
 *
 * NOTE(review): the loop ends only when p becomes exactly equal to end, so
 * end must be reachable from str by increments (same buffer, end >= str);
 * otherwise the walk runs past the buffer -- TODO confirm the callers.
 */
222 count_mapped_chars(
const char *str,
const char *end)
228 for (p = str; p != end; p++) {
265 unsigned int norm_blen;
273 "grn_string_open on snip_cond_init failed!");
281 if (norm_blen != 1) {
282 grn_bm_preBmBc((
unsigned char *)norm, norm_blen, sc->
bmBc);
283 sc->
shift = sc->
bmBc[(
unsigned char)norm[norm_blen - 1]];
284 sc->
bmBc[(
unsigned char)norm[norm_blen - 1]] = 0;
/*
 * grn_snip_strndup: returns a NUL-terminated copy of the first string_len
 * bytes of string.
 *
 * The bytes are copied with memcpy, so string does not need to be
 * NUL-terminated and embedded NUL bytes are copied as they are. The
 * terminator is written at index string_len, so the destination buffer
 * must hold string_len + 1 bytes.
 *
 * A null copied_string is handled before the copy (presumably the
 * allocation-failure path -- TODO confirm what is returned in that case).
 * NOTE(review): the caller presumably owns the returned buffer and must
 * release it with the matching deallocator -- TODO confirm.
 */
303 grn_snip_strndup(
grn_ctx *ctx,
const char *
string,
unsigned int string_len)
308 if (!copied_string) {
311 memcpy(copied_string,
string, string_len);
312 copied_string[string_len]=
'\0';
313 return copied_string;
/*
 * grn_snip_cond_set_tag: selects the tag for one condition and stores it
 * through the dest_tag and dest_tag_len output parameters.
 *
 * Two outcomes exist:
 *  - the caller-supplied tag is used: dest_tag receives a pointer (one
 *    source is a private copy made with grn_snip_strndup) and
 *    dest_tag_len receives tag_len;
 *  - the default is used: dest_tag receives default_tag itself (no copy)
 *    and dest_tag_len receives default_tag_len.
 *
 * NOTE(review): after the copy path the destination refers to memory
 * obtained from grn_snip_strndup, while after the default path it refers
 * to memory owned elsewhere. Whoever releases the tag has to distinguish
 * the two cases -- TODO confirm how the cleanup code does that.
 */
317 grn_snip_cond_set_tag(
grn_ctx *ctx,
318 const char **dest_tag,
size_t *dest_tag_len,
319 const char *tag,
unsigned int tag_len,
320 const char *default_tag,
unsigned int default_tag_len,
326 copied_tag = grn_snip_strndup(ctx, tag, tag_len);
330 *dest_tag = copied_tag;
334 *dest_tag_len = tag_len;
336 *dest_tag = default_tag;
337 *dest_tag_len = default_tag_len;
344 const char *keyword,
unsigned int keyword_len,
345 const char *opentag,
unsigned int opentag_len,
346 const char *closetag,
unsigned int closetag_len)
351 unsigned int norm_blen;
362 if (norm_blen > snip->
width) {
368 rc = grn_snip_cond_set_tag(ctx,
370 opentag, opentag_len,
378 rc = grn_snip_cond_set_tag(ctx,
380 closetag, closetag_len,
384 if (opentag && copy_tag) {
/*
 * grn_snip_find_firstbyte: moves offset within string until it designates
 * the first byte of a character, using a test that depends on the
 * encoding (presumably selected by the encoding argument -- TODO confirm):
 *  - a loop that continues while grn_bm_check_euc(string, offset) is 0;
 *  - a single test of grn_bm_check_sjis(string, offset);
 *  - a loop that continues while string[offset] <= (char)0xc0
 *    (presumably the UTF-8 case, skipping continuation bytes).
 *
 * NOTE(review): the last comparison is made in plain char. Where char is
 * signed, (char)0xc0 is typically -64 and the test matches the bytes
 * 0x80..0xc0. Where char is unsigned it matches 0x00..0xc0, which also
 * includes every ASCII byte. Comparing as unsigned char against an
 * explicit range would make the intent independent of the platform.
 */
396 grn_snip_find_firstbyte(
const char *
string,
grn_encoding encoding,
size_t offset,
401 while (!(grn_bm_check_euc((
unsigned char *)
string, offset)))
405 if (!(grn_bm_check_sjis((
unsigned char *)
string, offset)))
409 while (
string[offset] <= (
char)0xc0)
/*
 * grn_snip_set_default_tag: stores a default tag through the dest_tag and
 * dest_tag_len output parameters.
 *
 * When copying is requested (copy_tag) and tag is non-null, a private
 * NUL-terminated copy is made with grn_snip_strndup; that copy is stored
 * in dest_tag and tag_len in dest_tag_len.
 *
 * NOTE(review): when no copy is made the destination presumably receives
 * the caller's pointer as is, so the caller must keep tag alive for as
 * long as the destination is used -- TODO confirm.
 */
419 grn_snip_set_default_tag(
grn_ctx *ctx,
420 const char **dest_tag,
size_t *dest_tag_len,
421 const char *tag,
unsigned int tag_len,
424 if (copy_tag && tag) {
426 copied_tag = grn_snip_strndup(ctx, tag, tag_len);
430 *dest_tag = copied_tag;
434 *dest_tag_len = tag_len;
440 unsigned int max_results,
441 const char *defaultopentag,
unsigned int defaultopentag_len,
442 const char *defaultclosetag,
unsigned int defaultclosetag_len,
465 if (grn_snip_set_default_tag(ctx,
468 defaultopentag, defaultopentag_len,
474 if (grn_snip_set_default_tag(ctx,
477 defaultclosetag, defaultclosetag_len,
516 for (cond = snip->
cond, cond_end = cond + snip->
cond_len;
517 cond < cond_end; cond++) {
535 for (i = snip->
cond_len, sc = snip->
cond; i; i--, sc++) {
545 for (cond = snip->
cond, cond_end = cond + snip->
cond_len;
546 cond < cond_end; cond++) {
565 unsigned int *nresults,
unsigned int *max_tagged_len)
570 if (!snip || !
string || !nresults || !max_tagged_len) {
574 exec_clean(ctx, snip);
579 exec_clean(ctx, snip);
583 for (i = 0; i < snip->
cond_len; i++) {
590 size_t last_end_offset = 0, last_last_end_offset = 0;
591 unsigned int unfound_cond_count = snip->
cond_len;
595 size_t tagged_len = 0, last_tag_end = 0;
600 size_t min_start_offset = (size_t) -1;
601 size_t max_end_offset = 0;
605 for (i = 0; i < snip->
cond_len; i++) {
609 max_end_offset < snip->cond[i].
end_offset))) {
612 cond = &snip->
cond[
i];
622 if (snip->
max_results - *nresults <= unfound_cond_count && cond->count > 0) {
624 for (i = 0; i < snip->
cond_len; i++) {
625 if ((snip->
cond + i) != cond
628 exclude_other_cond = 0;
631 if (exclude_other_cond) {
656 if (cond->
count == 0) {
657 unfound_cond_count--;
662 tag_result->
cond = cond;
678 if (snip_result->
start_offset + last_end_offset < snip->width) {
684 string_len - snip->
width), last_last_end_offset);
696 last_last_end_offset = snip_result->
end_offset;
706 *max_tagged_len =
MAX(*max_tagged_len, tagged_len);
715 for (i = 0; i < snip->
cond_len; i++) {
750 for (p = result, i = sres->
start_offset; i < sres->end_offset; i++) {
760 switch (snip->
string[i]) {
804 if (k <= sres->first_tag_result_idx) {
811 if(result_len) { *result_len = (
unsigned int)(p - result); }