/* Portability shim: when strncasecmp() is not available but _strnicmp()
 * is, make strncasecmp() expand to _strnicmp() with the same arguments. */
28 #ifndef HAVE_STRNCASECMP
29 # ifdef HAVE__STRNICMP
30 # define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n)
/* NOTE(review): the use of MAX_SYNONYM_BYTES is not visible in this excerpt;
 * presumably it bounds a per-line read buffer -- confirm against the loader. */
34 #define MAX_SYNONYM_BYTES 4096
/* Cached synonyms file path; stays NULL until it is built on first use. */
39 static char *win32_synonyms_file = NULL;
/* Build the path only once; later calls return the cached pointer. */
43 if (!win32_synonyms_file) {
48 size_t base_dir_length;
51 base_dir_length = strlen(base_dir);
/* Size = base_dir + "/" + relative_path + 1 byte for the terminating NUL.
 * NOTE(review): no check of the malloc() result is visible in this excerpt
 * before the strcpy() below -- confirm whether one exists. */
53 malloc(base_dir_length + strlen(
"/") + strlen(relative_path) + 1);
/* Concatenate base_dir, "/" and relative_path into the new buffer. */
54 strcpy(synonyms_file, base_dir);
55 strcat(synonyms_file,
"/");
56 strcat(synonyms_file, relative_path);
/* Keep the buffer in the file-scope cache. */
57 win32_synonyms_file = synonyms_file;
59 return win32_synonyms_file;
/* Tells whether the given character is the comment mark '#'. */
71 is_comment_mark(
char character)
73 return character ==
'#';
/* Searches a line for the keyword "coding: " and compares the text that
 * follows it against a list of known encoding names.
 *
 * ctx:         context passed to GRN_TEXT_PUT().
 * line:        line text; a length is passed alongside it.
 * line_length: number of bytes of `line` to look at.
 *
 * NOTE(review): the value produced by each branch is not visible in this
 * excerpt; presumably each branch selects the matching encoding -- confirm. */
77 detect_coding_part(
grn_ctx *ctx,
const char *line,
size_t line_length)
80 grn_obj null_terminated_line_buffer;
82 const char *coding_part_keyword =
"coding: ";
83 const char *coding_part;
84 const char *encoding_name;
/* Copy the line into a buffer object; judging by the buffer's name this is
 * done to obtain a NUL-terminated string for strstr() -- the terminator
 * itself is not appended in the lines visible here. */
87 GRN_TEXT_PUT(ctx, &null_terminated_line_buffer, line, line_length);
/* NOTE(review): a NULL check of the strstr() result is not visible here;
 * confirm it happens before coding_part is used below. */
91 coding_part = strstr(c_line, coding_part_keyword);
/* The encoding name starts right after the keyword. */
93 encoding_name = coding_part + strlen(coding_part_keyword);
/* Each comparison is case-insensitive and limited to the length of the
 * candidate name, so it matches any text that merely starts with that name. */
94 if (strncasecmp(encoding_name,
"utf-8", strlen(
"utf-8")) == 0 ||
95 strncasecmp(encoding_name,
"utf8", strlen(
"utf8")) == 0) {
97 }
else if (strncasecmp(encoding_name,
"sjis", strlen(
"sjis")) == 0 ||
98 strncasecmp(encoding_name,
"Shift_JIS", strlen(
"Shift_JIS")) == 0) {
100 }
else if (strncasecmp(encoding_name,
"EUC-JP", strlen(
"EUC-JP")) == 0 ||
101 strncasecmp(encoding_name,
"euc_jp", strlen(
"euc_jp")) == 0) {
103 }
else if (strncasecmp(encoding_name,
"latin1", strlen(
"latin1")) == 0) {
105 }
else if (strncasecmp(encoding_name,
"KOI8-R", strlen(
"KOI8-R")) == 0 ||
106 strncasecmp(encoding_name,
"koi8r", strlen(
"koi8r")) == 0) {
/* Inspects one line to guess the file encoding: handles a leading UTF-8 BOM,
 * then, when the line starts with the comment mark, hands the rest of the
 * line to detect_coding_part().
 *
 * line and line_length are in/out parameters: line_length is reduced when a
 * BOM is found. */
118 guess_encoding(
grn_ctx *ctx,
const char **line,
size_t *line_length)
/* The three bytes of the UTF-8 byte order mark.
 * NOTE(review): these values exceed CHAR_MAX where plain char is signed;
 * consider unsigned char to avoid conversion warnings. */
120 const char bom[] = {0xef, 0xbb, 0xbf};
121 size_t bom_length =
sizeof(bom);
/* Only compare when the line is at least as long as the BOM. */
123 if (*line_length >= bom_length && memcmp(*line, bom, bom_length) == 0) {
/* NOTE(review): the matching advance of *line is not visible in this
 * excerpt; presumably it happens on the preceding line -- confirm. */
125 *line_length -= bom_length;
/* NOTE(review): (*line)[0] is read here; no check that *line_length is
 * non-zero is visible in this excerpt -- confirm one exists. */
129 if (!is_comment_mark((*line)[0])) {
/* Skip the comment mark itself and look for a coding declaration. */
133 return detect_coding_part(ctx, (*line) + 1, (*line_length) - 1);
/* Parses one line of the synonyms file by scanning it for tab characters.
 * The caller passes key and value objects as the last two arguments.
 *
 * NOTE(review): line_length is an int here, while the sibling functions
 * guess_encoding() and detect_coding_part() use size_t for line lengths. */
137 parse_synonyms_file_line(
grn_ctx *ctx,
const char *line,
int line_length,
/* Lines starting with the comment mark get special handling; the branch
 * body is not visible in this excerpt (presumably the line is skipped). */
142 if (is_comment_mark(line[i])) {
/* First scan: walk the line until a tab is found. */
146 while (i < line_length) {
147 char character = line[
i];
149 if (character ==
'\t') {
/* The scan consumed the whole line. */
155 if (i == line_length) {
/* Second scan over the remainder of the line, again looking for tabs. */
160 while (i < line_length) {
161 char character = line[
i];
163 if (character ==
'\t') {
173 void *value_location = NULL;
/* value_location is filled in by the call these arguments belong to; the
 * call itself is not visible in this excerpt. */
176 &value_location, NULL);
/* Message reported when the key could not be registered. */
179 "[plugin][query-expander][tsv] "
180 "failed to register key: <%.*s>",
/* The synonyms file path is taken from this environment variable.
 * NOTE(review): handling of an unset variable (NULL from getenv()) is not
 * visible in this excerpt -- confirm a fallback exists before fopen(). */
200 path = getenv(
"GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE");
/* Open the file for reading in text mode. */
204 file = fopen(path,
"r");
/* Message text for a missing synonyms file; the path is interpolated. */
207 "[plugin][query-expander][tsv] "
208 "synonyms file doesn't exist: <%s>",
/* Only the first line is inspected to guess the encoding; guess_encoding()
 * may adjust line_value and line_length. */
223 if (number_of_lines == 1) {
224 encoding = guess_encoding(ctx, &line_value, &line_length);
/* Split the line into key and value. */
228 parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);
/* The second argument is used as expanded_term. */
249 expanded_term = args[1];
/* View the looked-up value as a C string named query. */
254 const char *query = value;
/* func_query_expander_tsv is passed here followed by two NULL arguments;
 * the enclosing call is not visible in this excerpt (presumably the plugin
 * registers the function with Groonga -- confirm). */
288 func_query_expander_tsv, NULL, NULL,