MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fts0ast.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "mem0mem.h"
27 #include "fts0ast.h"
28 #include "fts0pars.h"
29 #include "fts0fts.h"
30 
31 /* The FTS ast visit pass. */
40 };
41 
42 /******************************************************************/
45 static
47 fts_ast_node_create(void)
48 /*=====================*/
49 {
50  fts_ast_node_t* node;
51 
52  node = (fts_ast_node_t*) ut_malloc(sizeof(*node));
53  memset(node, 0x0, sizeof(*node));
54 
55  return(node);
56 }
57 
58 /******************************************************************/
61 UNIV_INTERN
64 /*=====================*/
65  void* arg,
66  fts_ast_oper_t oper)
67 {
68  fts_ast_node_t* node = fts_ast_node_create();
69 
70  node->type = FTS_AST_OPER;
71  node->oper = oper;
72 
74 
75  return(node);
76 }
77 
78 /******************************************************************/
82 UNIV_INTERN
85 /*=====================*/
86  void* arg,
87  const char* ptr)
88 {
89  fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg);
90  ulint len = strlen(ptr);
91  ulint cur_pos = 0;
92  fts_ast_node_t* node = NULL;
93  fts_ast_node_t* node_list = NULL;
94  fts_ast_node_t* first_node = NULL;
95 
96  /* Scan the incoming string and filter out any "non-word" characters */
97  while (cur_pos < len) {
98  fts_string_t str;
99  ulint offset;
100  ulint cur_len;
101 
103  state->charset,
104  reinterpret_cast<const byte*>(ptr) + cur_pos,
105  reinterpret_cast<const byte*>(ptr) + len, &str, &offset);
106 
107  if (cur_len == 0) {
108  break;
109  }
110 
111  cur_pos += cur_len;
112 
113  if (str.f_n_char > 0) {
114  /* If the subsequent term (after the first one)'s size
115  is less than fts_min_token_size, we shall ignore
116  that. This is to make consistent with MyISAM behavior */
117  if (first_node && (str.f_n_char < fts_min_token_size)) {
118  continue;
119  }
120 
121  node = fts_ast_node_create();
122 
123  node->type = FTS_AST_TERM;
124 
125  node->term.ptr = static_cast<byte*>(ut_malloc(
126  str.f_len + 1));
127  memcpy(node->term.ptr, str.f_str, str.f_len);
128  node->term.ptr[str.f_len] = '\0';
129 
131  static_cast<fts_ast_state_t*>(arg), node);
132 
133  if (first_node) {
134  /* There is more than one word, create
135  a list to organize them */
136  if (!node_list) {
137  node_list = fts_ast_create_node_list(
138  static_cast<fts_ast_state_t*>(
139  arg),
140  first_node);
141  }
142 
143  fts_ast_add_node(node_list, node);
144  } else {
145  first_node = node;
146  }
147  }
148  }
149 
150  return((node_list != NULL) ? node_list : first_node);
151 }
152 
153 /******************************************************************/
157 UNIV_INTERN
160 /*=====================*/
161  void* arg,
162  const char* ptr)
163 {
164  ulint len = strlen(ptr);
165  fts_ast_node_t* node = NULL;
166 
167 
168  ut_ad(len >= 1);
169 
170  if (len <= 2) {
171  /* There is a way to directly supply null terminator
172  in the query string (by using 0x220022) and get here,
173  and certainly it would not make a valid query text */
174  ut_ad(ptr[0] == '\"');
175 
176  if (len == 2) {
177  ut_ad(ptr[1] == '\"');
178  }
179 
180  return(NULL);
181  }
182 
183  node = fts_ast_node_create();
184 
186  len -= 2;
187 
188  node->type = FTS_AST_TEXT;
189  node->text.ptr = static_cast<byte*>(ut_malloc(len + 1));
190 
192  memcpy(node->text.ptr, ptr + 1, len);
193  node->text.ptr[len] = 0;
194  node->text.distance = ULINT_UNDEFINED;
195 
197 
198  return(node);
199 }
200 
201 /******************************************************************/
205 UNIV_INTERN
208 /*=====================*/
209  void* arg,
210  fts_ast_node_t* expr)
211 {
212  fts_ast_node_t* node = fts_ast_node_create();
213 
214  node->type = FTS_AST_LIST;
215  node->list.head = node->list.tail = expr;
216 
218 
219  return(node);
220 }
221 
222 /******************************************************************/
226 UNIV_INTERN
229 /*============================*/
230  void* arg,
231  fts_ast_node_t* expr)
232 {
233  fts_ast_node_t* node = fts_ast_node_create();
234 
235  node->type = FTS_AST_SUBEXP_LIST;
236  node->list.head = node->list.tail = expr;
237 
239 
240  return(node);
241 }
242 
243 /******************************************************************/
245 static
246 void
247 fts_ast_free_list(
248 /*==============*/
249  fts_ast_node_t* node)
250 {
251  ut_a(node->type == FTS_AST_LIST
252  || node->type == FTS_AST_SUBEXP_LIST);
253 
254  for (node = node->list.head;
255  node != NULL;
256  node = fts_ast_free_node(node)) {
257 
259  }
260 }
261 
262 /********************************************************************/
265 UNIV_INTERN
268 /*==============*/
269  fts_ast_node_t* node)
270 {
271  fts_ast_node_t* next_node;
272 
273  switch (node->type) {
274  case FTS_AST_TEXT:
275  if (node->text.ptr) {
276  ut_free(node->text.ptr);
277  node->text.ptr = NULL;
278  }
279  break;
280 
281  case FTS_AST_TERM:
282  if (node->term.ptr) {
283  ut_free(node->term.ptr);
284  node->term.ptr = NULL;
285  }
286  break;
287 
288  case FTS_AST_LIST:
289  case FTS_AST_SUBEXP_LIST:
290  fts_ast_free_list(node);
291  node->list.head = node->list.tail = NULL;
292  break;
293 
294  case FTS_AST_OPER:
295  break;
296 
297  default:
298  ut_error;
299  }
300 
302  next_node = node->next;
303 
304  ut_free(node);
305 
306  return(next_node);
307 }
308 
309 /******************************************************************/
313 UNIV_INTERN
316 /*=============*/
317  fts_ast_node_t* node,
318  fts_ast_node_t* elem)
319 {
320  if (!elem) {
321  return(NULL);
322  }
323 
324  ut_a(!elem->next);
325  ut_a(node->type == FTS_AST_LIST
326  || node->type == FTS_AST_SUBEXP_LIST);
327 
328  if (!node->list.head) {
329  ut_a(!node->list.tail);
330 
331  node->list.head = node->list.tail = elem;
332  } else {
333  ut_a(node->list.tail);
334 
335  node->list.tail->next = elem;
336  node->list.tail = elem;
337  }
338 
339  return(node);
340 }
341 
342 /******************************************************************/
345 UNIV_INTERN
346 void
348 /*===================*/
349  fts_ast_state_t*state,
350  fts_ast_node_t* node)
351 {
352  if (!state->list.head) {
353  ut_a(!state->list.tail);
354 
355  state->list.head = state->list.tail = node;
356  } else {
357  state->list.tail->next_alloc = node;
358  state->list.tail = node;
359  }
360 }
361 
362 /******************************************************************/
364 UNIV_INTERN
365 void
367 /*======================*/
368  fts_ast_node_t* node)
370 {
371  if (!node) {
372  return;
373  }
374 
375  /* If it's a node list, the wildcard should be set to the tail node*/
376  if (node->type == FTS_AST_LIST) {
377  ut_ad(node->list.tail != NULL);
378  node = node->list.tail;
379  }
380 
381  ut_a(node->type == FTS_AST_TERM);
382  ut_a(!node->term.wildcard);
383 
384  node->term.wildcard = TRUE;
385 }
386 
387 /******************************************************************/
389 UNIV_INTERN
390 void
392 /*======================*/
393  fts_ast_node_t* node,
394  ulint distance)
396 {
397  ut_a(node->type == FTS_AST_TEXT);
398  ut_a(node->text.distance == ULINT_UNDEFINED);
399 
400  node->text.distance = distance;
401 }
402 
403 /******************************************************************/
405 UNIV_INTERN
406 void
408 /*===============*/
409  fts_ast_state_t*state)
410 {
411  fts_ast_node_t* node = state->list.head;
412 
413  /* Free the nodes that were allocated during parsing. */
414  while (node) {
415  fts_ast_node_t* next = node->next_alloc;
416 
417  if (node->type == FTS_AST_TEXT && node->text.ptr) {
418  ut_free(node->text.ptr);
419  node->text.ptr = NULL;
420  } else if (node->type == FTS_AST_TERM && node->term.ptr) {
421  ut_free(node->term.ptr);
422  node->term.ptr = NULL;
423  }
424 
425  ut_free(node);
426  node = next;
427  }
428 
429  state->root = state->list.head = state->list.tail = NULL;
430 }
431 
432 /******************************************************************/
434 UNIV_INTERN
435 void
437 /*===============*/
438  fts_ast_node_t* node)
439 {
440  switch (node->type) {
441  case FTS_AST_TEXT:
442  printf("TEXT: %s\n", node->text.ptr);
443  break;
444 
445  case FTS_AST_TERM:
446  printf("TERM: %s\n", node->term.ptr);
447  break;
448 
449  case FTS_AST_LIST:
450  printf("LIST: ");
451  node = node->list.head;
452 
453  while (node) {
454  fts_ast_node_print(node);
455  node = node->next;
456  }
457  break;
458 
459  case FTS_AST_SUBEXP_LIST:
460  printf("SUBEXP_LIST: ");
461  node = node->list.head;
462 
463  while (node) {
464  fts_ast_node_print(node);
465  node = node->next;
466  }
467  case FTS_AST_OPER:
468  printf("OPER: %d\n", node->oper);
469  break;
470 
471  default:
472  ut_error;
473  }
474 }
475 
476 /******************************************************************/
481 UNIV_INTERN
482 dberr_t
484 /*==========*/
485  fts_ast_oper_t oper,
486  fts_ast_node_t* node,
487  fts_ast_callback visitor,
488  void* arg,
489  bool* has_ignore)
493 {
494  dberr_t error = DB_SUCCESS;
495  fts_ast_node_t* oper_node = NULL;
496  fts_ast_node_t* start_node;
497  bool revisit = false;
498  bool will_be_ignored = false;
500 
501  start_node = node->list.head;
502 
503  ut_a(node->type == FTS_AST_LIST
504  || node->type == FTS_AST_SUBEXP_LIST);
505 
506  if (oper == FTS_EXIST_SKIP) {
507  visit_pass = FTS_PASS_EXIST;
508  } else if (oper == FTS_IGNORE_SKIP) {
509  visit_pass = FTS_PASS_IGNORE;
510  }
511 
512  /* In the first pass of the tree, at the leaf level of the
513  tree, FTS_EXIST and FTS_IGNORE operation will be ignored.
514  It will be repeated at the level above the leaf level.
515 
516  The basic idea here is that when we encounter FTS_EXIST or
517  FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
518  or FTS_IGNORE_SKIP, and term node & text node with the operators
519  is ignored in the first pass. We have two passes during the revisit:
520  We process nodes with FTS_EXIST_SKIP in the exist pass, and then
521  process nodes with FTS_IGNORE_SKIP in the ignore pass.
522 
523  The order should be restrictly followed, or we will get wrong results.
524  For example, we have a query 'a +b -c d +e -f'.
525  first pass: process 'a' and 'd' by union;
526  exist pass: process '+b' and '+e' by intersection;
527  ignore pass: process '-c' and '-f' by difference. */
528 
529  for (node = node->list.head;
530  node && (error == DB_SUCCESS);
531  node = node->next) {
532 
533  switch(node->type) {
534  case FTS_AST_LIST:
535  if (visit_pass != FTS_PASS_FIRST) {
536  break;
537  }
538 
539  error = fts_ast_visit(oper, node, visitor,
540  arg, &will_be_ignored);
541 
542  /* If will_be_ignored is set to true, then
543  we encountered and ignored a FTS_EXIST or FTS_IGNORE
544  operator. */
545  if (will_be_ignored) {
546  revisit = true;
547  /* Remember oper for list in case '-abc&def',
548  ignored oper is from previous node of list.*/
549  node->oper = oper;
550  }
551 
552  break;
553 
554  case FTS_AST_SUBEXP_LIST:
555  if (visit_pass != FTS_PASS_FIRST) {
556  break;
557  }
558 
559  error = fts_ast_visit_sub_exp(node, visitor, arg);
560  break;
561 
562  case FTS_AST_OPER:
563  oper = node->oper;
564  oper_node = node;
565 
566  /* Change the operator for revisit */
567  if (oper == FTS_EXIST) {
568  oper_node->oper = FTS_EXIST_SKIP;
569  } else if (oper == FTS_IGNORE) {
570  oper_node->oper = FTS_IGNORE_SKIP;
571  }
572 
573  break;
574 
575  default:
576  if (node->visited) {
577  continue;
578  }
579 
580  ut_a(oper == FTS_NONE || !oper_node
581  || oper_node->oper == oper
582  || oper_node->oper == FTS_EXIST_SKIP
583  || oper_node->oper == FTS_IGNORE_SKIP);
584 
585  if (oper== FTS_EXIST || oper == FTS_IGNORE) {
586  *has_ignore = true;
587  continue;
588  }
589 
590  /* Process leaf node accroding to its pass.*/
591  if (oper == FTS_EXIST_SKIP
592  && visit_pass == FTS_PASS_EXIST) {
593  error = visitor(FTS_EXIST, node, arg);
594  node->visited = true;
595  } else if (oper == FTS_IGNORE_SKIP
596  && visit_pass == FTS_PASS_IGNORE) {
597  error = visitor(FTS_IGNORE, node, arg);
598  node->visited = true;
599  } else if (visit_pass == FTS_PASS_FIRST) {
600  error = visitor(oper, node, arg);
601  node->visited = true;
602  }
603  }
604  }
605 
606  if (revisit) {
607  /* Exist pass processes the skipped FTS_EXIST operation. */
608  for (node = start_node;
609  node && error == DB_SUCCESS;
610  node = node->next) {
611 
612  if (node->type == FTS_AST_LIST
613  && node->oper != FTS_IGNORE) {
614  error = fts_ast_visit(FTS_EXIST_SKIP, node,
615  visitor, arg, &will_be_ignored);
616  }
617  }
618 
619  /* Ignore pass processes the skipped FTS_IGNORE operation. */
620  for (node = start_node;
621  node && error == DB_SUCCESS;
622  node = node->next) {
623 
624  if (node->type == FTS_AST_LIST) {
625  error = fts_ast_visit(FTS_IGNORE_SKIP, node,
626  visitor, arg, &will_be_ignored);
627  }
628  }
629  }
630 
631  return(error);
632 }