MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ft_boolean_search.c
1 /* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17 
18 /* TODO: add caching - pre-read several index entries at once */
19 
20 /*
21  Added optimization for full-text queries with plus-words. It was
22  implemented by sharing maximal document id (max_docid) variable
23  inside plus subtree. max_docid could be used by any word in plus
24  subtree, but it could be updated by plus-word only.
25 
26  Fulltext "smarter index merge" optimization assumes that rows
27  it gets are ordered by doc_id. That is not the case when we
28  search for a word with truncation operator. It may return
29  rows in random order. Thus we may not use "smarter index merge"
30  optimization with "trunc-words".
31 
32  The idea is: there is no need to search for docid smaller than
33  biggest docid inside current plus subtree or any upper plus subtree.
34 
35  Examples:
36  +word1 word2
37  share same max_docid
38  max_docid updated by word1
39  +word1 +(word2 word3)
40  share same max_docid
41  max_docid updated by word1
42  +(word1 -word2) +(+word3 word4)
43  share same max_docid
44  max_docid updated by word3
45  +word1 word2 (+word3 word4 (+word5 word6))
46  three subexpressions (including the top-level one),
47  every one has its own max_docid, updated by its plus word.
48  but for the search word6 uses
49  max(word1.max_docid, word3.max_docid, word5.max_docid),
50  while word4 uses, accordingly,
51  max(word1.max_docid, word3.max_docid).
52 */
53 
54 #define FT_CORE
55 #include "ftdefs.h"
56 
57 /* search with boolean queries */
58 
59 static double _wghts[11]=
60 {
61  0.131687242798354,
62  0.197530864197531,
63  0.296296296296296,
64  0.444444444444444,
65  0.666666666666667,
66  1.000000000000000,
67  1.500000000000000,
68  2.250000000000000,
69  3.375000000000000,
70  5.062500000000000,
71  7.593750000000000};
72 static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
73 
74 static double _nwghts[11]=
75 {
76  -0.065843621399177,
77  -0.098765432098766,
78  -0.148148148148148,
79  -0.222222222222222,
80  -0.333333333333334,
81  -0.500000000000000,
82  -0.750000000000000,
83  -1.125000000000000,
84  -1.687500000000000,
85  -2.531250000000000,
86  -3.796875000000000};
87 static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
88 
89 #define FTB_FLAG_TRUNC 1
90 /* At most one of the following flags can be set */
91 #define FTB_FLAG_YES 2
92 #define FTB_FLAG_NO 4
93 #define FTB_FLAG_WONLY 8
94 
95 #define CMP_NUM(a,b) (((a) < (b)) ? -1 : ((a) == (b)) ? 0 : 1)
96 
97 typedef struct st_ftb_expr FTB_EXPR;
99 {
100  FTB_EXPR *up;
101  uint flags;
102 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
103  my_off_t docid[2];
104  my_off_t max_docid;
105  float weight;
106  float cur_weight;
107  LIST *phrase; /* phrase words */
108  LIST *document; /* for phrase search */
109  uint yesses; /* number of "yes" words matched */
110  uint nos; /* number of "no" words matched */
111  uint ythresh; /* number of "yes" words in expr */
112  uint yweaks; /* number of "yes" words for scan only */
113 };
114 
115 typedef struct st_ftb_word
116 {
117  FTB_EXPR *up;
118  uint flags;
119 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
120  my_off_t docid[2]; /* for index search and for scan */
121  my_off_t key_root;
122  FTB_EXPR *max_docid_expr;
123  MI_KEYDEF *keyinfo;
124  struct st_ftb_word *prev;
125  float weight;
126  uint ndepth;
127  uint len;
128  uchar off;
129  uchar word[1];
130 } FTB_WORD;
131 
132 typedef struct st_ft_info
133 {
134  struct _ft_vft *please;
135  MI_INFO *info;
136  const CHARSET_INFO *charset;
137  FTB_EXPR *root;
138  FTB_WORD **list;
139  FTB_WORD *last_word;
140  MEM_ROOT mem_root;
141  QUEUE queue;
142  TREE no_dupes;
143  my_off_t lastpos;
144  uint keynr;
145  uchar with_scan;
146  enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
147 } FTB;
148 
149 static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
150 {
151  int i;
152 
153  /* if a==curdoc, take it as a < b */
154  if (v && a->docid[0] == *v)
155  return -1;
156 
157  /* ORDER BY docid, ndepth DESC */
158  i=CMP_NUM(a->docid[0], b->docid[0]);
159  if (!i)
160  i=CMP_NUM(b->ndepth,a->ndepth);
161  return i;
162 }
163 
164 static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
165 {
166  /* ORDER BY word, ndepth */
167  int i= ha_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
168  (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
169  if (!i)
170  i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
171  return i;
172 }
173 
174 
175 typedef struct st_my_ftb_param
176 {
177  FTB *ftb;
178  FTB_EXPR *ftbe;
179  uchar *up_quot;
180  uint depth;
181 } MY_FTB_PARAM;
182 
183 
184 static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
185  char *word, int word_len,
187 {
188  MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
189  FTB_WORD *ftbw;
190  FTB_EXPR *ftbe, *tmp_expr;
191  FT_WORD *phrase_word;
192  LIST *tmp_element;
193  int r= info->weight_adjust;
194  float weight= (float)
195  (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
196 
197  switch (info->type) {
198  case FT_TOKEN_WORD:
199  ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
200  sizeof(FTB_WORD) +
201  (info->trunc ? MI_MAX_KEY_BUFF :
202  word_len * ftb_param->ftb->charset->mbmaxlen +
203  HA_FT_WLEN +
204  ftb_param->ftb->info->s->rec_reflength));
205  ftbw->len= word_len + 1;
206  ftbw->flags= 0;
207  ftbw->off= 0;
208  if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
209  if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
210  if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
211  ftbw->weight= weight;
212  ftbw->up= ftb_param->ftbe;
213  ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
214  ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
215  ftbw->key_root= HA_OFFSET_ERROR;
216  memcpy(ftbw->word + 1, word, word_len);
217  ftbw->word[0]= word_len;
218  if (info->yesno > 0) ftbw->up->ythresh++;
219  ftb_param->ftb->queue.max_elements++;
220  ftbw->prev= ftb_param->ftb->last_word;
221  ftb_param->ftb->last_word= ftbw;
222  ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
223  for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
224  if (! (tmp_expr->flags & FTB_FLAG_YES))
225  break;
226  ftbw->max_docid_expr= tmp_expr;
227  /* fall through */
228  case FT_TOKEN_STOPWORD:
229  if (! ftb_param->up_quot) break;
230  phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
231  tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
232  phrase_word->pos= (uchar*) word;
233  phrase_word->len= word_len;
234  tmp_element->data= (void *)phrase_word;
235  ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
236  /* Allocate document list at this point.
237  It allows to avoid huge amount of allocs/frees for each row.*/
238  tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
239  tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
240  ftb_param->ftbe->document=
241  list_add(ftb_param->ftbe->document, tmp_element);
242  break;
243  case FT_TOKEN_LEFT_PAREN:
244  ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
245  ftbe->flags= 0;
246  if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
247  if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
248  ftbe->weight= weight;
249  ftbe->up= ftb_param->ftbe;
250  ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
251  ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
252  ftbe->phrase= NULL;
253  ftbe->document= 0;
254  if (info->quot) ftb_param->ftb->with_scan|= 2;
255  if (info->yesno > 0) ftbe->up->ythresh++;
256  ftb_param->ftbe= ftbe;
257  ftb_param->depth++;
258  ftb_param->up_quot= (uchar*) info->quot;
259  break;
260  case FT_TOKEN_RIGHT_PAREN:
261  if (ftb_param->ftbe->document)
262  {
263  /* Circuit document list */
264  for (tmp_element= ftb_param->ftbe->document;
265  tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
266  tmp_element->next= ftb_param->ftbe->document;
267  ftb_param->ftbe->document->prev= tmp_element;
268  }
269  info->quot= 0;
270  if (ftb_param->ftbe->up)
271  {
272  DBUG_ASSERT(ftb_param->depth);
273  ftb_param->ftbe= ftb_param->ftbe->up;
274  ftb_param->depth--;
275  ftb_param->up_quot= 0;
276  }
277  break;
278  case FT_TOKEN_EOF:
279  default:
280  break;
281  }
282  return(0);
283 }
284 
285 
286 static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
287  char *query, int len)
288 {
289  MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
291  const CHARSET_INFO *cs= ftb_param->ftb->charset;
292  uchar **start= (uchar**) &query;
293  uchar *end= (uchar*) query + len;
294  FT_WORD w;
295 
296  info.prev= ' ';
297  info.quot= 0;
298  while (ft_get_word(cs, start, end, &w, &info))
299  param->mysql_add_word(param, (char*) w.pos, w.len, &info);
300  return(0);
301 }
302 
303 
304 static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
305  struct st_mysql_ftparser *parser)
306 {
307  MYSQL_FTPARSER_PARAM *param;
308  MY_FTB_PARAM ftb_param;
309  DBUG_ENTER("_ftb_parse_query");
310  DBUG_ASSERT(parser);
311 
312  if (ftb->state != UNINITIALIZED)
313  DBUG_RETURN(0);
314  if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
315  DBUG_RETURN(1);
316 
317  ftb_param.ftb= ftb;
318  ftb_param.depth= 0;
319  ftb_param.ftbe= ftb->root;
320  ftb_param.up_quot= 0;
321 
322  param->mysql_parse= ftb_parse_query_internal;
323  param->mysql_add_word= ftb_query_add_word;
324  param->mysql_ftparam= (void *)&ftb_param;
325  param->cs= ftb->charset;
326  param->doc= (char*) query;
327  param->length= len;
328  param->flags= 0;
329  param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
330  DBUG_RETURN(parser->parse(param));
331 }
332 
333 
334 static int _ftb_no_dupes_cmp(const void* not_used __attribute__((unused)),
335  const void *a,const void *b)
336 {
337  return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
338 }
339 
340 /*
341  When performing prefix search (a word with truncation operator), we
342  must preserve original prefix to ensure that characters which may be
343  expanded/contracted do not break the prefix. This is done by storing
344  newly found key immediatly after the original word in ftbw->word
345  buffer.
346 
347  ftbw->word= LENGTH WORD [ LENGTH1 WORD1 ] WEIGHT REFERENCE
348  LENGTH - 1 byte, length of the WORD
349  WORD - LENGTH bytes, the word itself
350  LENGTH1 - 1 byte, length of the WORD1, present in case of prefix search
351  WORD1 - LENGTH bytes, the word itself, present in case of prefix search
352  WEIGHT - 4 bytes (HA_FT_WLEN), either weight or number of subkeys
353  REFERENCE - rec_reflength bytes, pointer to the record
354 
355  returns 1 if the search was finished (must-word wasn't found)
356 */
357 static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
358 {
359  int r;
360  int subkeys=1;
361  my_bool can_go_down;
362  MI_INFO *info=ftb->info;
363  uint UNINIT_VAR(off), extra= HA_FT_WLEN + info->s->rec_reflength;
364  uchar *lastkey_buf=ftbw->word+ftbw->off;
365 
366  if (ftbw->flags & FTB_FLAG_TRUNC)
367  lastkey_buf+=ftbw->len;
368 
369  if (init_search)
370  {
371  ftbw->key_root=info->s->state.key_root[ftb->keynr];
372  ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
373 
374  r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
375  SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
376  }
377  else
378  {
379  uint sflag= SEARCH_BIGGER;
380  my_off_t max_docid=0;
381  FTB_EXPR *tmp;
382 
383  for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
384  set_if_bigger(max_docid, tmp->max_docid);
385 
386  if (ftbw->docid[0] < max_docid)
387  {
388  sflag|= SEARCH_SAME;
389  _mi_dpointer(info, (uchar*) (lastkey_buf + HA_FT_WLEN +
390  (ftbw->off ? 0 : lastkey_buf[0] + 1)),
391  max_docid);
392  }
393  r=_mi_search(info, ftbw->keyinfo, (uchar*) lastkey_buf,
394  USE_WHOLE_KEY, sflag, ftbw->key_root);
395  }
396 
397  can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
398  /* Skip rows inserted by concurrent insert */
399  while (!r)
400  {
401  if (can_go_down)
402  {
403  /* going down ? */
404  off=info->lastkey_length-extra;
405  subkeys=ft_sintXkorr(info->lastkey+off);
406  }
407  if (subkeys<0 || info->lastpos < info->state->data_file_length)
408  break;
409  r= _mi_search_next(info, ftbw->keyinfo, info->lastkey,
410  info->lastkey_length,
411  SEARCH_BIGGER, ftbw->key_root);
412  }
413 
414  if (!r && !ftbw->off)
415  {
416  r= ha_compare_text(ftb->charset,
417  info->lastkey+1,
418  info->lastkey_length-extra-1,
419  (uchar*) ftbw->word+1,
420  ftbw->len-1,
421  (my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0);
422  }
423 
424  if (r) /* not found */
425  {
426  if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
427  {
428  ftbw->docid[0]=HA_OFFSET_ERROR;
429  if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
430  {
431  /*
432  This word MUST BE present in every document returned,
433  so we can stop the search right now
434  */
435  ftb->state=INDEX_DONE;
436  return 1; /* search is done */
437  }
438  else
439  return 0;
440  }
441 
442  /*
443  Going up to the first-level tree to continue search there.
444  Only done when performing prefix search.
445 
446  Key buffer data pointer as well as docid[0] may be smaller
447  than values we got while searching first-level tree. Thus
448  they must be restored to original values to avoid dead-loop,
449  when subsequent search for a bigger value eventually ends up
450  in this same second-level tree.
451  */
452  _mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
453  ftbw->docid[0]= ftbw->key_root;
454  ftbw->key_root=info->s->state.key_root[ftb->keynr];
455  ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
456  ftbw->off=0;
457  return _ft2_search_no_lock(ftb, ftbw, 0);
458  }
459 
460  /* matching key found */
461  memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
462  if (lastkey_buf == ftbw->word)
463  ftbw->len=info->lastkey_length-extra;
464 
465  /* going down ? */
466  if (subkeys<0)
467  {
468  /*
469  yep, going down, to the second-level tree
470  TODO here: subkey-based optimization
471  */
472  ftbw->off=off;
473  ftbw->key_root=info->lastpos;
474  ftbw->keyinfo=& info->s->ft2_keyinfo;
475  r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
476  DBUG_ASSERT(r==0); /* found something */
477  memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
478  }
479  ftbw->docid[0]=info->lastpos;
480  if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
481  ftbw->max_docid_expr->max_docid= info->lastpos;
482  return 0;
483 }
484 
485 static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
486 {
487  int r;
488  MYISAM_SHARE *share= ftb->info->s;
489  if (share->concurrent_insert)
490  mysql_rwlock_rdlock(&share->key_root_lock[ftb->keynr]);
491  r= _ft2_search_no_lock(ftb, ftbw, init_search);
492  if (share->concurrent_insert)
493  mysql_rwlock_unlock(&share->key_root_lock[ftb->keynr]);
494  return r;
495 }
496 
497 static void _ftb_init_index_search(FT_INFO *ftb)
498 {
499  int i;
500  FTB_WORD *ftbw;
501 
502  if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
503  return;
504  ftb->state=INDEX_SEARCH;
505 
506  for (i=ftb->queue.elements; i; i--)
507  {
508  ftbw=(FTB_WORD *)(ftb->queue.root[i]);
509 
510  if (ftbw->flags & FTB_FLAG_TRUNC)
511  {
512  /*
513  special treatment for truncation operator
514  1. there are some (besides this) +words
515  | no need to search in the index, it can never ADD new rows
516  | to the result, and to remove half-matched rows we do scan anyway
517  2. -trunc*
518  | same as 1.
519  3. in 1 and 2, +/- need not be on the same expr. level,
520  but can be on any upper level, as in +word +(trunc1* trunc2*)
521  4. otherwise
522  | We have to index-search for this prefix.
523  | It may cause duplicates, as in the index (sorted by <word,docid>)
524  | <aaaa,row1>
525  | <aabb,row2>
526  | <aacc,row1>
527  | Searching for "aa*" will find row1 twice...
528  */
529  FTB_EXPR *ftbe;
530  for (ftbe=(FTB_EXPR*)ftbw;
531  ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
532  ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
533  {
534  if (ftbe->flags & FTB_FLAG_NO || /* 2 */
535  ftbe->up->ythresh - ftbe->up->yweaks >
536  (uint) test(ftbe->flags & FTB_FLAG_YES)) /* 1 */
537  {
538  FTB_EXPR *top_ftbe=ftbe->up;
539  ftbw->docid[0]=HA_OFFSET_ERROR;
540  for (ftbe=(FTB_EXPR *)ftbw;
541  ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
542  ftbe=ftbe->up)
543  ftbe->up->yweaks++;
544  ftbe=0;
545  break;
546  }
547  }
548  if (!ftbe)
549  continue;
550  /* 4 */
551  if (!is_tree_inited(& ftb->no_dupes))
552  init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
553  _ftb_no_dupes_cmp,0,0,0);
554  else
555  reset_tree(& ftb->no_dupes);
556  }
557 
558  ftbw->off=0; /* in case of reinit */
559  if (_ft2_search(ftb, ftbw, 1))
560  return;
561  }
562  queue_fix(& ftb->queue);
563 }
564 
565 
566 FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, uchar *query,
567  uint query_len, const CHARSET_INFO *cs)
568 {
569  FTB *ftb;
570  FTB_EXPR *ftbe;
571  FTB_WORD *ftbw;
572 
573  if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
574  return 0;
575  ftb->please= (struct _ft_vft *) & _ft_vft_boolean;
576  ftb->state=UNINITIALIZED;
577  ftb->info=info;
578  ftb->keynr=keynr;
579  ftb->charset=cs;
580  DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
581  ftb->with_scan=0;
582  ftb->lastpos=HA_OFFSET_ERROR;
583  memset(&ftb->no_dupes, 0, sizeof(TREE));
584  ftb->last_word= 0;
585 
586  init_alloc_root(&ftb->mem_root, 1024, 1024);
587  ftb->queue.max_elements= 0;
588  if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
589  goto err;
590  ftbe->weight=1;
591  ftbe->flags=FTB_FLAG_YES;
592  ftbe->nos=1;
593  ftbe->up=0;
594  ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
595  ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
596  ftbe->phrase= NULL;
597  ftbe->document= 0;
598  ftb->root=ftbe;
599  if (unlikely(_ftb_parse_query(ftb, query, query_len,
600  keynr == NO_SUCH_KEY ? &ft_default_parser :
601  info->s->keyinfo[keynr].parser)))
602  goto err;
603  /*
604  Hack: instead of init_queue, we'll use reinit queue to be able
605  to alloc queue with alloc_root()
606  */
607  if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
608  (ftb->queue.max_elements + 1) *
609  sizeof(void *))))
610  goto err;
611  reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
612  (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0);
613  for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
614  queue_insert(&ftb->queue, (uchar *)ftbw);
615  ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
616  sizeof(FTB_WORD *)*ftb->queue.elements);
617  memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
618  my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
619  (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
620  if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
621  ftb->state=READY;
622  return ftb;
623 err:
624  free_root(& ftb->mem_root, MYF(0));
625  my_free(ftb);
626  return 0;
627 }
628 
629 
631 {
632  LIST *phrase;
633  LIST *document;
634  const CHARSET_INFO *cs;
635  uint phrase_length;
636  uint document_length;
637  uint match;
639 
640 
641 static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
642  char *word, int word_len,
643  MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
644 {
645  MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
646  FT_WORD *w= (FT_WORD *)phrase_param->document->data;
647  LIST *phrase, *document;
648  w->pos= (uchar*) word;
649  w->len= word_len;
650  phrase_param->document= phrase_param->document->prev;
651  if (phrase_param->phrase_length > phrase_param->document_length)
652  {
653  phrase_param->document_length++;
654  return 0;
655  }
656  /* TODO: rewrite phrase search to avoid
657  comparing the same word twice. */
658  for (phrase= phrase_param->phrase, document= phrase_param->document->next;
659  phrase; phrase= phrase->next, document= document->next)
660  {
661  FT_WORD *phrase_word= (FT_WORD *)phrase->data;
662  FT_WORD *document_word= (FT_WORD *)document->data;
663  if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
664  phrase_word->len,
665  (uchar*) document_word->pos, document_word->len))
666  return 0;
667  }
668  phrase_param->match++;
669  return 0;
670 }
671 
672 
673 static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
674  char *document, int len)
675 {
676  FT_WORD word;
677  MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
678  const uchar *docend= (uchar*) document + len;
679  while (ft_simple_get_word(phrase_param->cs, (uchar**) &document, docend,
680  &word, FALSE))
681  {
682  param->mysql_add_word(param, (char*) word.pos, word.len, 0);
683  if (phrase_param->match)
684  break;
685  }
686  return 0;
687 }
688 
689 
690 /*
691  Checks if given buffer matches phrase list.
692 
693  SYNOPSIS
694  _ftb_check_phrase()
695  s0 start of buffer
696  e0 end of buffer
697  phrase broken into list phrase
698  cs charset info
699 
700  RETURN VALUE
701  1 is returned if phrase found, 0 else.
702  -1 is returned if error occurs.
703 */
704 
705 static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
706  FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
707 {
708  MY_FTB_PHRASE_PARAM ftb_param;
709  MYSQL_FTPARSER_PARAM *param;
710  DBUG_ENTER("_ftb_check_phrase");
711  DBUG_ASSERT(parser);
712 
713  if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
714  DBUG_RETURN(0);
715 
716  ftb_param.phrase= ftbe->phrase;
717  ftb_param.document= ftbe->document;
718  ftb_param.cs= ftb->charset;
719  ftb_param.phrase_length= list_length(ftbe->phrase);
720  ftb_param.document_length= 1;
721  ftb_param.match= 0;
722 
723  param->mysql_parse= ftb_check_phrase_internal;
724  param->mysql_add_word= ftb_phrase_add_word;
725  param->mysql_ftparam= (void *)&ftb_param;
726  param->cs= ftb->charset;
727  param->doc= (char *) document;
728  param->length= len;
729  param->flags= 0;
730  param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
731  if (unlikely(parser->parse(param)))
732  return -1;
733  DBUG_RETURN(ftb_param.match ? 1 : 0);
734 }
735 
736 
737 static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
738 {
739  FT_SEG_ITERATOR ftsi;
740  FTB_EXPR *ftbe;
741  float weight=ftbw->weight;
742  int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
743  my_off_t curdoc=ftbw->docid[mode];
744  struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
745  &ft_default_parser :
746  ftb->info->s->keyinfo[ftb->keynr].parser;
747 
748  for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
749  {
750  ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
751  if (ftbe->docid[mode] != curdoc)
752  {
753  ftbe->cur_weight=0;
754  ftbe->yesses=ftbe->nos=0;
755  ftbe->docid[mode]=curdoc;
756  }
757  if (ftbe->nos)
758  break;
759  if (yn_flag & FTB_FLAG_YES)
760  {
761  weight /= ftbe->ythresh;
762  ftbe->cur_weight += weight;
763  if ((int) ++ftbe->yesses == ythresh)
764  {
765  yn_flag=ftbe->flags;
766  weight=ftbe->cur_weight*ftbe->weight;
767  if (mode && ftbe->phrase)
768  {
769  int found= 0;
770 
771  memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
772  while (_mi_ft_segiterator(&ftsi) && !found)
773  {
774  if (!ftsi.pos)
775  continue;
776  found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
777  if (unlikely(found < 0))
778  return 1;
779  }
780  if (!found)
781  break;
782  } /* ftbe->quot */
783  }
784  else
785  break;
786  }
787  else
788  if (yn_flag & FTB_FLAG_NO)
789  {
790  /*
791  NOTE: special sort function of queue assures that all
792  (yn_flag & FTB_FLAG_NO) != 0
793  events for every particular subexpression will
794  "auto-magically" happen BEFORE all the
795  (yn_flag & FTB_FLAG_YES) != 0 events. So no
796  already matched expression can become not-matched again.
797  */
798  ++ftbe->nos;
799  break;
800  }
801  else
802  {
803  if (ftbe->ythresh)
804  weight/=3;
805  ftbe->cur_weight += weight;
806  if ((int) ftbe->yesses < ythresh)
807  break;
808  if (!(yn_flag & FTB_FLAG_WONLY))
809  yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
810  weight*= ftbe->weight;
811  }
812  }
813  return 0;
814 }
815 
816 
817 int ft_boolean_read_next(FT_INFO *ftb, char *record)
818 {
819  FTB_EXPR *ftbe;
820  FTB_WORD *ftbw;
821  MI_INFO *info=ftb->info;
822  my_off_t curdoc;
823 
824  if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
825  return -1;
826 
827  /* black magic ON */
828  if ((int) _mi_check_index(info, ftb->keynr) < 0)
829  return my_errno;
830  if (_mi_readinfo(info, F_RDLCK, 1))
831  return my_errno;
832  /* black magic OFF */
833 
834  if (!ftb->queue.elements)
835  return my_errno=HA_ERR_END_OF_FILE;
836 
837  /* Attention!!! Address of a local variable is used here! See err: label */
838  ftb->queue.first_cmp_arg=(void *)&curdoc;
839 
840  while (ftb->state == INDEX_SEARCH &&
841  (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
842  HA_OFFSET_ERROR)
843  {
844  while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
845  {
846  if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
847  {
848  my_errno= HA_ERR_OUT_OF_MEM;
849  goto err;
850  }
851 
852  /* update queue */
853  _ft2_search(ftb, ftbw, 0);
854  queue_replaced(& ftb->queue);
855  }
856 
857  ftbe=ftb->root;
858  if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
859  ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
860  {
861  /* curdoc matched ! */
862  if (is_tree_inited(&ftb->no_dupes) &&
863  tree_insert(&ftb->no_dupes, &curdoc, 0,
864  ftb->no_dupes.custom_arg)->count >1)
865  /* but it managed already to get past this line once */
866  continue;
867 
868  info->lastpos=curdoc;
869  /* Clear all states, except that the table was updated */
870  info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
871 
872  if (!(*info->read_record)(info,curdoc, (uchar*) record))
873  {
874  info->update|= HA_STATE_AKTIV; /* Record is read */
875  if (ftb->with_scan &&
876  ft_boolean_find_relevance(ftb,(uchar*) record,0)==0)
877  continue; /* no match */
878  my_errno=0;
879  goto err;
880  }
881  goto err;
882  }
883  }
884  ftb->state=INDEX_DONE;
885  my_errno=HA_ERR_END_OF_FILE;
886 err:
887  ftb->queue.first_cmp_arg=(void *)0;
888  return my_errno;
889 }
890 
891 
892 typedef struct st_my_ftb_find_param
893 {
894  FT_INFO *ftb;
895  FT_SEG_ITERATOR *ftsi;
897 
898 
899 static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
900  char *word, int len,
901  MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
902 {
903  MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
904  FT_INFO *ftb= ftb_param->ftb;
905  FTB_WORD *ftbw;
906  int a, b, c;
907  /*
908  Find right-most element in the array of query words matching this
909  word from a document.
910  */
911  for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
912  {
913  ftbw= ftb->list[c];
914  if (ha_compare_text(ftb->charset, (uchar*)word, len,
915  (uchar*)ftbw->word+1, ftbw->len-1,
916  (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
917  b= c;
918  else
919  a= c;
920  }
921  /*
922  If there were no words with truncation operator, we iterate to the
923  beginning of an array until array element is equal to the word from
924  a document. This is done mainly because the same word may be
925  mentioned twice (or more) in the query.
926 
927  In case query has words with truncation operator we must iterate
928  to the beginning of the array. There may be non-matching query words
929  between matching word with truncation operator and the right-most
930  matching element. E.g., if we're looking for 'aaa15' in an array of
931  'aaa1* aaa14 aaa15 aaa16'.
932 
933  Worse of that there still may be match even if the binary search
934  above didn't find matching element. E.g., if we're looking for
935  'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
936  stop at 'aaa16'.
937  */
938  for (; c >= 0; c--)
939  {
940  ftbw= ftb->list[c];
941  if (ha_compare_text(ftb->charset, (uchar*)word, len,
942  (uchar*)ftbw->word + 1,ftbw->len - 1,
943  (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
944  {
945  if (ftb->with_scan & FTB_FLAG_TRUNC)
946  continue;
947  else
948  break;
949  }
950  if (ftbw->docid[1] == ftb->info->lastpos)
951  continue;
952  ftbw->docid[1]= ftb->info->lastpos;
953  if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
954  return 1;
955  }
956  return(0);
957 }
958 
959 
960 static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
961  char *doc, int len)
962 {
963  MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
964  FT_INFO *ftb= ftb_param->ftb;
965  uchar *end= (uchar*) doc + len;
966  FT_WORD w;
967  while (ft_simple_get_word(ftb->charset, (uchar**) &doc, end, &w, TRUE))
968  param->mysql_add_word(param, (char*) w.pos, w.len, 0);
969  return(0);
970 }
971 
972 
973 float ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
974 {
975  FTB_EXPR *ftbe;
976  FT_SEG_ITERATOR ftsi, ftsi2;
977  my_off_t docid=ftb->info->lastpos;
978  MY_FTB_FIND_PARAM ftb_param;
979  MYSQL_FTPARSER_PARAM *param;
980  struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
981  &ft_default_parser :
982  ftb->info->s->keyinfo[ftb->keynr].parser;
983 
984  if (docid == HA_OFFSET_ERROR)
985  return -2.0;
986  if (!ftb->queue.elements)
987  return 0;
988  if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
989  return 0;
990 
991  if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
992  {
993  FTB_EXPR *x;
994  uint i;
995 
996  for (i=0; i < ftb->queue.elements; i++)
997  {
998  ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
999  for (x=ftb->list[i]->up; x; x=x->up)
1000  x->docid[1]=HA_OFFSET_ERROR;
1001  }
1002  }
1003 
1004  ftb->lastpos=docid;
1005 
1006  if (ftb->keynr==NO_SUCH_KEY)
1007  _mi_ft_segiterator_dummy_init(record, length, &ftsi);
1008  else
1009  _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
1010  memcpy(&ftsi2, &ftsi, sizeof(ftsi));
1011 
1012  ftb_param.ftb= ftb;
1013  ftb_param.ftsi= &ftsi2;
1014  param->mysql_parse= ftb_find_relevance_parse;
1015  param->mysql_add_word= ftb_find_relevance_add_word;
1016  param->mysql_ftparam= (void *)&ftb_param;
1017  param->flags= 0;
1018  param->cs= ftb->charset;
1019  param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1020  while (_mi_ft_segiterator(&ftsi))
1021  {
1022  if (!ftsi.pos)
1023  continue;
1024  param->doc= (char *)ftsi.pos;
1025  param->length= ftsi.len;
1026  if (unlikely(parser->parse(param)))
1027  return 0;
1028  }
1029  ftbe=ftb->root;
1030  if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1031  ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1032  { /* row matched ! */
1033  return ftbe->cur_weight;
1034  }
1035  else
1036  { /* match failed ! */
1037  return 0.0;
1038  }
1039 }
1040 
1041 
1042 void ft_boolean_close_search(FT_INFO *ftb)
1043 {
1044  if (is_tree_inited(& ftb->no_dupes))
1045  {
1046  delete_tree(& ftb->no_dupes);
1047  }
1048  free_root(& ftb->mem_root, MYF(0));
1049  my_free(ftb);
1050 }
1051 
1052 
1053 float ft_boolean_get_relevance(FT_INFO *ftb)
1054 {
1055  return ftb->root->cur_weight;
1056 }
1057 
1058 
1059 void ft_boolean_reinit_search(FT_INFO *ftb)
1060 {
1061  _ftb_init_index_search(ftb);
1062 }
1063