MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ft_nlq_search.c
1 /* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17 
18 #define FT_CORE
19 #include "ftdefs.h"
20 
21 /* search with natural language queries */
22 
23 typedef struct ft_doc_rec
24 {
25  my_off_t dpos;
26  double weight;
27 } FT_DOC;
28 
29 struct st_ft_info
30 {
31  struct _ft_vft *please;
32  MI_INFO *info;
33  int ndocs;
34  int curdoc;
35  FT_DOC doc[1];
36 };
37 
38 typedef struct st_all_in_one
39 {
40  MI_INFO *info;
41  uint keynr;
42  const CHARSET_INFO *charset;
43  uchar *keybuff;
44  TREE dtree;
45 } ALL_IN_ONE;
46 
47 typedef struct st_ft_superdoc
48 {
49  FT_DOC doc;
50  FT_WORD *word_ptr;
51  double tmp_weight;
52 } FT_SUPERDOC;
53 
54 static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)),
55  FT_SUPERDOC *p1, FT_SUPERDOC *p2)
56 {
57  if (p1->doc.dpos < p2->doc.dpos)
58  return -1;
59  if (p1->doc.dpos == p2->doc.dpos)
60  return 0;
61  return 1;
62 }
63 
64 static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
65 {
66  int UNINIT_VAR(subkeys), r;
67  uint keylen, doc_cnt;
68  FT_SUPERDOC sdoc, *sptr;
69  TREE_ELEMENT *selem;
70  double gweight=1;
71  MI_INFO *info=aio->info;
72  MYISAM_SHARE *share= info->s;
73  uchar *keybuff=aio->keybuff;
74  MI_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr;
75  my_off_t key_root;
76  uint extra= HA_FT_WLEN + info->s->rec_reflength;
77 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
78  float tmp_weight;
79 #else
80 #error
81 #endif
82 
83  DBUG_ENTER("walk_and_match");
84 
85  word->weight=LWS_FOR_QUERY;
86 
87  keylen=_ft_make_key(info,aio->keynr,keybuff,word,0);
88  keylen-=HA_FT_WLEN;
89  doc_cnt=0;
90 
91  if (share->concurrent_insert)
92  mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
93 
94  key_root= share->state.key_root[aio->keynr];
95 
96  /* Skip rows inserted by current inserted */
97  for (r=_mi_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ;
98  !r &&
99  (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
100  info->lastpos >= info->state->data_file_length ;
101  r= _mi_search_next(info, keyinfo, info->lastkey,
102  info->lastkey_length, SEARCH_BIGGER, key_root))
103  ;
104 
105  if (share->concurrent_insert)
106  mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
107 
108  info->update|= HA_STATE_AKTIV; /* for _mi_test_if_changed() */
109 
110  /* The following should be safe, even if we compare doubles */
111  while (!r && gweight)
112  {
113 
114  if (keylen &&
115  ha_compare_text(aio->charset,info->lastkey+1,
116  info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
117  break;
118 
119  if (subkeys<0)
120  {
121  if (doc_cnt)
122  DBUG_RETURN(1); /* index is corrupted */
123  /*
124  TODO here: unsafe optimization, should this word
125  be skipped (based on subkeys) ?
126  */
127  keybuff+=keylen;
128  keyinfo=& info->s->ft2_keyinfo;
129  key_root=info->lastpos;
130  keylen=0;
131  if (share->concurrent_insert)
132  mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
133  r=_mi_search_first(info, keyinfo, key_root);
134  goto do_skip;
135  }
136 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
137  ft_floatXget(tmp_weight, info->lastkey+info->lastkey_length-extra);
138 #else
139 #error
140 #endif
141  /* The following should be safe, even if we compare doubles */
142  if (tmp_weight==0)
143  DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
144 
145  sdoc.doc.dpos=info->lastpos;
146 
147  /* saving document matched into dtree */
148  if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
149  DBUG_RETURN(1);
150 
151  sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
152 
153  if (selem->count==1) /* document's first match */
154  sptr->doc.weight=0;
155  else
156  sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
157 
158  sptr->word_ptr=word;
159  sptr->tmp_weight=tmp_weight;
160 
161  doc_cnt++;
162 
163  gweight=word->weight*GWS_IN_USE;
164  if (gweight < 0 || doc_cnt > 2000000)
165  gweight=0;
166 
167  if (share->concurrent_insert)
168  mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
169 
170  if (_mi_test_if_changed(info) == 0)
171  r=_mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
172  SEARCH_BIGGER, key_root);
173  else
174  r=_mi_search(info, keyinfo, info->lastkey, info->lastkey_length,
175  SEARCH_BIGGER, key_root);
176 do_skip:
177  while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
178  !r && info->lastpos >= info->state->data_file_length)
179  r= _mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
180  SEARCH_BIGGER, key_root);
181 
182  if (share->concurrent_insert)
183  mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
184  }
185  word->weight=gweight;
186 
187  DBUG_RETURN(0);
188 }
189 
190 
191 static int walk_and_copy(FT_SUPERDOC *from,
192  uint32 count __attribute__((unused)), FT_DOC **to)
193 {
194  DBUG_ENTER("walk_and_copy");
195  from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
196  (*to)->dpos=from->doc.dpos;
197  (*to)->weight=from->doc.weight;
198  (*to)++;
199  DBUG_RETURN(0);
200 }
201 
202 static int walk_and_push(FT_SUPERDOC *from,
203  uint32 count __attribute__((unused)), QUEUE *best)
204 {
205  DBUG_ENTER("walk_and_copy");
206  from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
207  set_if_smaller(best->elements, ft_query_expansion_limit-1);
208  queue_insert(best, (uchar *)& from->doc);
209  DBUG_RETURN(0);
210 }
211 
212 
213 static int FT_DOC_cmp(void *unused __attribute__((unused)),
214  FT_DOC *a, FT_DOC *b)
215 {
216  double c= b->weight - a->weight;
217  return ((c < 0) ? -1 : (c > 0) ? 1 : 0);
218 }
219 
220 
221 FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, uchar *query,
222  uint query_len, uint flags, uchar *record)
223 {
224  TREE wtree;
225  ALL_IN_ONE aio;
226  FT_DOC *dptr;
227  FT_INFO *dlist=NULL;
228  my_off_t saved_lastpos=info->lastpos;
229  struct st_mysql_ftparser *parser;
230  MYSQL_FTPARSER_PARAM *ftparser_param;
231  DBUG_ENTER("ft_init_nlq_search");
232 
233 /* black magic ON */
234  if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
235  DBUG_RETURN(NULL);
236  if (_mi_readinfo(info,F_RDLCK,1))
237  DBUG_RETURN(NULL);
238 /* black magic OFF */
239 
240  aio.info=info;
241  aio.keynr=keynr;
242  aio.charset=info->s->keyinfo[keynr].seg->charset;
243  aio.keybuff=info->lastkey+info->s->base.max_key_length;
244  parser= info->s->keyinfo[keynr].parser;
245  if (! (ftparser_param= ftparser_call_initializer(info, keynr, 0)))
246  goto err;
247 
248  memset(&wtree, 0, sizeof(wtree));
249 
250  init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
251  NULL, NULL);
252 
253  ft_parse_init(&wtree, aio.charset);
254  ftparser_param->flags= 0;
255  if (ft_parse(&wtree, query, query_len, parser, ftparser_param,
256  &wtree.mem_root))
257  goto err;
258 
259  if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
260  left_root_right))
261  goto err;
262 
263  if (flags & FT_EXPAND && ft_query_expansion_limit)
264  {
265  QUEUE best;
266  init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
267  0);
268  tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
269  &best, left_root_right);
270  while (best.elements)
271  {
272  my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
273  if (!(*info->read_record)(info,docid,record))
274  {
275  info->update|= HA_STATE_AKTIV;
276  ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
277  if (unlikely(_mi_ft_parse(&wtree, info, keynr, record, ftparser_param,
278  &wtree.mem_root)))
279  {
280  delete_queue(&best);
281  goto err;
282  }
283  }
284  }
285  delete_queue(&best);
286  reset_tree(&aio.dtree);
287  if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
288  left_root_right))
289  goto err;
290 
291  }
292 
293  /*
294  If ndocs == 0, this will not allocate RAM for FT_INFO.doc[],
295  so if ndocs == 0, FT_INFO.doc[] must not be accessed.
296  */
297  dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
298  sizeof(FT_DOC)*
299  (int)(aio.dtree.elements_in_tree-1),
300  MYF(0));
301  if (!dlist)
302  goto err;
303 
304  dlist->please= (struct _ft_vft *) & _ft_vft_nlq;
305  dlist->ndocs=aio.dtree.elements_in_tree;
306  dlist->curdoc=-1;
307  dlist->info=aio.info;
308  dptr=dlist->doc;
309 
310  tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
311  &dptr, left_root_right);
312 
313  if (flags & FT_SORTED)
314  my_qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp,
315  0);
316 
317 err:
318  delete_tree(&aio.dtree);
319  delete_tree(&wtree);
320  info->lastpos=saved_lastpos;
321  DBUG_RETURN(dlist);
322 }
323 
324 
325 int ft_nlq_read_next(FT_INFO *handler, char *record)
326 {
327  MI_INFO *info= (MI_INFO *) handler->info;
328 
329  if (++handler->curdoc >= handler->ndocs)
330  {
331  --handler->curdoc;
332  return HA_ERR_END_OF_FILE;
333  }
334 
335  info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
336 
337  info->lastpos=handler->doc[handler->curdoc].dpos;
338  if (!(*info->read_record)(info,info->lastpos,(uchar*) record))
339  {
340  info->update|= HA_STATE_AKTIV; /* Record is read */
341  return 0;
342  }
343  return my_errno;
344 }
345 
346 
347 float ft_nlq_find_relevance(FT_INFO *handler,
348  uchar *record __attribute__((unused)),
349  uint length __attribute__((unused)))
350 {
351  int a,b,c;
352  FT_DOC *docs=handler->doc;
353  my_off_t docid=handler->info->lastpos;
354 
355  if (docid == HA_POS_ERROR)
356  return -5.0;
357 
358  /* Assuming docs[] is sorted by dpos... */
359 
360  for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
361  {
362  if (docs[c].dpos > docid)
363  b=c;
364  else
365  a=c;
366  }
367  /* bounds check to avoid accessing unallocated handler->doc */
368  if (a < handler->ndocs && docs[a].dpos == docid)
369  return (float) docs[a].weight;
370  else
371  return 0.0;
372 }
373 
374 
375 void ft_nlq_close_search(FT_INFO *handler)
376 {
377  my_free(handler);
378 }
379 
380 
381 float ft_nlq_get_relevance(FT_INFO *handler)
382 {
383  return (float) handler->doc[handler->curdoc].weight;
384 }
385 
386 
387 void ft_nlq_reinit_search(FT_INFO *handler)
388 {
389  handler->curdoc=-1;
390 }
391