MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ftdefs.h
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17 
18 /* some definitions for full-text indices */
19 
20 #include "fulltext.h"
21 #include <m_ctype.h>
22 #include <my_tree.h>
23 #include <queues.h>
24 #include <mysql/plugin.h>
25 /* Word-character classification macros. true_word_char() is non-zero when ctype has any of the _MY_U, _MY_L or _MY_NMR bits set, or when character is '_'. misc_word_char() always expands to 0. */
26 #define true_word_char(ctype, character) \
27  ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \
28  (character) == '_')
29 #define misc_word_char(X) 0
30 
31 #define FT_MAX_WORD_LEN_FOR_SORT 31 /* NOTE(review): presumably a word-length limit applied when sorting; confirm at the use sites */
32 
33 #define FTPARSER_MEMROOT_ALLOC_SIZE 65536 /* 64 KiB */
34 
35 #define COMPILE_STOPWORDS_IN /* defined with no value: only its presence can be tested (#ifdef) */
36 
37 /* Interested readers may consult SMART
38  (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
39  for an excellent implementation of vector space model we use.
40  It also demonstrates the usage of different weighting techniques.
41  This code, though, is completely original and is not based on the
42  SMART code but was in some cases inspired by it.
43 
44  NORM_PIVOT was taken from the article
45  A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization",
46  ACM SIGIR'96, 21-29, 1996
47  */
48 /* Scheme selection: each macro below names one of the alternative weighting / normalization formulas defined further down in this header. */
49 #define LWS_FOR_QUERY LWS_TF
50 #define LWS_IN_USE LWS_LOG
51 #define PRENORM_IN_USE PRENORM_AVG
52 #define NORM_IN_USE NORM_PIVOT
53 #define GWS_IN_USE GWS_PROB
54 /*=== LWS_*: alternatives that expand over a variable named count at the expansion site ===*/
55 #define LWS_TF (count)
56 #define LWS_BINARY (count>0)
57 #define LWS_SQUARE (count*count)
58 #define LWS_LOG (count?(log( (double) count)+1):0)
59 /*--- PRENORM_*: alternatives over p->weight and the docstat fields max, sum and uniq ---*/
60 #define PRENORM_NONE (p->weight)
61 #define PRENORM_MAX (p->weight/docstat.max)
62 #define PRENORM_AUG (0.4+0.6*p->weight/docstat.max)
63 #define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq)
64 #define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq)))
65 /*--- NORM_*: alternatives over docstat.nsum, docstat.nsum2 and docstat.uniq ---*/
66 #define NORM_NONE (1)
67 #define NORM_SUM (docstat.nsum)
68 #define NORM_COS (sqrt(docstat.nsum2))
69 /* PIVOT_VAL is the coefficient applied to docstat.uniq in NORM_PIVOT */
70 #define PIVOT_VAL (0.0115)
71 #define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq)
72 /*--- GWS_*: alternatives over sum, sum2, suml, doc_cnt and aio->info->state->records ---*/
73 #define GWS_NORM (1/sqrt(sum2))
74 #define GWS_GFIDF (sum/doc_cnt)
75 /* Mysterious, but w/o (double) GWS_IDF performs better :-o */
76 #define GWS_IDF log(aio->info->state->records/doc_cnt)
77 #define GWS_IDF1 log((double)aio->info->state->records/doc_cnt)
78 #define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 )
79 #define GWS_FREQ (1.0/doc_cnt)
80 #define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2)
81 #define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3)
82 #define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records))
83 /*=================================================================*/
84 
85 /* Boolean search operators: each macro names one position of ft_boolean_syntax[]. Positions 0-8, 10 and 11 are used; position 9 is not referenced here. */
86 #define FTB_YES (ft_boolean_syntax[0])
87 #define FTB_EGAL (ft_boolean_syntax[1])
88 #define FTB_NO (ft_boolean_syntax[2])
89 #define FTB_INC (ft_boolean_syntax[3])
90 #define FTB_DEC (ft_boolean_syntax[4])
91 #define FTB_LBR (ft_boolean_syntax[5])
92 #define FTB_RBR (ft_boolean_syntax[6])
93 #define FTB_NEG (ft_boolean_syntax[7])
94 #define FTB_TRUNC (ft_boolean_syntax[8])
95 #define FTB_LQUOT (ft_boolean_syntax[10])
96 #define FTB_RQUOT (ft_boolean_syntax[11])
97 
98 typedef struct st_ft_word { /* one word record: byte pointer, length and weight */
99  uchar * pos; /* NOTE(review): presumably points at the first byte of the word; confirm */
100  uint len; /* NOTE(review): presumably the word length in bytes; confirm */
101  double weight; /* NOTE(review): looks like the value the PRENORM_* macros read as p->weight; confirm */
102 } FT_WORD;
103 
104 int is_stopword(char *word, uint len);
105 
106 uint _ft_make_key(MI_INFO *, uint , uchar *, FT_WORD *, my_off_t);
107 
108 uchar ft_get_word(const CHARSET_INFO *, uchar **, uchar *, FT_WORD *,
109  MYSQL_FTPARSER_BOOLEAN_INFO *); /* NOTE(review): line 109 was absent from this listing and is reconstructed; confirm against the definition */
110 uchar ft_simple_get_word(const CHARSET_INFO *, uchar **, const uchar *,
111  FT_WORD *, my_bool);
112 
113 typedef struct _st_ft_seg_iterator { /* state object taken by the _mi_ft_segiterator*() functions declared below */
114  uint num, len;
115  HA_KEYSEG *seg;
116  const uchar *rec, *pos;
117 } FT_SEG_ITERATOR;
118 
119 void _mi_ft_segiterator_init(MI_INFO *, uint, const uchar *, FT_SEG_ITERATOR *);
120 void _mi_ft_segiterator_dummy_init(const uchar *, uint, FT_SEG_ITERATOR *);
121 uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
122 
123 void ft_parse_init(TREE *, const CHARSET_INFO *);
124 int ft_parse(TREE *, uchar *, int, struct st_mysql_ftparser *parser,
125  MYSQL_FTPARSER_PARAM *, MEM_ROOT *); /* NOTE(review): line 125 was absent from this listing and is reconstructed; confirm against the definition */
126 FT_WORD * ft_linearize(TREE *, MEM_ROOT *);
127 FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const uchar *, MEM_ROOT *);
128 uint _mi_ft_parse(TREE *, MI_INFO *, uint, const uchar *,
129  MYSQL_FTPARSER_PARAM *, MEM_ROOT *); /* NOTE(review): line 129 was absent from this listing and is reconstructed; confirm against the definition */
130 
131 FT_INFO *ft_init_nlq_search(MI_INFO *, uint, uchar *, uint, uint, uchar *);
132 FT_INFO *ft_init_boolean_search(MI_INFO *, uint, uchar *, uint,
133  const CHARSET_INFO *);
134 /* ft_nlq_* interface: the _ft_vft_nlq object and the functions that take an FT_INFO. NOTE(review): nlq presumably stands for natural-language query; confirm. */
135 extern const struct _ft_vft _ft_vft_nlq;
136 int ft_nlq_read_next(FT_INFO *, char *);
137 float ft_nlq_find_relevance(FT_INFO *, uchar *, uint);
138 void ft_nlq_close_search(FT_INFO *);
139 float ft_nlq_get_relevance(FT_INFO *);
140 my_off_t ft_nlq_get_docid(FT_INFO *);
141 void ft_nlq_reinit_search(FT_INFO *);
142 /* ft_boolean_* interface: the _ft_vft_boolean object and the same set of FT_INFO functions as the ft_nlq_* group, declared for boolean search. */
143 extern const struct _ft_vft _ft_vft_boolean;
144 int ft_boolean_read_next(FT_INFO *, char *);
145 float ft_boolean_find_relevance(FT_INFO *, uchar *, uint);
146 void ft_boolean_close_search(FT_INFO *);
147 float ft_boolean_get_relevance(FT_INFO *);
148 my_off_t ft_boolean_get_docid(FT_INFO *);
149 void ft_boolean_reinit_search(FT_INFO *);
150 MYSQL_FTPARSER_PARAM* ftparser_alloc_param(MI_INFO *info);
151 extern MYSQL_FTPARSER_PARAM *ftparser_call_initializer(MI_INFO *info,
152  uint keynr,
153  uint paramnr);
154 extern void ftparser_call_deinitializer(MI_INFO *info);