MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
plugin_ftparser.h
1 /* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #ifndef _my_plugin_ftparser_h
17 #define _my_plugin_ftparser_h
18 #include "plugin.h"
19 
/*************************************************************************
  API for Full-text parser plugin. (MYSQL_FTPARSER_PLUGIN)
*/

/*
  Version of the full-text parser plugin interface declared in this
  header. A parser descriptor stores it in its interface_version member.
*/
#define MYSQL_FTPARSER_INTERFACE_VERSION 0x0100
25 
/* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */
enum enum_ftparser_mode
{
/*
  Fast and simple mode.  This mode is used for indexing, and natural
  language queries.

  The parser is expected to return only those words that go into the
  index. Stopwords or too short/long words should not be returned. The
  'boolean_info' argument of mysql_add_word() does not have to be set.
*/
  MYSQL_FTPARSER_SIMPLE_MODE= 0,

/*
  Parse with stopwords mode.  This mode is used in boolean searches for
  "phrase matching."

  The parser is not allowed to ignore words in this mode.  Every word
  should be returned, including stopwords and words that are too short
  or long.  The 'boolean_info' argument of mysql_add_word() does not
  have to be set.
*/
  MYSQL_FTPARSER_WITH_STOPWORDS= 1,

/*
  Parse in boolean mode.  This mode is used to parse a boolean query string.

  The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
  structure in the 'boolean_info' argument to mysql_add_word().
  Usually that means that the parser should recognize boolean operators
  in the parsing stream and set appropriate fields in
  MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly.  As for
  MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
  Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
*/
  MYSQL_FTPARSER_FULL_BOOLEAN_INFO= 2
};
63 
/*
  Token types for boolean mode searching (used for the type member of
  MYSQL_FTPARSER_BOOLEAN_INFO struct)

  FT_TOKEN_EOF: End of data.
  FT_TOKEN_WORD: Regular word.
  FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
  FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
  FT_TOKEN_STOPWORD: Stopword.
*/

enum enum_ft_token_type
{
  FT_TOKEN_EOF= 0,
  FT_TOKEN_WORD= 1,
  FT_TOKEN_LEFT_PAREN= 2,
  FT_TOKEN_RIGHT_PAREN= 3,
  FT_TOKEN_STOPWORD= 4
};
83 
84 /*
85  This structure is used in boolean search mode only. It conveys
86  boolean-mode metadata to the MySQL search engine for every word in
87  the search query. A valid instance of this structure must be filled
88  in by the plugin parser and passed as an argument in the call to
89  mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
90  structure) when a query is parsed in boolean mode.
91 
92  type: The token type. Should be one of the enum_ft_token_type values.
93 
94  yesno: Whether the word must be present for a match to occur:
95  >0 Must be present
96  <0 Must not be present
97  0 Neither; the word is optional but its presence increases the relevance
98  With the default settings of the ft_boolean_syntax system variable,
99  >0 corresponds to the '+' operator, <0 corrresponds to the '-' operator,
100  and 0 means neither operator was used.
101 
102  weight_adjust: A weighting factor that determines how much a match
103  for the word counts. Positive values increase, negative - decrease the
104  relative word's importance in the query.
105 
106  wasign: The sign of the word's weight in the query. If it's non-negative
107  the match for the word will increase document relevance, if it's
108  negative - decrease (the word becomes a "noise word", the less of it the
109  better).
110 
111  trunc: Corresponds to the '*' operator in the default setting of the
112  ft_boolean_syntax system variable.
113 */
114 
116 {
117  enum enum_ft_token_type type;
118  int yesno;
119  int weight_adjust;
120  char wasign;
121  char trunc;
122  /* These are parser state and must be removed. */
123  char prev;
124  char *quot;
126 
/*
  The following flag means that buffer with a string (document, word)
  may be overwritten by the caller before the end of the parsing (that is
  before st_mysql_ftparser::deinit() call). If the string has to
  survive between two successive calls of the parsing function, the
  parser needs to save a copy of it. The flag may be set by MySQL before
  calling st_mysql_ftparser::parse(), or it may be set by a plugin before
  calling st_mysql_ftparser_param::mysql_parse() or
  st_mysql_ftparser_param::mysql_add_word().
*/
#define MYSQL_FTFLAGS_NEED_COPY 1
138 
139 /*
140  An argument of the full-text parser plugin. This structure is
141  filled in by MySQL server and passed to the parsing function of the
142  plugin as an in/out parameter.
143 
144  mysql_parse: A pointer to the built-in parser implementation of the
145  server. It's set by the server and can be used by the parser plugin
146  to invoke the MySQL default parser. If plugin's role is to extract
147  textual data from .doc, .pdf or .xml content, it might extract
148  plaintext from the content, and then pass the text to the default
149  MySQL parser to be parsed.
150 
151  mysql_add_word: A server callback to add a new word. When parsing
152  a document, the server sets this to point at a function that adds
153  the word to MySQL full-text index. When parsing a search query,
154  this function will add the new word to the list of words to search
155  for. The boolean_info argument can be NULL for all cases except
156  when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO.
157 
158  ftparser_state: A generic pointer. The plugin can set it to point
159  to information to be used internally for its own purposes.
160 
161  mysql_ftparam: This is set by the server. It is used by MySQL functions
162  called via mysql_parse() and mysql_add_word() callback. The plugin
163  should not modify it.
164 
165  cs: Information about the character set of the document or query string.
166 
167  doc: A pointer to the document or query string to be parsed.
168 
169  length: Length of the document or query string, in bytes.
170 
171  flags: See MYSQL_FTFLAGS_* constants above.
172 
173  mode: The parsing mode. With boolean operators, with stopwords, or
174  nothing. See enum_ftparser_mode above.
175 */
176 
178 {
179  int (*mysql_parse)(struct st_mysql_ftparser_param *,
180  char *doc, int doc_len);
181  int (*mysql_add_word)(struct st_mysql_ftparser_param *,
182  char *word, int word_len,
183  MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
184  void *ftparser_state;
185  void *mysql_ftparam;
186  const struct charset_info_st *cs;
187  char *doc;
188  int length;
189  int flags;
190  enum enum_ftparser_mode mode;
192 
193 /*
194  Full-text parser descriptor.
195 
196  interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
197  The parsing, initialization, and deinitialization functions are
198  invoked per SQL statement for which the parser is used.
199 */
200 
202 {
203  int interface_version;
204  int (*parse)(MYSQL_FTPARSER_PARAM *param);
205  int (*init)(MYSQL_FTPARSER_PARAM *param);
206  int (*deinit)(MYSQL_FTPARSER_PARAM *param);
207 };
208 
209 
210 #endif
211