MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
myisam_ftdump.c
1 /* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code
17  added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
18 
19 #include "ftdefs.h"
20 #include <my_getopt.h>
21 
22 static void usage();
23 static void complain(int val);
24 static my_bool get_one_option(int, const struct my_option *, char *);
25 
26 static int count=0, stats=0, dump=0, lstats=0;
27 static my_bool verbose;
28 static char *query=NULL;
29 static uint lengths[256];
30 
31 #define MAX_LEN (HA_FT_MAXBYTELEN+10)
32 #define HOW_OFTEN_TO_WRITE 10000
33 
34 static struct my_option my_long_options[] =
35 {
36  {"help", 'h', "Display help and exit.",
37  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
38  {"help", '?', "Synonym for -h.",
39  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
40  {"count", 'c', "Calculate per-word stats (counts and global weights).",
41  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
42  {"dump", 'd', "Dump index (incl. data offsets and word weights).",
43  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
44  {"length", 'l', "Report length distribution.",
45  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
46  {"stats", 's', "Report global stats.",
47  0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
48  {"verbose", 'v', "Be verbose.",
49  &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
50  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
51 };
52 
53 
54 int main(int argc,char *argv[])
55 {
56  int error=0, subkeys;
57  uint keylen, keylen2=0, inx, doc_cnt=0;
58  float weight= 1.0;
59  double gws, min_gws=0, avg_gws=0;
60  MI_INFO *info;
61  char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
62  ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
63  struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
64 
65  MY_INIT(argv[0]);
66  if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
67  exit(error);
68  if (count || dump)
69  verbose=0;
70  if (!count && !dump && !lstats && !query)
71  stats=1;
72 
73  if (verbose)
74  setbuf(stdout,NULL);
75 
76  if (argc < 2)
77  usage();
78 
79  {
80  char *end;
81  inx= (uint) strtoll(argv[1], &end, 10);
82  if (*end)
83  usage();
84  }
85 
86  init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0);
87 
88  if (!(info=mi_open(argv[0], O_RDONLY,
89  HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
90  {
91  error=my_errno;
92  goto err;
93  }
94 
95  *buf2=0;
96  aio->info=info;
97 
98  if ((inx >= info->s->base.keys) ||
99  !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
100  {
101  printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
102  goto err;
103  }
104 
105  mi_lock_database(info, F_EXTRA_LCK);
106 
107  info->lastpos= HA_OFFSET_ERROR;
108  info->update|= HA_STATE_PREV_FOUND;
109 
110  while (!(error=mi_rnext(info,NULL,inx)))
111  {
112  keylen=*(info->lastkey);
113 
114  subkeys=ft_sintXkorr(info->lastkey+keylen+1);
115  if (subkeys >= 0)
116  ft_floatXget(weight, info->lastkey+keylen+1);
117 
118 #ifdef HAVE_SNPRINTF
119  snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
120 #else
121  sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
122 #endif
123  my_casedn_str(default_charset_info,buf);
124  total++;
125  lengths[keylen]++;
126 
127  if (count || stats)
128  {
129  if (strcmp(buf, buf2))
130  {
131  if (*buf2)
132  {
133  uniq++;
134  avg_gws+=gws=GWS_IN_USE;
135  if (count)
136  printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
137  if (maxlen<keylen2)
138  {
139  maxlen=keylen2;
140  strmov(buf_maxlen, buf2);
141  }
142  if (max_doc_cnt < doc_cnt)
143  {
144  max_doc_cnt=doc_cnt;
145  strmov(buf_min_gws, buf2);
146  min_gws=gws;
147  }
148  }
149  strmov(buf2, buf);
150  keylen2=keylen;
151  doc_cnt=0;
152  }
153  doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
154  }
155  if (dump)
156  {
157  if (subkeys>=0)
158  printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
159  else
160  printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf);
161  }
162  if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
163  printf("%10ld\r",total);
164  }
165  mi_lock_database(info, F_UNLCK);
166 
167  if (count || stats)
168  {
169  if (*buf2)
170  {
171  uniq++;
172  avg_gws+=gws=GWS_IN_USE;
173  if (count)
174  printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
175  if (maxlen<keylen2)
176  {
177  maxlen=keylen2;
178  strmov(buf_maxlen, buf2);
179  }
180  if (max_doc_cnt < doc_cnt)
181  {
182  max_doc_cnt=doc_cnt;
183  strmov(buf_min_gws, buf2);
184  min_gws=gws;
185  }
186  }
187  }
188 
189  if (stats)
190  {
191  count=0;
192  for (inx=0;inx<256;inx++)
193  {
194  count+=lengths[inx];
195  if ((ulong) count >= total/2)
196  break;
197  }
198  printf("Total rows: %lu\nTotal words: %lu\n"
199  "Unique words: %lu\nLongest word: %lu chars (%s)\n"
200  "Median length: %u\n"
201  "Average global weight: %f\n"
202  "Most common word: %lu times, weight: %f (%s)\n",
203  (long) info->state->records, total, uniq, maxlen, buf_maxlen,
204  inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
205  }
206  if (lstats)
207  {
208  count=0;
209  for (inx=0; inx<256; inx++)
210  {
211  count+=lengths[inx];
212  if (count && lengths[inx])
213  printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
214  (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
215  100.0*count/total);
216  }
217  }
218 
219 err:
220  if (error && error != HA_ERR_END_OF_FILE)
221  printf("got error %d\n",my_errno);
222  if (info)
223  mi_close(info);
224  return 0;
225 }
226 
227 
228 static my_bool
229 get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
230  char *argument __attribute__((unused)))
231 {
232  switch(optid) {
233  case 'd':
234  dump=1;
235  complain(count || query);
236  break;
237  case 's':
238  stats=1;
239  complain(query!=0);
240  break;
241  case 'c':
242  count= 1;
243  complain(dump || query);
244  break;
245  case 'l':
246  lstats=1;
247  complain(query!=0);
248  break;
249  case '?':
250  case 'h':
251  usage();
252  }
253  return 0;
254 }
255 
256 
257 static void usage()
258 {
259  printf("Use: myisam_ftdump <table_name> <index_num>\n");
260  my_print_help(my_long_options);
261  my_print_variables(my_long_options);
262  exit(1);
263 }
264 
265 
/*
  Abort when an invalid option combination was detected.

  val: non-zero if the options seen so far conflict. In that case a message
       is printed to stdout and the process exits with status 1; with zero
       the function returns without doing anything.
*/
static void complain(int val)
{
  if (!val)
    return;

  fputs("You cannot use these options together!\n", stdout);
  exit(1);
}
274 
275 #include "mi_extrafunc.h"