MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
handler.cc
Go to the documentation of this file.
1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software Foundation,
14  Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
15 
22 #include "binlog.h"
23 #include "sql_priv.h"
24 #include "unireg.h"
25 #include "rpl_handler.h"
26 #include "sql_cache.h" // query_cache, query_cache_*
27 #include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp
28 #include "sql_table.h" // build_table_filename
29 #include "sql_parse.h" // check_stack_overrun
30 #include "sql_acl.h" // SUPER_ACL
31 #include "sql_base.h" // free_io_cache
32 #include "discover.h" // writefrm
33 #include "log_event.h" // *_rows_log_event
34 #include "rpl_filter.h"
35 #include <myisampack.h>
36 #include "transaction.h"
37 #include <errno.h>
38 #include "probes_mysql.h"
39 #include <mysql/psi/mysql_table.h>
40 #include "debug_sync.h" // DEBUG_SYNC
41 #include <my_bit.h>
42 #include <list>
43 
44 #ifdef WITH_PARTITION_STORAGE_ENGINE
45 #include "ha_partition.h"
46 #endif
47 
48 using std::min;
49 using std::max;
50 using std::list;
51 
52 // This is a temporary backporting fix.
53 #ifndef HAVE_LOG2
54 /*
55  This will be slightly slower and perhaps a tiny bit less accurate than
56  doing it the IEEE754 way but log2() should be available on C99 systems.
57 */
inline double log2(double x)
{
  /* Change of base: log2(x) = ln(x) / ln(2). */
  const double natural_log= log(x);
  return natural_log / M_LN2;
}
62 #endif
63 
64 /*
65  While we have legacy_db_type, we have this array to
66  check for dups and to find handlerton from legacy_db_type.
67  Remove when legacy_db_type is finally gone
68 */
69 st_plugin_int *hton2plugin[MAX_HA];
70 
71 static handlerton *installed_htons[128];
72 
73 #define BITMAP_STACKBUF_SIZE (128/8)
74 
75 KEY_CREATE_INFO default_key_create_info=
76  { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };
77 
78 /* number of entries in handlertons[] */
79 ulong total_ha= 0;
80 /* number of storage engines (from handlertons[]) that support 2pc */
81 ulong total_ha_2pc= 0;
82 /* size of savepoint storage area (see ha_init) */
83 ulong savepoint_alloc_size= 0;
84 
85 static const LEX_STRING sys_table_aliases[]=
86 {
87  { C_STRING_WITH_LEN("INNOBASE") }, { C_STRING_WITH_LEN("INNODB") },
88  { C_STRING_WITH_LEN("NDB") }, { C_STRING_WITH_LEN("NDBCLUSTER") },
89  { C_STRING_WITH_LEN("HEAP") }, { C_STRING_WITH_LEN("MEMORY") },
90  { C_STRING_WITH_LEN("MERGE") }, { C_STRING_WITH_LEN("MRG_MYISAM") },
91  {NullS, 0}
92 };
93 
94 const char *ha_row_type[] = {
95  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
96  /* Reserved to be "PAGE" in future versions */ "?",
97  "?","?","?"
98 };
99 
100 const char *tx_isolation_names[] =
101 { "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
102  NullS};
103 TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
104  tx_isolation_names, NULL};
105 
106 #ifndef DBUG_OFF
107 
108 const char *ha_legacy_type_name(legacy_db_type legacy_type)
109 {
110  switch (legacy_type)
111  {
112  case DB_TYPE_UNKNOWN:
113  return "DB_TYPE_UNKNOWN";
114  case DB_TYPE_DIAB_ISAM:
115  return "DB_TYPE_DIAB_ISAM";
116  case DB_TYPE_HASH:
117  return "DB_TYPE_HASH";
118  case DB_TYPE_MISAM:
119  return "DB_TYPE_MISAM";
120  case DB_TYPE_PISAM:
121  return "DB_TYPE_PISAM";
122  case DB_TYPE_RMS_ISAM:
123  return "DB_TYPE_RMS_ISAM";
124  case DB_TYPE_HEAP:
125  return "DB_TYPE_HEAP";
126  case DB_TYPE_ISAM:
127  return "DB_TYPE_ISAM";
128  case DB_TYPE_MRG_ISAM:
129  return "DB_TYPE_MRG_ISAM";
130  case DB_TYPE_MYISAM:
131  return "DB_TYPE_MYISAM";
132  case DB_TYPE_MRG_MYISAM:
133  return "DB_TYPE_MRG_MYISAM";
134  case DB_TYPE_BERKELEY_DB:
135  return "DB_TYPE_BERKELEY_DB";
136  case DB_TYPE_INNODB:
137  return "DB_TYPE_INNODB";
138  case DB_TYPE_GEMINI:
139  return "DB_TYPE_GEMINI";
140  case DB_TYPE_NDBCLUSTER:
141  return "DB_TYPE_NDBCLUSTER";
142  case DB_TYPE_EXAMPLE_DB:
143  return "DB_TYPE_EXAMPLE_DB";
144  case DB_TYPE_ARCHIVE_DB:
145  return "DB_TYPE_ARCHIVE_DB";
146  case DB_TYPE_CSV_DB:
147  return "DB_TYPE_CSV_DB";
148  case DB_TYPE_FEDERATED_DB:
149  return "DB_TYPE_FEDERATED_DB";
150  case DB_TYPE_BLACKHOLE_DB:
151  return "DB_TYPE_BLACKHOLE_DB";
152  case DB_TYPE_PARTITION_DB:
153  return "DB_TYPE_PARTITION_DB";
154  case DB_TYPE_BINLOG:
155  return "DB_TYPE_BINLOG";
156  case DB_TYPE_SOLID:
157  return "DB_TYPE_SOLID";
158  case DB_TYPE_PBXT:
159  return "DB_TYPE_PBXT";
160  case DB_TYPE_TABLE_FUNCTION:
161  return "DB_TYPE_TABLE_FUNCTION";
162  case DB_TYPE_MEMCACHE:
163  return "DB_TYPE_MEMCACHE";
164  case DB_TYPE_FALCON:
165  return "DB_TYPE_FALCON";
166  case DB_TYPE_MARIA:
167  return "DB_TYPE_MARIA";
168  case DB_TYPE_PERFORMANCE_SCHEMA:
169  return "DB_TYPE_PERFORMANCE_SCHEMA";
170  default:
171  return "DB_TYPE_DYNAMIC";
172  }
173 }
174 #endif
175 
181 const char* mysqld_system_database= "mysql";
182 
183 // System tables that belong to mysqld_system_database.
184 st_system_tablename mysqld_system_tables[]= {
185  {mysqld_system_database, "db"},
186  {mysqld_system_database, "user"},
187  {mysqld_system_database, "host"},
188  {mysqld_system_database, "func"},
189  {mysqld_system_database, "proc"},
190  {mysqld_system_database, "event"},
191  {mysqld_system_database, "plugin"},
192  {mysqld_system_database, "servers"},
193  {mysqld_system_database, "procs_priv"},
194  {mysqld_system_database, "tables_priv"},
195  {mysqld_system_database, "proxies_priv"},
196  {mysqld_system_database, "columns_priv"},
197  {mysqld_system_database, "time_zone"},
198  {mysqld_system_database, "time_zone_name"},
199  {mysqld_system_database, "time_zone_leap_second"},
200  {mysqld_system_database, "time_zone_transition"},
201  {mysqld_system_database, "time_zone_transition_type"},
202  {mysqld_system_database, "help_category"},
203  {mysqld_system_database, "help_keyword"},
204  {mysqld_system_database, "help_relation"},
205  {mysqld_system_database, "help_topic"},
206  {(const char *)NULL, (const char *)NULL} /* This must be at the end */
207 };
208 
213 static const char **known_system_databases= NULL;
214 static const char **ha_known_system_databases();
215 
216 // Called for each SE to get SE specific system database.
217 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
218  void *arg);
219 
220 // Called for each SE to check if given db.table_name is a system table.
221 static my_bool check_engine_system_table_handlerton(THD *unused,
222  plugin_ref plugin,
223  void *arg);
230 {
231  const char *db; // IN param
232  const char *table_name; // IN param
233  bool is_sql_layer_system_table; // IN param
234  legacy_db_type db_type; // IN param
235 
236  enum enum_sys_tbl_chk_status
237  {
238  // db.table_name is not a supported system table.
239  NOT_KNOWN_SYSTEM_TABLE,
240  /*
241  db.table_name is a system table,
242  but may not be supported by SE.
243  */
244  KNOWN_SYSTEM_TABLE,
245  /*
246  db.table_name is a system table,
247  and is supported by SE.
248  */
249  SUPPORTED_SYSTEM_TABLE
250  } status; // OUT param
251 };
252 
253 
254 static plugin_ref ha_default_plugin(THD *thd)
255 {
256  if (thd->variables.table_plugin)
257  return thd->variables.table_plugin;
258  return my_plugin_lock(thd, &global_system_variables.table_plugin);
259 }
260 
261 
274 {
275  plugin_ref plugin= ha_default_plugin(thd);
276  DBUG_ASSERT(plugin);
277  handlerton *hton= plugin_data(plugin, handlerton*);
278  DBUG_ASSERT(hton);
279  return hton;
280 }
281 
282 
283 static plugin_ref ha_default_temp_plugin(THD *thd)
284 {
285  if (thd->variables.temp_table_plugin)
286  return thd->variables.temp_table_plugin;
287  return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
288 }
289 
290 
303 {
304  plugin_ref plugin= ha_default_temp_plugin(thd);
305  DBUG_ASSERT(plugin);
306  handlerton *hton= plugin_data(plugin, handlerton*);
307  DBUG_ASSERT(hton);
308  return hton;
309 }
310 
311 
324  bool is_temp_table)
325 {
326  const LEX_STRING *table_alias;
327  plugin_ref plugin;
328 
329 redo:
330  /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
331  if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
332  (const uchar *)name->str, name->length,
333  (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
334  return is_temp_table ?
335  ha_default_plugin(thd) : ha_default_temp_plugin(thd);
336 
337  if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
338  {
339  handlerton *hton= plugin_data(plugin, handlerton *);
340  if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
341  return plugin;
342 
343  /*
344  unlocking plugin immediately after locking is relatively low cost.
345  */
346  plugin_unlock(thd, plugin);
347  }
348 
349  /*
350  We check for the historical aliases.
351  */
352  for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
353  {
354  if (!my_strnncoll(&my_charset_latin1,
355  (const uchar *)name->str, name->length,
356  (const uchar *)table_alias->str, table_alias->length))
357  {
358  name= table_alias + 1;
359  goto redo;
360  }
361  }
362 
363  return NULL;
364 }
365 
366 
367 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
368 {
369  if (hton)
370  {
371  st_plugin_int **plugin= hton2plugin + hton->slot;
372 
373 #ifdef DBUG_OFF
374  return my_plugin_lock(thd, plugin);
375 #else
376  return my_plugin_lock(thd, &plugin);
377 #endif
378  }
379  return NULL;
380 }
381 
382 
383 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
384 {
385  plugin_ref plugin;
386  switch (db_type) {
387  case DB_TYPE_DEFAULT:
388  return ha_default_handlerton(thd);
389  default:
390  if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
391  (plugin= ha_lock_engine(thd, installed_htons[db_type])))
392  return plugin_data(plugin, handlerton*);
393  /* fall through */
394  case DB_TYPE_UNKNOWN:
395  return NULL;
396  }
397 }
398 
399 
403 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
404  bool no_substitute, bool report_error)
405 {
406  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
407  if (ha_storage_engine_is_enabled(hton))
408  return hton;
409 
410  if (no_substitute)
411  {
412  if (report_error)
413  {
414  const char *engine_name= ha_resolve_storage_engine_name(hton);
415  my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
416  }
417  return NULL;
418  }
419 
420  (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));
421 
422  switch (database_type) {
423  case DB_TYPE_MRG_ISAM:
424  return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
425  default:
426  break;
427  }
428 
429  return ha_default_handlerton(thd);
430 } /* ha_checktype */
431 
432 
433 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
434  handlerton *db_type)
435 {
436  handler *file;
437  DBUG_ENTER("get_new_handler");
438  DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
439 
440  if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
441  {
442  if ((file= db_type->create(db_type, share, alloc)))
443  file->init();
444  DBUG_RETURN(file);
445  }
446  /*
447  Try the default table type
448  Here the call to current_thd() is ok as we call this function a lot of
449  times but we enter this branch very seldom.
450  */
451  DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
452 }
453 
454 
455 #ifdef WITH_PARTITION_STORAGE_ENGINE
456 handler *get_ha_partition(partition_info *part_info)
457 {
458  ha_partition *partition;
459  DBUG_ENTER("get_ha_partition");
460  if ((partition= new ha_partition(partition_hton, part_info)))
461  {
462  if (partition->initialize_partition(current_thd->mem_root))
463  {
464  delete partition;
465  partition= 0;
466  }
467  else
468  partition->init();
469  }
470  else
471  {
472  my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR),
473  static_cast<int>(sizeof(ha_partition)));
474  }
475  DBUG_RETURN(((handler*) partition));
476 }
477 #endif
478 
479 
480 static const char **handler_errmsgs;
481 
482 C_MODE_START
483 static const char **get_handler_errmsgs()
484 {
485  return handler_errmsgs;
486 }
487 C_MODE_END
488 
489 
499 int ha_init_errors(void)
500 {
501 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
502 
503  /* Allocate a pointer array for the error message strings. */
504  /* Zerofill it to avoid uninitialized gaps. */
505  if (! (handler_errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
506  MYF(MY_WME | MY_ZEROFILL))))
507  return 1;
508 
509  /* Set the dedicated error messages. */
510  SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
511  SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
512  SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");
513  SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
514  SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
515  SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
516  SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
517  SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
518  SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
519  SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
520  SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
521  SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
522  SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
523  SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
524  SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
525  SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
526  SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");
527  SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
528  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
529  SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
530  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
531  SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
532  SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
533  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
534  SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
535  SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
536  SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
537  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
538  SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
539  SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
540  SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
541  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
542  SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
543  SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
544  SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
545  SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
546  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
547  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
548  SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
549  SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
550  SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
551  SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
552  SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
553  SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
554  SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
555  SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
556  SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
557  SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");
558 
559  /* Register the error messages for use with my_error(). */
560  return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
561 }
562 
563 
572 static int ha_finish_errors(void)
573 {
574  const char **errmsgs;
575 
576  /* Allocate a pointer array for the error message strings. */
577  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
578  return 1;
579  my_free(errmsgs);
580  return 0;
581 }
582 
583 
584 int ha_finalize_handlerton(st_plugin_int *plugin)
585 {
586  handlerton *hton= (handlerton *)plugin->data;
587  DBUG_ENTER("ha_finalize_handlerton");
588 
589  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
590  if (!hton)
591  goto end;
592 
593  switch (hton->state)
594  {
595  case SHOW_OPTION_NO:
596  case SHOW_OPTION_DISABLED:
597  break;
598  case SHOW_OPTION_YES:
599  if (installed_htons[hton->db_type] == hton)
600  installed_htons[hton->db_type]= NULL;
601  break;
602  };
603 
604  if (hton->panic)
605  hton->panic(hton, HA_PANIC_CLOSE);
606 
607  if (plugin->plugin->deinit)
608  {
609  /*
610  Today we have no defined/special behavior for uninstalling
611  engine plugins.
612  */
613  DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
614  if (plugin->plugin->deinit(NULL))
615  {
616  DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
617  plugin->name.str));
618  }
619  }
620 
621  /*
622  In case a plugin is uninstalled and re-installed later, it should
623  reuse an array slot. Otherwise the number of uninstall/install
624  cycles would be limited.
625  */
626  if (hton->slot != HA_SLOT_UNDEF)
627  {
628  /* Make sure we are not unpluging another plugin */
629  DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
630  DBUG_ASSERT(hton->slot < MAX_HA);
631  hton2plugin[hton->slot]= NULL;
632  }
633 
634  my_free(hton);
635 
636  end:
637  DBUG_RETURN(0);
638 }
639 
640 
641 int ha_initialize_handlerton(st_plugin_int *plugin)
642 {
643  handlerton *hton;
644  DBUG_ENTER("ha_initialize_handlerton");
645  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
646 
647  hton= (handlerton *)my_malloc(sizeof(handlerton),
648  MYF(MY_WME | MY_ZEROFILL));
649 
650  if (hton == NULL)
651  {
652  sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
653  plugin->name.str);
654  goto err_no_hton_memory;
655  }
656 
657  hton->slot= HA_SLOT_UNDEF;
658  /* Historical Requirement */
659  plugin->data= hton; // shortcut for the future
660  if (plugin->plugin->init && plugin->plugin->init(hton))
661  {
662  sql_print_error("Plugin '%s' init function returned error.",
663  plugin->name.str);
664  goto err;
665  }
666 
667  /*
668  the switch below and hton->state should be removed when
669  command-line options for plugins will be implemented
670  */
671  DBUG_PRINT("info", ("hton->state=%d", hton->state));
672  switch (hton->state) {
673  case SHOW_OPTION_NO:
674  break;
675  case SHOW_OPTION_YES:
676  {
677  uint tmp;
678  ulong fslot;
679  /* now check the db_type for conflict */
680  if (hton->db_type <= DB_TYPE_UNKNOWN ||
681  hton->db_type >= DB_TYPE_DEFAULT ||
682  installed_htons[hton->db_type])
683  {
684  int idx= (int) DB_TYPE_FIRST_DYNAMIC;
685 
686  while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
687  idx++;
688 
689  if (idx == (int) DB_TYPE_DEFAULT)
690  {
691  sql_print_warning("Too many storage engines!");
692  goto err_deinit;
693  }
694  if (hton->db_type != DB_TYPE_UNKNOWN)
695  sql_print_warning("Storage engine '%s' has conflicting typecode. "
696  "Assigning value %d.", plugin->plugin->name, idx);
697  hton->db_type= (enum legacy_db_type) idx;
698  }
699 
700  /*
701  In case a plugin is uninstalled and re-installed later, it should
702  reuse an array slot. Otherwise the number of uninstall/install
703  cycles would be limited. So look for a free slot.
704  */
705  DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
706  for (fslot= 0; fslot < total_ha; fslot++)
707  {
708  if (!hton2plugin[fslot])
709  break;
710  }
711  if (fslot < total_ha)
712  hton->slot= fslot;
713  else
714  {
715  if (total_ha >= MAX_HA)
716  {
717  sql_print_error("Too many plugins loaded. Limit is %lu. "
718  "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
719  goto err_deinit;
720  }
721  hton->slot= total_ha++;
722  }
723  installed_htons[hton->db_type]= hton;
724  tmp= hton->savepoint_offset;
725  hton->savepoint_offset= savepoint_alloc_size;
726  savepoint_alloc_size+= tmp;
727  hton2plugin[hton->slot]=plugin;
728  if (hton->prepare)
729  total_ha_2pc++;
730  break;
731  }
732  /* fall through */
733  default:
734  hton->state= SHOW_OPTION_DISABLED;
735  break;
736  }
737 
738  /*
739  This is entirely for legacy. We will create a new "disk based" hton and a
740  "memory" hton which will be configurable longterm. We should be able to
741  remove partition and myisammrg.
742  */
743  switch (hton->db_type) {
744  case DB_TYPE_HEAP:
745  heap_hton= hton;
746  break;
747  case DB_TYPE_MYISAM:
748  myisam_hton= hton;
749  break;
750  case DB_TYPE_PARTITION_DB:
751  partition_hton= hton;
752  break;
753  default:
754  break;
755  };
756 
757  DBUG_RETURN(0);
758 
759 err_deinit:
760  /*
761  Let plugin do its inner deinitialization as plugin->init()
762  was successfully called before.
763  */
764  if (plugin->plugin->deinit)
765  (void) plugin->plugin->deinit(NULL);
766 
767 err:
768  my_free(hton);
769 err_no_hton_memory:
770  plugin->data= NULL;
771  DBUG_RETURN(1);
772 }
773 
774 int ha_init()
775 {
776  int error= 0;
777  DBUG_ENTER("ha_init");
778 
779  DBUG_ASSERT(total_ha < MAX_HA);
780  /*
781  Check if there is a transaction-capable storage engine besides the
782  binary log (which is considered a transaction-capable storage engine in
783  counting total_ha)
784  */
785  opt_using_transactions= total_ha>(ulong)opt_bin_log;
786  savepoint_alloc_size+= sizeof(SAVEPOINT);
787 
788  /*
789  Initialize system database name cache.
790  This cache is used to do a quick check if a given
791  db.tablename is a system table.
792  */
793  known_system_databases= ha_known_system_databases();
794 
795  DBUG_RETURN(error);
796 }
797 
798 int ha_end()
799 {
800  int error= 0;
801  DBUG_ENTER("ha_end");
802 
803 
804  /*
805  This should be eventualy based on the graceful shutdown flag.
806  So if flag is equal to HA_PANIC_CLOSE, the deallocate
807  the errors.
808  */
809  if (ha_finish_errors())
810  error= 1;
811 
812  DBUG_RETURN(error);
813 }
814 
815 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
816  void *path)
817 {
818  handlerton *hton= plugin_data(plugin, handlerton *);
819  if (hton->state == SHOW_OPTION_YES && hton->drop_database)
820  hton->drop_database(hton, (char *)path);
821  return FALSE;
822 }
823 
824 
825 void ha_drop_database(char* path)
826 {
827  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
828 }
829 
830 
831 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
832  void *unused)
833 {
834  handlerton *hton= plugin_data(plugin, handlerton *);
835  /*
836  there's no need to rollback here as all transactions must
837  be rolled back already
838  */
839  if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
840  {
841  if (hton->close_connection)
842  hton->close_connection(hton, thd);
843  /* make sure ha_data is reset and ha_data_lock is released */
844  thd_set_ha_data(thd, hton, NULL);
845  }
846  return FALSE;
847 }
848 
849 
854 void ha_close_connection(THD* thd)
855 {
856  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
857 }
858 
859 /* ========================================================================
860  ======================= TRANSACTIONS ===================================*/
861 
1165 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
1166 {
1167  THD_TRANS *trans;
1168  Ha_trx_info *ha_info;
1169  DBUG_ENTER("trans_register_ha");
1170  DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1171 
1172  if (all)
1173  {
1174  trans= &thd->transaction.all;
1175  thd->server_status|= SERVER_STATUS_IN_TRANS;
1176  if (thd->tx_read_only)
1177  thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1178  DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1179  }
1180  else
1181  trans= &thd->transaction.stmt;
1182 
1183  ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1184 
1185  if (ha_info->is_started())
1186  DBUG_VOID_RETURN; /* already registered, return */
1187 
1188  ha_info->register_ha(trans, ht_arg);
1189 
1190  trans->no_2pc|=(ht_arg->prepare==0);
1191  if (thd->transaction.xid_state.xid.is_null())
1192  thd->transaction.xid_state.xid.set(thd->query_id);
1193  DBUG_VOID_RETURN;
1194 }
1195 
1202 int ha_prepare(THD *thd)
1203 {
1204  int error=0, all=1;
1205  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1206  Ha_trx_info *ha_info= trans->ha_list;
1207  DBUG_ENTER("ha_prepare");
1208 
1209  if (ha_info)
1210  {
1211  for (; ha_info; ha_info= ha_info->next())
1212  {
1213  int err;
1214  handlerton *ht= ha_info->ht();
1215  status_var_increment(thd->status_var.ha_prepare_count);
1216  if (ht->prepare)
1217  {
1218  if ((err= ht->prepare(ht, thd, all)))
1219  {
1220  my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1221  ha_rollback_trans(thd, all);
1222  error=1;
1223  break;
1224  }
1225  }
1226  else
1227  {
1228  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1229  ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
1230  ha_resolve_storage_engine_name(ht));
1231  }
1232  }
1233  }
1234 
1235  DBUG_RETURN(error);
1236 }
1237 
1257 static
1258 uint
1259 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1260  bool all)
1261 {
1262  /* The number of storage engines that have actual changes. */
1263  unsigned rw_ha_count= 0;
1264  Ha_trx_info *ha_info;
1265 
1266  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1267  {
1268  if (ha_info->is_trx_read_write())
1269  ++rw_ha_count;
1270 
1271  if (! all)
1272  {
1273  Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1274  DBUG_ASSERT(ha_info != ha_info_all);
1275  /*
1276  Merge read-only/read-write information about statement
1277  transaction to its enclosing normal transaction. Do this
1278  only if in a real transaction -- that is, if we know
1279  that ha_info_all is registered in thd->transaction.all.
1280  Since otherwise we only clutter the normal transaction flags.
1281  */
1282  if (ha_info_all->is_started()) /* FALSE if autocommit. */
1283  ha_info_all->coalesce_trx_with(ha_info);
1284  }
1285  else if (rw_ha_count > 1)
1286  {
1287  /*
1288  It is a normal transaction, so we don't need to merge read/write
1289  information up, and the need for two-phase commit has been
1290  already established. Break the loop prematurely.
1291  */
1292  break;
1293  }
1294  }
1295  return rw_ha_count;
1296 }
1297 
1298 
1319 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
1320 {
1321  int error= 0;
1322  /*
1323  'all' means that this is either an explicit commit issued by
1324  user, or an implicit commit issued by a DDL.
1325  */
1326  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
1327  /*
1328  "real" is a nick name for a transaction for which a commit will
1329  make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1330  transation is not 'real': even though it's possible to commit it,
1331  the changes are not durable as they might be rolled back if the
1332  enclosing 'all' transaction is rolled back.
1333  */
1334  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
1335  Ha_trx_info *ha_info= trans->ha_list;
1336  DBUG_ENTER("ha_commit_trans");
1337 
1338  DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1339  all, thd->in_sub_stmt, ha_info, is_real_trans));
1340  /*
1341  We must not commit the normal transaction if a statement
1342  transaction is pending. Otherwise statement transaction
1343  flags will not get propagated to its normal transaction's
1344  counterpart.
1345  */
1346  DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1347  trans == &thd->transaction.stmt);
1348 
1349  if (thd->in_sub_stmt)
1350  {
1351  DBUG_ASSERT(0);
1352  /*
1353  Since we don't support nested statement transactions in 5.0,
1354  we can't commit or rollback stmt transactions while we are inside
1355  stored functions or triggers. So we simply do nothing now.
1356  TODO: This should be fixed in later ( >= 5.1) releases.
1357  */
1358  if (!all)
1359  DBUG_RETURN(0);
1360  /*
1361  We assume that all statements which commit or rollback main transaction
1362  are prohibited inside of stored functions or triggers. So they should
1363  bail out with error even before ha_commit_trans() call. To be 100% safe
1364  let us throw error in non-debug builds.
1365  */
1366  my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1367  DBUG_RETURN(2);
1368  }
1369 
1370  MDL_request mdl_request;
1371  bool release_mdl= false;
1372 
1373  if (ha_info)
1374  {
1375  uint rw_ha_count;
1376  bool rw_trans;
1377 
1378  DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1379 
1380  /* Close all cursors that can not survive COMMIT */
1381  if (is_real_trans) /* not a statement commit */
1382  thd->stmt_map.close_transient_cursors();
1383 
1384  rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1385  trans->rw_ha_count= rw_ha_count;
1386  /* rw_trans is TRUE when we in a transaction changing data */
1387  rw_trans= is_real_trans && (rw_ha_count > 0);
1388 
1389  if (rw_trans && !ignore_global_read_lock)
1390  {
1391  /*
1392  Acquire a metadata lock which will ensure that COMMIT is blocked
1393  by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1394  COMMIT in progress blocks FTWRL).
1395 
1396  We allow the owner of FTWRL to COMMIT; we assume that it knows
1397  what it does.
1398  */
1399  mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1400  MDL_EXPLICIT);
1401 
1402  DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1403  if (thd->mdl_context.acquire_lock(&mdl_request,
1404  thd->variables.lock_wait_timeout))
1405  {
1406  ha_rollback_trans(thd, all);
1407  DBUG_RETURN(1);
1408  }
1409  release_mdl= true;
1410 
1411  DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1412  }
1413 
1414  if (rw_trans &&
1415  opt_readonly &&
1416  !(thd->security_ctx->master_access & SUPER_ACL) &&
1417  !thd->slave_thread)
1418  {
1419  my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1420  ha_rollback_trans(thd, all);
1421  error= 1;
1422  goto end;
1423  }
1424 
1425  if (!trans->no_2pc && (rw_ha_count > 1))
1426  error= tc_log->prepare(thd, all);
1427  }
1428  if (error || (error= tc_log->commit(thd, all)))
1429  {
1430  ha_rollback_trans(thd, all);
1431  error= 1;
1432  goto end;
1433  }
1434  DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1435 end:
1436  if (release_mdl && mdl_request.ticket)
1437  {
1438  /*
1439  We do not always immediately release transactional locks
1440  after ha_commit_trans() (see uses of ha_enable_transaction()),
1441  thus we release the commit blocker lock as soon as it's
1442  not needed.
1443  */
1444  DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1445  thd->mdl_context.release_lock(mdl_request.ticket);
1446  }
1447  /* Free resources and perform other cleanup even for 'empty' transactions. */
1448  if (is_real_trans)
1449  thd->transaction.cleanup();
1450  DBUG_RETURN(error);
1451 }
1452 
1472 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1473 {
1474  int error=0;
1475  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1476  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1477  DBUG_ENTER("ha_commit_low");
1478 
1479  if (ha_info)
1480  {
1481  for (; ha_info; ha_info= ha_info_next)
1482  {
1483  int err;
1484  handlerton *ht= ha_info->ht();
1485  if ((err= ht->commit(ht, thd, all)))
1486  {
1487  my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1488  error=1;
1489  }
1490  status_var_increment(thd->status_var.ha_commit_count);
1491  ha_info_next= ha_info->next();
1492  ha_info->reset(); /* keep it conveniently zero-filled */
1493  }
1494  trans->ha_list= 0;
1495  trans->no_2pc=0;
1496  trans->rw_ha_count= 0;
1497  if (all)
1498  {
1499 #ifdef HAVE_QUERY_CACHE
1500  if (thd->transaction.changed_tables)
1501  query_cache.invalidate(thd->transaction.changed_tables);
1502 #endif
1503  }
1504  }
1505  /* Free resources and perform other cleanup even for 'empty' transactions. */
1506  if (all)
1507  thd->transaction.cleanup();
1508  /*
1509  When the transaction has been committed, we clear the commit_low
1510  flag. This allow other parts of the system to check if commit_low
1511  was called.
1512  */
1513  thd->transaction.flags.commit_low= false;
1514  if (run_after_commit)
1515  {
1516  /* If commit succeeded, we call the after_commit hook */
1517  if (!error)
1518  (void) RUN_HOOK(transaction, after_commit, (thd, all));
1519  thd->transaction.flags.run_hooks= false;
1520  }
1521  DBUG_RETURN(error);
1522 }
1523 
1524 
1525 int ha_rollback_low(THD *thd, bool all)
1526 {
1527  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1528  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1529  int error= 0;
1530 
1531  if (ha_info)
1532  {
1533  /* Close all cursors that can not survive ROLLBACK */
1534  if (all) /* not a statement commit */
1535  thd->stmt_map.close_transient_cursors();
1536 
1537  for (; ha_info; ha_info= ha_info_next)
1538  {
1539  int err;
1540  handlerton *ht= ha_info->ht();
1541  if ((err= ht->rollback(ht, thd, all)))
1542  { // cannot happen
1543  my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1544  error= 1;
1545  }
1546  status_var_increment(thd->status_var.ha_rollback_count);
1547  ha_info_next= ha_info->next();
1548  ha_info->reset(); /* keep it conveniently zero-filled */
1549  }
1550  trans->ha_list= 0;
1551  trans->no_2pc=0;
1552  trans->rw_ha_count= 0;
1553  }
1554 
1555  /*
1556  Thanks to possibility of MDL deadlock rollback request can come even if
1557  transaction hasn't been started in any transactional storage engine.
1558  */
1559  if (all && thd->transaction_rollback_request &&
1560  thd->transaction.xid_state.xa_state != XA_NOTR)
1561  thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno();
1562 
1563  (void) RUN_HOOK(transaction, after_rollback, (thd, all));
1564  return error;
1565 }
1566 
1567 
/*
  Roll back the statement transaction (all == false) or the normal
  transaction (all == true) through the transaction coordinator log, then
  perform session-level cleanup.

  Returns 1 (after raising ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG) when called
  with all == true inside a sub-statement, otherwise the value of 'error'.

  NOTE(review): 'error' is initialised to 0 and never assigned afterwards,
  and the value returned by tc_log->rollback() is discarded, so every path
  outside the in_sub_stmt branch returns 0. Confirm whether rollback
  failures are meant to be propagated to callers.
*/
1568 int ha_rollback_trans(THD *thd, bool all)
1569 {
1570  int error=0;
1571 #ifndef DBUG_OFF
      /* Only needed by the DBUG_ASSERT below, hence debug builds only. */
1572  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1573 #endif
1574  /*
1575  "real" is a nick name for a transaction for which a commit will
1576  make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1577  transaction is not 'real': even though it's possible to commit it,
1578  the changes are not durable as they might be rolled back if the
1579  enclosing 'all' transaction is rolled back.
1580  We establish the value of 'is_real_trans' by checking
1581  if it's an explicit COMMIT or BEGIN statement, or implicit
1582  commit issued by DDL (in these cases all == TRUE),
1583  or if we're running in autocommit mode (it's only in the autocommit mode
1584  ha_commit_one_phase() is called with an empty
1585  transaction.all.ha_list, see why in trans_register_ha()).
1586  */
1587  bool is_real_trans= all || thd->transaction.all.ha_list == NULL;
1588  DBUG_ENTER("ha_rollback_trans");
1589 
1590  /*
1591  We must not rollback the normal transaction if a statement
1592  transaction is pending.
1593  */
1594  DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1595  trans == &thd->transaction.stmt);
1596 
1597  if (thd->in_sub_stmt)
1598  {
       /* Reaching this branch is treated as a programming error in debug builds. */
1599  DBUG_ASSERT(0);
1600  /*
1601  If we are inside stored function or trigger we should not commit or
1602  rollback current statement transaction. See comment in ha_commit_trans()
1603  call for more information.
1604  */
1605  if (!all)
1606  DBUG_RETURN(0);
1607  my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1608  DBUG_RETURN(1);
1609  }
1610 
      /* The coordinator's return value is not inspected here. */
1611  if (tc_log)
1612  tc_log->rollback(thd, all);
1613 
1614  /* Always cleanup. Even if nht==0. There may be savepoints. */
1615  if (is_real_trans)
1616  thd->transaction.cleanup();
      /* A full rollback satisfies any pending rollback request. */
1617  if (all)
1618  thd->transaction_rollback_request= FALSE;
1619 
1620  /*
1621  Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
1622  complete transaction is being rollback or autocommit=1.
1623  */
1624  if (is_real_trans)
1625  gtid_rollback(thd);
1626 
1627  /*
1628  If the transaction cannot be rolled back safely, warn; don't warn if this
1629  is a slave thread (because when a slave thread executes a ROLLBACK, it has
1630  been read from the binary log, so it's 100% sure and normal to produce
1631  error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
1632  slave SQL thread, it would not stop the thread but just be printed in
1633  the error log; but we don't want users to wonder why they have this
1634  message in the error log, so we don't send it.
1635  */
1636 #ifndef DBUG_OFF
1637  thd->transaction.stmt.dbug_unsafe_rollback_flags("stmt");
1638  thd->transaction.all.dbug_unsafe_rollback_flags("all");
1639 #endif
      /* The warning is also skipped when the connection is being killed. */
1640  if (is_real_trans && thd->transaction.all.cannot_safely_rollback() &&
1641  !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
1642  thd->transaction.push_unsafe_rollback_warnings(thd);
1643  DBUG_RETURN(error);
1644 }
1645 
1646 
/*
  Argument block handed through plugin_foreach() to xacommit_handlerton()
  and xarollback_handlerton().
*/
1647 struct xahton_st {
1648  XID *xid; /* XID passed to commit_by_xid() / rollback_by_xid() */
1649  int result; /* set to 0 by a callback once it has called an engine */
1650 };
1651 
1652 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1653  void *arg)
1654 {
1655  handlerton *hton= plugin_data(plugin, handlerton *);
1656  if (hton->state == SHOW_OPTION_YES && hton->recover)
1657  {
1658  hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1659  ((struct xahton_st *)arg)->result= 0;
1660  }
1661  return FALSE;
1662 }
1663 
1664 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1665  void *arg)
1666 {
1667  handlerton *hton= plugin_data(plugin, handlerton *);
1668  if (hton->state == SHOW_OPTION_YES && hton->recover)
1669  {
1670  hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1671  ((struct xahton_st *)arg)->result= 0;
1672  }
1673  return FALSE;
1674 }
1675 
1676 
1677 int ha_commit_or_rollback_by_xid(THD *thd, XID *xid, bool commit)
1678 {
1679  struct xahton_st xaop;
1680  xaop.xid= xid;
1681  xaop.result= 1;
1682 
1683  plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1684  MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1685 
1686  gtid_rollback(thd);
1687 
1688  return xaop.result;
1689 }
1690 
1691 
1692 #ifndef DBUG_OFF
1693 
1697 static char* xid_to_str(char *buf, XID *xid)
1698 {
1699  int i;
1700  char *s=buf;
1701  *s++='\'';
1702  for (i=0; i < xid->gtrid_length+xid->bqual_length; i++)
1703  {
1704  uchar c=(uchar)xid->data[i];
1705  /* is_next_dig is set if next character is a number */
1706  bool is_next_dig= FALSE;
1707  if (i < XIDDATASIZE)
1708  {
1709  char ch= xid->data[i+1];
1710  is_next_dig= (ch >= '0' && ch <='9');
1711  }
1712  if (i == xid->gtrid_length)
1713  {
1714  *s++='\'';
1715  if (xid->bqual_length)
1716  {
1717  *s++='.';
1718  *s++='\'';
1719  }
1720  }
1721  if (c < 32 || c > 126)
1722  {
1723  *s++='\\';
1724  /*
1725  If next character is a number, write current character with
1726  3 octal numbers to ensure that the next number is not seen
1727  as part of the octal number
1728  */
1729  if (c > 077 || is_next_dig)
1730  *s++=_dig_vec_lower[c >> 6];
1731  if (c > 007 || is_next_dig)
1732  *s++=_dig_vec_lower[(c >> 3) & 7];
1733  *s++=_dig_vec_lower[c & 7];
1734  }
1735  else
1736  {
1737  if (c == '\'' || c == '\\')
1738  *s++='\\';
1739  *s++=c;
1740  }
1741  }
1742  *s++='\'';
1743  *s=0;
1744  return buf;
1745 }
1746 #endif
1747 
/*
  State shared between ha_recover() and xarecover_handlerton().
  NOTE(review): the line carrying the 'struct xarecover_st' tag is missing
  from this listing; the name is taken from the casts in the users below.
*/
1765 {
      /* len: XID capacity passed to hton->recover(); the two counters are
         incremented by xarecover_handlerton() */
1766  int len, found_foreign_xids, found_my_xids;
1767  XID *list; /* buffer that hton->recover() fills with XIDs */
1768  HASH *commit_list; /* when set, XIDs found in it are committed, others rolled back */
1769  bool dry_run; /* when true, own XIDs are only counted, not resolved */
1770 };
1771 
1772 static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
1773  void *arg)
1774 {
1775  handlerton *hton= plugin_data(plugin, handlerton *);
1776  struct xarecover_st *info= (struct xarecover_st *) arg;
1777  int got;
1778 
1779  if (hton->state == SHOW_OPTION_YES && hton->recover)
1780  {
1781  while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1782  {
1783  sql_print_information("Found %d prepared transaction(s) in %s",
1784  got, ha_resolve_storage_engine_name(hton));
1785  for (int i=0; i < got; i ++)
1786  {
1787  my_xid x=info->list[i].get_my_xid();
1788  if (!x) // not "mine" - that is generated by external TM
1789  {
1790 #ifndef DBUG_OFF
1791  char buf[XIDDATASIZE*4+6]; // see xid_to_str
1792  sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i));
1793 #endif
1794  xid_cache_insert(info->list+i, XA_PREPARED);
1795  info->found_foreign_xids++;
1796  continue;
1797  }
1798  if (info->dry_run)
1799  {
1800  info->found_my_xids++;
1801  continue;
1802  }
1803  // recovery mode
1804  if (info->commit_list ?
1805  my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1806  tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1807  {
1808 #ifndef DBUG_OFF
1809  char buf[XIDDATASIZE*4+6]; // see xid_to_str
1810  sql_print_information("commit xid %s", xid_to_str(buf, info->list+i));
1811 #endif
1812  hton->commit_by_xid(hton, info->list+i);
1813  }
1814  else
1815  {
1816 #ifndef DBUG_OFF
1817  char buf[XIDDATASIZE*4+6]; // see xid_to_str
1818  sql_print_information("rollback xid %s",
1819  xid_to_str(buf, info->list+i));
1820 #endif
1821  hton->rollback_by_xid(hton, info->list+i);
1822  }
1823  }
1824  if (got < info->len)
1825  break;
1826  }
1827  }
1828  return FALSE;
1829 }
1830 
1831 int ha_recover(HASH *commit_list)
1832 {
1833  struct xarecover_st info;
1834  DBUG_ENTER("ha_recover");
1835  info.found_foreign_xids= info.found_my_xids= 0;
1836  info.commit_list= commit_list;
1837  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
1838  info.list= NULL;
1839 
1840  /* commit_list and tc_heuristic_recover cannot be set both */
1841  DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
1842  /* if either is set, total_ha_2pc must be set too */
1843  DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log);
1844 
1845  if (total_ha_2pc <= (ulong)opt_bin_log)
1846  DBUG_RETURN(0);
1847 
1848  if (info.commit_list)
1849  sql_print_information("Starting crash recovery...");
1850 
1851 #ifndef WILL_BE_DELETED_LATER
1852  /*
1853  for now, only InnoDB supports 2pc. It means we can always safely
1854  rollback all pending transactions, without risking inconsistent data
1855  */
1856  DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
1857  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
1858  info.dry_run=FALSE;
1859 #endif
1860 
1861  for (info.len= MAX_XID_LIST_SIZE ;
1862  info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
1863  {
1864  info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
1865  }
1866  if (!info.list)
1867  {
1868  sql_print_error(ER(ER_OUTOFMEMORY),
1869  static_cast<int>(info.len*sizeof(XID)));
1870  DBUG_RETURN(1);
1871  }
1872 
1873  plugin_foreach(NULL, xarecover_handlerton,
1874  MYSQL_STORAGE_ENGINE_PLUGIN, &info);
1875 
1876  my_free(info.list);
1877  if (info.found_foreign_xids)
1878  sql_print_warning("Found %d prepared XA transactions",
1879  info.found_foreign_xids);
1880  if (info.dry_run && info.found_my_xids)
1881  {
1882  sql_print_error("Found %d prepared transactions! It means that mysqld was "
1883  "not shut down properly last time and critical recovery "
1884  "information (last binlog or %s file) was manually deleted "
1885  "after a crash. You have to start mysqld with "
1886  "--tc-heuristic-recover switch to commit or rollback "
1887  "pending transactions.",
1888  info.found_my_xids, opt_tc_log_file);
1889  DBUG_RETURN(1);
1890  }
1891  if (info.commit_list)
1892  sql_print_information("Crash recovery finished.");
1893  DBUG_RETURN(0);
1894 }
1895 
1904 bool mysql_xa_recover(THD *thd)
1905 {
1906  List<Item> field_list;
1907  Protocol *protocol= thd->protocol;
1908  int i=0;
1909  XID_STATE *xs;
1910  DBUG_ENTER("mysql_xa_recover");
1911 
1912  field_list.push_back(new Item_int(NAME_STRING("formatID"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
1913  field_list.push_back(new Item_int(NAME_STRING("gtrid_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
1914  field_list.push_back(new Item_int(NAME_STRING("bqual_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
1915  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1916 
1917  if (protocol->send_result_set_metadata(&field_list,
1918  Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
1919  DBUG_RETURN(1);
1920 
1921  mysql_mutex_lock(&LOCK_xid_cache);
1922  while ((xs= (XID_STATE*) my_hash_element(&xid_cache, i++)))
1923  {
1924  if (xs->xa_state==XA_PREPARED)
1925  {
1926  protocol->prepare_for_resend();
1927  protocol->store_longlong((longlong)xs->xid.formatID, FALSE);
1928  protocol->store_longlong((longlong)xs->xid.gtrid_length, FALSE);
1929  protocol->store_longlong((longlong)xs->xid.bqual_length, FALSE);
1930  protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1931  &my_charset_bin);
1932  if (protocol->write())
1933  {
1934  mysql_mutex_unlock(&LOCK_xid_cache);
1935  DBUG_RETURN(1);
1936  }
1937  }
1938  }
1939 
1940  mysql_mutex_unlock(&LOCK_xid_cache);
1941  my_eof(thd);
1942  DBUG_RETURN(0);
1943 }
1944 
/*
  Body of a function that walks the engines registered in the statement
  transaction and calls each engine's release_temporary_latches() entry
  point when it is provided. Always returns 0.
  NOTE(review): the signature line is missing from this listing; the body
  reads a 'thd' argument. Presumably ha_release_temporary_latches(THD *thd)
  -- confirm against the original file.
*/
1965 {
1966  Ha_trx_info *info;
1967 
1968  /*
1969  Note that below we assume that only transactional storage engines
1970  may need release_temporary_latches(). If this will ever become false,
1971  we could iterate on thd->open_tables instead (and remove duplicates
1972  as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
1973  */
1974  for (info= thd->transaction.stmt.ha_list; info; info= info->next())
1975  {
1976  handlerton *hton= info->ht();
       /* The entry point is optional; engines without it are skipped. */
1977  if (hton && hton->release_temporary_latches)
1978  hton->release_temporary_latches(hton, thd);
1979  }
1980  return 0;
1981 }
1982 
1983 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
1984 {
1985  int error=0;
1986  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1987  &thd->transaction.all);
1988  Ha_trx_info *ha_info, *ha_info_next;
1989 
1990  DBUG_ENTER("ha_rollback_to_savepoint");
1991 
1992  trans->no_2pc=0;
1993  trans->rw_ha_count= 0;
1994  /*
1995  rolling back to savepoint in all storage engines that were part of the
1996  transaction when the savepoint was set
1997  */
1998  for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
1999  {
2000  int err;
2001  handlerton *ht= ha_info->ht();
2002  DBUG_ASSERT(ht);
2003  DBUG_ASSERT(ht->savepoint_set != 0);
2004  if ((err= ht->savepoint_rollback(ht, thd,
2005  (uchar *)(sv+1)+ht->savepoint_offset)))
2006  { // cannot happen
2007  my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2008  error=1;
2009  }
2010  status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2011  trans->no_2pc|= ht->prepare == 0;
2012  }
2013  /*
2014  rolling back the transaction in all storage engines that were not part of
2015  the transaction when the savepoint was set
2016  */
2017  for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2018  ha_info= ha_info_next)
2019  {
2020  int err;
2021  handlerton *ht= ha_info->ht();
2022  if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2023  { // cannot happen
2024  my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2025  error=1;
2026  }
2027  status_var_increment(thd->status_var.ha_rollback_count);
2028  ha_info_next= ha_info->next();
2029  ha_info->reset(); /* keep it conveniently zero-filled */
2030  }
2031  trans->ha_list= sv->ha_list;
2032  DBUG_RETURN(error);
2033 }
2034 
2035 int ha_prepare_low(THD *thd, bool all)
2036 {
2037  int error= 0;
2038  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
2039  Ha_trx_info *ha_info= trans->ha_list;
2040  DBUG_ENTER("ha_prepare_low");
2041 
2042  if (ha_info)
2043  {
2044  for (; ha_info && !error; ha_info= ha_info->next())
2045  {
2046  int err= 0;
2047  handlerton *ht= ha_info->ht();
2048  /*
2049  Do not call two-phase commit if this particular
2050  transaction is read-only. This allows for simpler
2051  implementation in engines that are always read-only.
2052  */
2053  if (!ha_info->is_trx_read_write())
2054  continue;
2055  if ((err= ht->prepare(ht, thd, all)))
2056  {
2057  my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
2058  error= 1;
2059  }
2060  status_var_increment(thd->status_var.ha_prepare_count);
2061  }
2062  DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2063  }
2064 
2065  DBUG_RETURN(error);
2066 }
2067 
2074 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2075 {
2076  int error=0;
2077  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2078  &thd->transaction.all);
2079  Ha_trx_info *ha_info= trans->ha_list;
2080  DBUG_ENTER("ha_savepoint");
2081 
2082  for (; ha_info; ha_info= ha_info->next())
2083  {
2084  int err;
2085  handlerton *ht= ha_info->ht();
2086  DBUG_ASSERT(ht);
2087  if (! ht->savepoint_set)
2088  {
2089  my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2090  error=1;
2091  break;
2092  }
2093  if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2094  { // cannot happen
2095  my_error(ER_GET_ERRNO, MYF(0), err);
2096  error=1;
2097  }
2098  status_var_increment(thd->status_var.ha_savepoint_count);
2099  }
2100  /*
2101  Remember the list of registered storage engines. All new
2102  engines are prepended to the beginning of the list.
2103  */
2104  sv->ha_list= trans->ha_list;
2105 
2106  DBUG_RETURN(error);
2107 }
2108 
2109 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2110 {
2111  int error=0;
2112  Ha_trx_info *ha_info= sv->ha_list;
2113  DBUG_ENTER("ha_release_savepoint");
2114 
2115  for (; ha_info; ha_info= ha_info->next())
2116  {
2117  int err;
2118  handlerton *ht= ha_info->ht();
2119  /* Savepoint life time is enclosed into transaction life time. */
2120  DBUG_ASSERT(ht);
2121  if (!ht->savepoint_release)
2122  continue;
2123  if ((err= ht->savepoint_release(ht, thd,
2124  (uchar *)(sv+1) + ht->savepoint_offset)))
2125  { // cannot happen
2126  my_error(ER_GET_ERRNO, MYF(0), err);
2127  error=1;
2128  }
2129  }
2130  DBUG_RETURN(error);
2131 }
2132 
2133 
2134 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2135  void *arg)
2136 {
2137  handlerton *hton= plugin_data(plugin, handlerton *);
2138  if (hton->state == SHOW_OPTION_YES &&
2139  hton->start_consistent_snapshot)
2140  {
2141  hton->start_consistent_snapshot(hton, thd);
2142  *((bool *)arg)= false;
2143  }
2144  return FALSE;
2145 }
2146 
2147 int ha_start_consistent_snapshot(THD *thd)
2148 {
2149  bool warn= true;
2150 
2151  plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2152 
2153  /*
2154  Same idea as when one wants to CREATE TABLE in one engine which does not
2155  exist:
2156  */
2157  if (warn)
2158  push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2159  "This MySQL server does not support any "
2160  "consistent-read capable storage engine");
2161  return 0;
2162 }
2163 
2164 
2165 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2166  void *arg)
2167 {
2168  handlerton *hton= plugin_data(plugin, handlerton *);
2169  if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2170  hton->flush_logs(hton))
2171  return TRUE;
2172  return FALSE;
2173 }
2174 
2175 
2176 bool ha_flush_logs(handlerton *db_type)
2177 {
2178  if (db_type == NULL)
2179  {
2180  if (plugin_foreach(NULL, flush_handlerton,
2181  MYSQL_STORAGE_ENGINE_PLUGIN, 0))
2182  return TRUE;
2183  }
2184  else
2185  {
2186  if (db_type->state != SHOW_OPTION_YES ||
2187  (db_type->flush_logs && db_type->flush_logs(db_type)))
2188  return TRUE;
2189  }
2190  return FALSE;
2191 }
2192 
2193 
2216 const char *get_canonical_filename(handler *file, const char *path,
2217  char *tmp_path)
2218 {
2219  uint i;
2220  if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2221  return path;
2222 
2223  for (i= 0; i <= mysql_tmpdir_list.max; i++)
2224  {
2225  if (is_prefix(path, mysql_tmpdir_list.list[i]))
2226  return path;
2227  }
2228 
2229  /* Ensure that table handler get path in lower case */
2230  if (tmp_path != path)
2231  strmov(tmp_path, path);
2232 
2233  /*
2234  we only should turn into lowercase database/table part
2235  so start the process after homedirectory
2236  */
2237  my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2238  return tmp_path;
2239 }
2240 
2241 
/*
  Internal error handler used by ha_delete_table() to capture the text of a
  raised condition in 'buff' so it can later be reported as a warning.
*/
2248 struct Ha_delete_table_error_handler: public Internal_error_handler
2249 {
2250 public:
      /* Copies 'msg' into buff; see the definition following this struct. */
2251  virtual bool handle_condition(THD *thd,
2252  uint sql_errno,
2253  const char* sqlstate,
2254  Sql_condition::enum_warning_level level,
2255  const char* msg,
2256  Sql_condition ** cond_hdl);
      /* Text of the most recently intercepted condition. */
2257  char buff[MYSQL_ERRMSG_SIZE];
2258 };
2259 
2260 
2261 bool
2262 Ha_delete_table_error_handler::
2263 handle_condition(THD *,
2264  uint,
2265  const char*,
2266  Sql_condition::enum_warning_level,
2267  const char* msg,
2268  Sql_condition ** cond_hdl)
2269 {
2270  *cond_hdl= NULL;
2271  /* Grab the error message */
2272  strmake(buff, msg, sizeof(buff)-1);
2273  return TRUE;
2274 }
2275 
2276 
2281 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2282  const char *db, const char *alias, bool generate_warning)
2283 {
2284  handler *file;
2285  char tmp_path[FN_REFLEN];
2286  int error;
2287  TABLE dummy_table;
2288  TABLE_SHARE dummy_share;
2289  DBUG_ENTER("ha_delete_table");
2290 
2291  memset(&dummy_table, 0, sizeof(dummy_table));
2292  memset(&dummy_share, 0, sizeof(dummy_share));
2293  dummy_table.s= &dummy_share;
2294 
2295  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2296  if (table_type == NULL ||
2297  ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2298  DBUG_RETURN(ENOENT);
2299 
2300  path= get_canonical_filename(file, path, tmp_path);
2301  if ((error= file->ha_delete_table(path)) && generate_warning)
2302  {
2303  /*
2304  Because file->print_error() use my_error() to generate the error message
2305  we use an internal error handler to intercept it and store the text
2306  in a temporary buffer. Later the message will be presented to user
2307  as a warning.
2308  */
2309  Ha_delete_table_error_handler ha_delete_table_error_handler;
2310 
2311  /* Fill up strucutures that print_error may need */
2312  dummy_share.path.str= (char*) path;
2313  dummy_share.path.length= strlen(path);
2314  dummy_share.db.str= (char*) db;
2315  dummy_share.db.length= strlen(db);
2316  dummy_share.table_name.str= (char*) alias;
2317  dummy_share.table_name.length= strlen(alias);
2318  dummy_table.alias= alias;
2319 
2320  file->change_table_ptr(&dummy_table, &dummy_share);
2321 
2322  thd->push_internal_handler(&ha_delete_table_error_handler);
2323  file->print_error(error, 0);
2324 
2325  thd->pop_internal_handler();
2326 
2327  /*
2328  XXX: should we convert *all* errors to warnings here?
2329  What if the error is fatal?
2330  */
2331  push_warning(thd, Sql_condition::WARN_LEVEL_WARN, error,
2332  ha_delete_table_error_handler.buff);
2333  }
2334  delete file;
2335 
2336 #ifdef HAVE_PSI_TABLE_INTERFACE
2337  if (likely(error == 0))
2338  {
2339  my_bool temp_table= (my_bool)is_prefix(alias, tmp_file_prefix);
2340  PSI_TABLE_CALL(drop_table_share)
2341  (temp_table, db, strlen(db), alias, strlen(alias));
2342  }
2343 #endif
2344 
2345  DBUG_RETURN(error);
2346 }
2347 
2348 /****************************************************************************
2349 ** General handler functions
2350 ****************************************************************************/
2351 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2352 {
2353  handler *new_handler= get_new_handler(table->s, mem_root, ht);
2354 
2355  if (!new_handler)
2356  return NULL;
2357  if (new_handler->set_ha_share_ref(ha_share))
2358  goto err;
2359 
2360  /*
2361  Allocate handler->ref here because otherwise ha_open will allocate it
2362  on this->table->mem_root and we will not be able to reclaim that memory
2363  when the clone handler object is destroyed.
2364  */
2365  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2366  ALIGN_SIZE(ref_length)*2)))
2367  goto err;
2368  /*
2369  TODO: Implement a more efficient way to have more than one index open for
2370  the same table instance. The ha_open call is not cachable for clone.
2371  */
2372  if (new_handler->ha_open(table, name, table->db_stat,
2373  HA_OPEN_IGNORE_IF_LOCKED))
2374  goto err;
2375 
2376  return new_handler;
2377 
2378 err:
2379  delete new_handler;
2380  return NULL;
2381 }
2382 
2383 
2384 
2385 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2386 {
2387  status_var_increment(table->in_use->status_var.*offset);
2388 }
2389 
2390 void **handler::ha_data(THD *thd) const
2391 {
2392  return thd_ha_data(thd, ht);
2393 }
2394 
2395 THD *handler::ha_thd(void) const
2396 {
2397  DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2398  return (table && table->in_use) ? table->in_use : current_thd;
2399 }
2400 
/*
  Detach this handler's table instrumentation from the current thread.
  Does nothing when the table instrumentation interface is compiled out.
*/
2401 void handler::unbind_psi()
2402 {
2403 #ifdef HAVE_PSI_TABLE_INTERFACE
      /* Expected only while the table is unlocked and no scan is active. */
2404  DBUG_ASSERT(m_lock_type == F_UNLCK);
2405  DBUG_ASSERT(inited == NONE);
2406  /*
2407  Notify the instrumentation that this table is not owned
2408  by this thread any more.
2409  */
2410  PSI_TABLE_CALL(unbind_table)(m_psi);
2411 #endif
2412 }
2413 
2414 void handler::rebind_psi()
2415 {
2416 #ifdef HAVE_PSI_TABLE_INTERFACE
2417  DBUG_ASSERT(m_lock_type == F_UNLCK);
2418  DBUG_ASSERT(inited == NONE);
2419  /*
2420  Notify the instrumentation that this table is now owned
2421  by this thread.
2422  */
2423  PSI_table_share *share_psi= ha_table_share_psi(table_share);
2424  m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2425 #endif
2426 }
2427 
2428 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
2429 {
2430  return share->m_psi;
2431 }
2432 
/*
  Open the table through the engine's open() and finish generic handler
  setup: instrumentation handle, read-only flag, row reference buffers and
  the cached table flags.

  table_arg       TABLE this handler is attached to.
  name            Name passed on to open().
  mode            Open mode; O_RDWR may be retried as O_RDONLY (see below).
  test_if_locked  Flags passed on to open(); HA_OPEN_NO_PSI_CALL suppresses
                  the instrumentation open_table call.

  Returns 0 on success, the error from open(), or HA_ERR_OUT_OF_MEM when the
  ref buffer cannot be allocated.
*/
2440 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2441  int test_if_locked)
2442 {
2443  int error;
2444  DBUG_ENTER("handler::ha_open");
2445  DBUG_PRINT("enter",
2446  ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
2447  name, ht->db_type, table_arg->db_stat, mode,
2448  test_if_locked));
2449 
2450  table= table_arg;
2451  DBUG_ASSERT(table->s == table_share);
2452  DBUG_ASSERT(m_lock_type == F_UNLCK);
2453  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2454  DBUG_ASSERT(alloc_root_inited(&table->mem_root));
2455 
2456  if ((error=open(name,mode,test_if_locked)))
2457  {
       /*
         A read-write open refused with EACCES/EROFS is retried read-only
         when the table allows it (HA_TRY_READ_ONLY).
       */
2458  if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2459  (table->db_stat & HA_TRY_READ_ONLY))
2460  {
2461  table->db_stat|=HA_READ_ONLY;
2462  error=open(name,O_RDONLY,test_if_locked);
2463  }
2464  }
2465  if (error)
2466  {
2467  my_errno= error; /* Safeguard */
2468  DBUG_PRINT("error",("error: %d errno: %d",error,errno));
2469  }
2470  else
2471  {
2472  DBUG_ASSERT(m_psi == NULL);
2473  DBUG_ASSERT(table_share != NULL);
2474 #ifdef HAVE_PSI_TABLE_INTERFACE
2475  /*
2476  Do not call this for partitions handlers, since it may take too much
2477  resources.
2478  So only use the m_psi on table level, not for individual partitions.
2479  */
2480  if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2481  {
2482  PSI_table_share *share_psi= ha_table_share_psi(table_share);
2483  m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
2484  }
2485 #endif
2486 
2487  if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2488  table->db_stat|=HA_READ_ONLY;
2489  (void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL
2490 
2491  /* ref is already allocated for us if we're called from handler::clone() */
2492  if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
2493  ALIGN_SIZE(ref_length)*2)))
2494  {
2495  ha_close();
2496  error=HA_ERR_OUT_OF_MEM;
2497  }
2498  else
       /* dup_ref is the second half of the double-sized ref allocation. */
2499  dup_ref=ref+ALIGN_SIZE(ref_length);
       /* NOTE(review): this also runs on the out-of-memory path, after ha_close(). */
2500  cached_table_flags= table_flags();
2501  }
2502  DBUG_RETURN(error);
2503 }
2504 
2505 
/*
  Body of handler::ha_close (per the DBUG_ENTER tag): releases the
  instrumentation handle, then returns the result of the engine's close().
  NOTE(review): the signature line is missing from this listing.
*/
2511 {
2512  DBUG_ENTER("handler::ha_close");
2513 #ifdef HAVE_PSI_TABLE_INTERFACE
2514  PSI_TABLE_CALL(close_table)(m_psi);
2515  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
2516 #endif
2517  // TODO: set table= NULL to mark the handler as closed?
      /* Expected only when unlocked and with no index or table scan active. */
2518  DBUG_ASSERT(m_psi == NULL);
2519  DBUG_ASSERT(m_lock_type == F_UNLCK);
2520  DBUG_ASSERT(inited == NONE);
2521  DBUG_RETURN(close());
2522 }
2523 
2524 
2536 int handler::ha_index_init(uint idx, bool sorted)
2537 {
2538  DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2539  int result;
2540  DBUG_ENTER("ha_index_init");
2541  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2542  m_lock_type != F_UNLCK);
2543  DBUG_ASSERT(inited == NONE);
2544  if (!(result= index_init(idx, sorted)))
2545  inited= INDEX;
2546  end_range= NULL;
2547  DBUG_RETURN(result);
2548 }
2549 
2550 
/*
  Body of the function tagged "ha_index_end": marks the handler as no longer
  index-initialised, clears end_range and returns the result of the engine's
  index_end().
  NOTE(review): the signature line is missing from this listing.
*/
2560 {
2561  DBUG_ENTER("ha_index_end");
2562  /* SQL HANDLER function can call this without having it locked. */
2563  DBUG_ASSERT(table->open_by_handler ||
2564  table_share->tmp_table != NO_TMP_TABLE ||
2565  m_lock_type != F_UNLCK);
2566  DBUG_ASSERT(inited == INDEX);
      /* State is reset before index_end() is called. */
2567  inited= NONE;
2568  end_range= NULL;
2569  DBUG_RETURN(index_end());
2570 }
2571 
2572 
2584 int handler::ha_rnd_init(bool scan)
2585 {
2586  DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2587  int result;
2588  DBUG_ENTER("ha_rnd_init");
2589  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2590  m_lock_type != F_UNLCK);
2591  DBUG_ASSERT(inited == NONE || (inited == RND && scan));
2592  inited= (result= rnd_init(scan)) ? NONE : RND;
2593  end_range= NULL;
2594  DBUG_RETURN(result);
2595 }
2596 
2597 
/*
  Body of the function tagged "ha_rnd_end": marks the handler as no longer
  in RND state, clears end_range and returns the result of the engine's
  rnd_end().
  NOTE(review): the signature line is missing from this listing.
*/
2607 {
2608  DBUG_ENTER("ha_rnd_end");
2609  /* SQL HANDLER function can call this without having it locked. */
2610  DBUG_ASSERT(table->open_by_handler ||
2611  table_share->tmp_table != NO_TMP_TABLE ||
2612  m_lock_type != F_UNLCK);
2613  DBUG_ASSERT(inited == RND);
      /* State is reset before rnd_end() is called. */
2614  inited= NONE;
2615  end_range= NULL;
2616  DBUG_RETURN(rnd_end());
2617 }
2618 
2619 
2630 int handler::ha_rnd_next(uchar *buf)
2631 {
2632  int result;
2633  DBUG_ENTER("handler::ha_rnd_next");
2634  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2635  m_lock_type != F_UNLCK);
2636  DBUG_ASSERT(inited == RND);
2637 
2638  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2639  { result= rnd_next(buf); })
2640  DBUG_RETURN(result);
2641 }
2642 
2643 
2655 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
2656 {
2657  int result;
2658  DBUG_ENTER("handler::ha_rnd_pos");
2659  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2660  m_lock_type != F_UNLCK);
2661  /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
2662  /* DBUG_ASSERT(inited == RND); */
2663 
2664  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2665  { result= rnd_pos(buf, pos); })
2666  DBUG_RETURN(result);
2667 }
2668 
2669 
2693 int handler::ha_index_read_map(uchar *buf, const uchar *key,
2694  key_part_map keypart_map,
2695  enum ha_rkey_function find_flag)
2696 {
2697  int result;
2698  DBUG_ENTER("handler::ha_index_read_map");
2699  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2700  m_lock_type != F_UNLCK);
2701  DBUG_ASSERT(inited == INDEX);
2702 
2703  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2704  { result= index_read_map(buf, key, keypart_map, find_flag); })
2705  DBUG_RETURN(result);
2706 }
2707 
2708 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
2709  key_part_map keypart_map)
2710 {
2711  int result;
2712  DBUG_ENTER("handler::ha_index_read_last_map");
2713  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2714  m_lock_type != F_UNLCK);
2715  DBUG_ASSERT(inited == INDEX);
2716 
2717  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2718  { result= index_read_last_map(buf, key, keypart_map); })
2719  DBUG_RETURN(result);
2720 }
2721 
2722 
2729 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
2730  key_part_map keypart_map,
2731  enum ha_rkey_function find_flag)
2732 {
2733  int result;
2734  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2735  m_lock_type != F_UNLCK);
2736  DBUG_ASSERT(end_range == NULL);
2737 
2738  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, index, 0,
2739  { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
2740  return result;
2741 }
2742 
2743 
2755 int handler::ha_index_next(uchar * buf)
2756 {
2757  int result;
2758  DBUG_ENTER("handler::ha_index_next");
2759  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2760  m_lock_type != F_UNLCK);
2761  DBUG_ASSERT(inited == INDEX);
2762 
2763  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2764  { result= index_next(buf); })
2765  DBUG_RETURN(result);
2766 }
2767 
2768 
2780 int handler::ha_index_prev(uchar * buf)
2781 {
2782  int result;
2783  DBUG_ENTER("handler::ha_index_prev");
2784  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2785  m_lock_type != F_UNLCK);
2786  DBUG_ASSERT(inited == INDEX);
2787 
2788  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2789  { result= index_prev(buf); })
2790  DBUG_RETURN(result);
2791 }
2792 
2793 
2805 int handler::ha_index_first(uchar * buf)
2806 {
2807  int result;
2808  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2809  m_lock_type != F_UNLCK);
2810  DBUG_ASSERT(inited == INDEX);
2811 
2812  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2813  { result= index_first(buf); })
2814  return result;
2815 }
2816 
2817 
2829 int handler::ha_index_last(uchar * buf)
2830 {
2831  int result;
2832  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2833  m_lock_type != F_UNLCK);
2834  DBUG_ASSERT(inited == INDEX);
2835 
2836  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2837  { result= index_last(buf); })
2838  return result;
2839 }
2840 
2841 
2855 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
2856 {
2857  int result;
2858  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2859  m_lock_type != F_UNLCK);
2860  DBUG_ASSERT(inited == INDEX);
2861 
2862  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2863  { result= index_next_same(buf, key, keylen); })
2864  return result;
2865 }
2866 
2867 
2882 int handler::ha_index_read(uchar *buf, const uchar *key, uint key_len,
2883  enum ha_rkey_function find_flag)
2884 {
2885  int result;
2886  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2887  m_lock_type != F_UNLCK);
2888  DBUG_ASSERT(inited == INDEX);
2889 
2890  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2891  { result= index_read(buf, key, key_len, find_flag); })
2892  return result;
2893 }
2894 
2895 
2909 int handler::ha_index_read_last(uchar *buf, const uchar *key, uint key_len)
2910 {
2911  int result;
2912  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2913  m_lock_type != F_UNLCK);
2914  DBUG_ASSERT(inited == INDEX);
2915 
2916  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2917  { result= index_read_last(buf, key, key_len); })
2918  return result;
2919 }
2920 
2921 
2928 int handler::read_first_row(uchar * buf, uint primary_key)
2929 {
2930  register int error;
2931  DBUG_ENTER("handler::read_first_row");
2932 
2933  ha_statistic_increment(&SSV::ha_read_first_count);
2934 
2935  /*
2936  If there is very few deleted rows in the table, find the first row by
2937  scanning the table.
2938  TODO remove the test for HA_READ_ORDER
2939  */
2940  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
2941  !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
2942  {
2943  if (!(error= ha_rnd_init(1)))
2944  {
2945  while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED)
2946  /* skip deleted row */;
2947  const int end_error= ha_rnd_end();
2948  if (!error)
2949  error= end_error;
2950  }
2951  }
2952  else
2953  {
2954  /* Find the first row through the primary key */
2955  if (!(error= ha_index_init(primary_key, 0)))
2956  {
2957  error= ha_index_first(buf);
2958  const int end_error= ha_index_end();
2959  if (!error)
2960  error= end_error;
2961  }
2962  }
2963  DBUG_RETURN(error);
2964 }
2965 
2978 inline ulonglong
2979 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
2980 {
2981  const ulonglong save_nr= nr;
2982 
2983  if (variables->auto_increment_increment == 1)
2984  nr= nr + 1; // optimization of the formula below
2985  else
2986  {
2987  nr= (((nr+ variables->auto_increment_increment -
2988  variables->auto_increment_offset)) /
2989  (ulonglong) variables->auto_increment_increment);
2990  nr= (nr* (ulonglong) variables->auto_increment_increment +
2991  variables->auto_increment_offset);
2992  }
2993 
2994  if (unlikely(nr <= save_nr))
2995  return ULONGLONG_MAX;
2996 
2997  return nr;
2998 }
2999 
3000 
3001 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3002 {
3003  /*
3004  If we have set THD::next_insert_id previously and plan to insert an
3005  explicitely-specified value larger than this, we need to increase
3006  THD::next_insert_id to be greater than the explicit value.
3007  */
3008  if ((next_insert_id > 0) && (nr >= next_insert_id))
3009  set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3010 }
3011 
3012 
3028 inline ulonglong
3029 prev_insert_id(ulonglong nr, struct system_variables *variables)
3030 {
3031  if (unlikely(nr < variables->auto_increment_offset))
3032  {
3033  /*
3034  There's nothing good we can do here. That is a pathological case, where
3035  the offset is larger than the column's max possible value, i.e. not even
3036  the first sequence value may be inserted. User will receive warning.
3037  */
3038  DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3039  "auto_increment_offset: %lu",
3040  (ulong) nr, variables->auto_increment_offset));
3041  return nr;
3042  }
3043  if (variables->auto_increment_increment == 1)
3044  return nr; // optimization of the formula below
3045  nr= (((nr - variables->auto_increment_offset)) /
3046  (ulonglong) variables->auto_increment_increment);
3047  return (nr * (ulonglong) variables->auto_increment_increment +
3048  variables->auto_increment_offset);
3049 }
3050 
3051 
3127 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3128 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3129 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
3130 
3131 int handler::update_auto_increment()
3132 {
3133  ulonglong nr, nb_reserved_values;
3134  bool append= FALSE;
3135  THD *thd= table->in_use;
3136  struct system_variables *variables= &thd->variables;
3137  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3138  m_lock_type != F_UNLCK);
3139  DBUG_ENTER("handler::update_auto_increment");
3140 
3141  /*
3142  next_insert_id is a "cursor" into the reserved interval, it may go greater
3143  than the interval, but not smaller.
3144  */
3145  DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3146 
3147  if ((nr= table->next_number_field->val_int()) != 0 ||
3148  (table->auto_increment_field_not_null &&
3149  thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3150  {
3151  /*
3152  Update next_insert_id if we had already generated a value in this
3153  statement (case of INSERT VALUES(null),(3763),(null):
3154  the last NULL needs to insert 3764, not the value of the first NULL plus
3155  1).
3156  */
3157  adjust_next_insert_id_after_explicit_value(nr);
3158  insert_id_for_cur_row= 0; // didn't generate anything
3159  DBUG_RETURN(0);
3160  }
3161 
3162  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3163  {
3164  /* next_insert_id is beyond what is reserved, so we reserve more. */
3165  const Discrete_interval *forced=
3166  thd->auto_inc_intervals_forced.get_next();
3167  if (forced != NULL)
3168  {
3169  nr= forced->minimum();
3170  nb_reserved_values= forced->values();
3171  }
3172  else
3173  {
3174  /*
3175  handler::estimation_rows_to_insert was set by
3176  handler::ha_start_bulk_insert(); if 0 it means "unknown".
3177  */
3178  ulonglong nb_desired_values;
3179  /*
3180  If an estimation was given to the engine:
3181  - use it.
3182  - if we already reserved numbers, it means the estimation was
3183  not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3184  time, twice that the 3rd time etc.
3185  If no estimation was given, use those increasing defaults from the
3186  start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3187  Don't go beyond a max to not reserve "way too much" (because
3188  reservation means potentially losing unused values).
3189  Note that in prelocked mode no estimation is given.
3190  */
3191 
3192  if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3193  nb_desired_values= estimation_rows_to_insert;
3194  else if ((auto_inc_intervals_count == 0) &&
3195  (thd->lex->many_values.elements > 0))
3196  {
3197  /*
3198  For multi-row inserts, if the bulk inserts cannot be started, the
3199  handler::estimation_rows_to_insert will not be set. But we still
3200  want to reserve the autoinc values.
3201  */
3202  nb_desired_values= thd->lex->many_values.elements;
3203  }
3204  else /* go with the increasing defaults */
3205  {
3206  /* avoid overflow in formula, with this if() */
3207  if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3208  {
3209  nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3210  (1 << auto_inc_intervals_count);
3211  set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3212  }
3213  else
3214  nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3215  }
3216  /* This call ignores all its parameters but nr, currently */
3217  get_auto_increment(variables->auto_increment_offset,
3218  variables->auto_increment_increment,
3219  nb_desired_values, &nr,
3220  &nb_reserved_values);
3221  if (nr == ULONGLONG_MAX)
3222  DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED); // Mark failure
3223 
3224  /*
3225  That rounding below should not be needed when all engines actually
3226  respect offset and increment in get_auto_increment(). But they don't
3227  so we still do it. Wonder if for the not-first-in-index we should do
3228  it. Hope that this rounding didn't push us out of the interval; even
3229  if it did we cannot do anything about it (calling the engine again
3230  will not help as we inserted no row).
3231  */
3232  nr= compute_next_insert_id(nr-1, variables);
3233  }
3234 
3235  if (table->s->next_number_keypart == 0)
3236  {
3237  /* We must defer the appending until "nr" has been possibly truncated */
3238  append= TRUE;
3239  }
3240  else
3241  {
3242  /*
3243  For such auto_increment there is no notion of interval, just a
3244  singleton. The interval is not even stored in
3245  thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3246  for next row.
3247  */
3248  DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3249  }
3250  }
3251 
3252  if (unlikely(nr == ULONGLONG_MAX))
3253  DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3254 
3255  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));
3256 
3257  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3258  {
3259  /*
3260  first test if the query was aborted due to strict mode constraints
3261  */
3262  if (thd->killed == THD::KILL_BAD_DATA)
3263  DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3264 
3265  /*
3266  field refused this value (overflow) and truncated it, use the result of
3267  the truncation (which is going to be inserted); however we try to
3268  decrease it to honour auto_increment_* variables.
3269  That will shift the left bound of the reserved interval, we don't
3270  bother shifting the right bound (anyway any other value from this
3271  interval will cause a duplicate key).
3272  */
3273  nr= prev_insert_id(table->next_number_field->val_int(), variables);
3274  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3275  nr= table->next_number_field->val_int();
3276  }
3277  if (append)
3278  {
3279  auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3280  variables->auto_increment_increment);
3282  /* Row-based replication does not need to store intervals in binlog */
3283  if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
3284  thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
3286  variables->auto_increment_increment);
3287  }
3288 
3289  /*
3290  Record this autogenerated value. If the caller then
3291  succeeds to insert this value, it will call
3292  record_first_successful_insert_id_in_cur_stmt()
3293  which will set first_successful_insert_id_in_cur_stmt if it's not
3294  already set.
3295  */
3297  /*
3298  Set next insert id to point to next auto-increment value to be able to
3299  handle multi-row statements.
3300  */
3301  set_next_insert_id(compute_next_insert_id(nr, variables));
3302 
3303  DBUG_RETURN(0);
3304 }
3305 
3306 
3321 {
3322  DBUG_ENTER("column_bitmaps_signal");
3323  DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx", (long) table->read_set,
3324  (long)table->write_set));
3325  DBUG_VOID_RETURN;
3326 }
3327 
3328 
3345 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3346  ulonglong nb_desired_values,
3347  ulonglong *first_value,
3348  ulonglong *nb_reserved_values)
3349 {
3350  ulonglong nr;
3351  int error;
3352  DBUG_ENTER("handler::get_auto_increment");
3353 
3354  (void) extra(HA_EXTRA_KEYREAD);
3355  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
3356  table->read_set);
3358 
3359  if (ha_index_init(table->s->next_number_index, 1))
3360  {
3361  /* This should never happen, assert in debug, and fail in release build */
3362  DBUG_ASSERT(0);
3363  *first_value= ULONGLONG_MAX;
3364  DBUG_VOID_RETURN;
3365  }
3366 
3367  if (table->s->next_number_keypart == 0)
3368  { // Autoincrement at key-start
3369  error= ha_index_last(table->record[1]);
3370  /*
3371  MySQL implicitely assumes such method does locking (as MySQL decides to
3372  use nr+increment without checking again with the handler, in
3373  handler::update_auto_increment()), so reserves to infinite.
3374  */
3375  *nb_reserved_values= ULONGLONG_MAX;
3376  }
3377  else
3378  {
3379  uchar key[MAX_KEY_LENGTH];
3380  key_copy(key, table->record[0],
3381  table->key_info + table->s->next_number_index,
3382  table->s->next_number_key_offset);
3383  error= ha_index_read_map(table->record[1], key,
3384  make_prev_keypart_map(table->s->next_number_keypart),
3385  HA_READ_PREFIX_LAST);
3386  /*
3387  MySQL needs to call us for next row: assume we are inserting ("a",null)
3388  here, we return 3, and next this statement will want to insert
3389  ("b",null): there is no reason why ("b",3+1) would be the good row to
3390  insert: maybe it already exists, maybe 3+1 is too large...
3391  */
3392  *nb_reserved_values= 1;
3393  }
3394 
3395  if (error)
3396  {
3397  if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3398  {
3399  /* No entry found, start with 1. */
3400  nr= 1;
3401  }
3402  else
3403  {
3404  DBUG_ASSERT(0);
3405  nr= ULONGLONG_MAX;
3406  }
3407  }
3408  else
3409  nr= ((ulonglong) table->next_number_field->
3410  val_int_offset(table->s->rec_buff_length)+1);
3411  ha_index_end();
3412  (void) extra(HA_EXTRA_NO_KEYREAD);
3413  *first_value= nr;
3414  DBUG_VOID_RETURN;
3415 }
3416 
3417 
3418 void handler::ha_release_auto_increment()
3419 {
3420  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3421  m_lock_type != F_UNLCK ||
3423  release_auto_increment();
3425  auto_inc_interval_for_cur_row.replace(0, 0, 0);
3427  if (next_insert_id > 0)
3428  {
3429  next_insert_id= 0;
3430  /*
3431  this statement used forced auto_increment values if there were some,
3432  wipe them away for other statements.
3433  */
3434  table->in_use->auto_inc_intervals_forced.empty();
3435  }
3436 }
3437 
3438 
3451 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3452 {
3453  /* Write the duplicated key in the error message */
3454  char key_buff[MAX_KEY_LENGTH];
3455  String str(key_buff,sizeof(key_buff),system_charset_info);
3456 
3457  if (key == NULL)
3458  {
3459  /* Key is unknown */
3460  str.copy("", 0, system_charset_info);
3461  my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3462  }
3463  else
3464  {
3465  /* Table is opened and defined at this point */
3466  key_unpack(&str,table, key);
3467  uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3468  if (str.length() >= max_length)
3469  {
3470  str.length(max_length-4);
3471  str.append(STRING_WITH_LEN("..."));
3472  }
3473  my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3474  }
3475 }
3476 
3477 
3485 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3486 {
3487  print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
3488 }
3489 
3490 
3500 void handler::print_error(int error, myf errflag)
3501 {
3502  DBUG_ENTER("handler::print_error");
3503  DBUG_PRINT("enter",("error: %d",error));
3504 
3505  int textno=ER_GET_ERRNO;
3506  switch (error) {
3507  case EACCES:
3508  textno=ER_OPEN_AS_READONLY;
3509  break;
3510  case EAGAIN:
3511  textno=ER_FILE_USED;
3512  break;
3513  case ENOENT:
3514  {
3515  char errbuf[MYSYS_STRERROR_SIZE];
3516  textno=ER_FILE_NOT_FOUND;
3517  my_error(textno, errflag, table_share->table_name.str,
3518  error, my_strerror(errbuf, sizeof(errbuf), error));
3519  }
3520  break;
3521  case HA_ERR_KEY_NOT_FOUND:
3522  case HA_ERR_NO_ACTIVE_RECORD:
3523  case HA_ERR_RECORD_DELETED:
3524  case HA_ERR_END_OF_FILE:
3525  textno=ER_KEY_NOT_FOUND;
3526  break;
3527  case HA_ERR_WRONG_MRG_TABLE_DEF:
3528  textno=ER_WRONG_MRG_TABLE;
3529  break;
3530  case HA_ERR_FOUND_DUPP_KEY:
3531  {
3532  uint key_nr= table ? get_dup_key(error) : -1;
3533  if ((int) key_nr >= 0)
3534  {
3536  key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
3537  errflag);
3538  DBUG_VOID_RETURN;
3539  }
3540  textno=ER_DUP_KEY;
3541  break;
3542  }
3543  case HA_ERR_FOREIGN_DUPLICATE_KEY:
3544  {
3545  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3546  m_lock_type != F_UNLCK);
3547 
3548  char rec_buf[MAX_KEY_LENGTH];
3549  String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3550  /* Table is opened and defined at this point */
3551 
3552  /*
3553  Just print the subset of fields that are part of the first index,
3554  printing the whole row from there is not easy.
3555  */
3556  key_unpack(&rec, table, &table->key_info[0]);
3557 
3558  char child_table_name[NAME_LEN + 1];
3559  char child_key_name[NAME_LEN + 1];
3560  if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
3561  child_key_name, sizeof(child_key_name)))
3562  {
3563  my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
3564  table_share->table_name.str, rec.c_ptr_safe(),
3565  child_table_name, child_key_name);
3566  }
3567  else
3568  {
3569  my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
3570  table_share->table_name.str, rec.c_ptr_safe());
3571  }
3572  DBUG_VOID_RETURN;
3573  }
3574  case HA_ERR_NULL_IN_SPATIAL:
3575  my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
3576  DBUG_VOID_RETURN;
3577  case HA_ERR_FOUND_DUPP_UNIQUE:
3578  textno=ER_DUP_UNIQUE;
3579  break;
3580  case HA_ERR_RECORD_CHANGED:
3581  textno=ER_CHECKREAD;
3582  break;
3583  case HA_ERR_CRASHED:
3584  textno=ER_NOT_KEYFILE;
3585  break;
3586  case HA_ERR_WRONG_IN_RECORD:
3587  textno= ER_CRASHED_ON_USAGE;
3588  break;
3589  case HA_ERR_CRASHED_ON_USAGE:
3590  textno=ER_CRASHED_ON_USAGE;
3591  break;
3592  case HA_ERR_NOT_A_TABLE:
3593  textno= error;
3594  break;
3595  case HA_ERR_CRASHED_ON_REPAIR:
3596  textno=ER_CRASHED_ON_REPAIR;
3597  break;
3598  case HA_ERR_OUT_OF_MEM:
3599  textno=ER_OUT_OF_RESOURCES;
3600  break;
3601  case HA_ERR_WRONG_COMMAND:
3602  textno=ER_ILLEGAL_HA;
3603  break;
3604  case HA_ERR_OLD_FILE:
3605  textno=ER_OLD_KEYFILE;
3606  break;
3607  case HA_ERR_UNSUPPORTED:
3608  textno=ER_UNSUPPORTED_EXTENSION;
3609  break;
3610  case HA_ERR_RECORD_FILE_FULL:
3611  case HA_ERR_INDEX_FILE_FULL:
3612  {
3613  textno=ER_RECORD_FILE_FULL;
3614  /* Write the error message to error log */
3615  errflag|= ME_NOREFRESH;
3616  break;
3617  }
3618  case HA_ERR_LOCK_WAIT_TIMEOUT:
3619  textno=ER_LOCK_WAIT_TIMEOUT;
3620  break;
3621  case HA_ERR_LOCK_TABLE_FULL:
3622  textno=ER_LOCK_TABLE_FULL;
3623  break;
3624  case HA_ERR_LOCK_DEADLOCK:
3625  textno=ER_LOCK_DEADLOCK;
3626  break;
3627  case HA_ERR_READ_ONLY_TRANSACTION:
3628  textno=ER_READ_ONLY_TRANSACTION;
3629  break;
3630  case HA_ERR_CANNOT_ADD_FOREIGN:
3631  textno=ER_CANNOT_ADD_FOREIGN;
3632  break;
3633  case HA_ERR_ROW_IS_REFERENCED:
3634  {
3635  String str;
3636  get_error_message(error, &str);
3637  my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
3638  DBUG_VOID_RETURN;
3639  }
3640  case HA_ERR_NO_REFERENCED_ROW:
3641  {
3642  String str;
3643  get_error_message(error, &str);
3644  my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
3645  DBUG_VOID_RETURN;
3646  }
3647  case HA_ERR_TABLE_DEF_CHANGED:
3648  textno=ER_TABLE_DEF_CHANGED;
3649  break;
3650  case HA_ERR_NO_SUCH_TABLE:
3651  my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
3652  table_share->table_name.str);
3653  DBUG_VOID_RETURN;
3654  case HA_ERR_RBR_LOGGING_FAILED:
3655  textno= ER_BINLOG_ROW_LOGGING_FAILED;
3656  break;
3657  case HA_ERR_DROP_INDEX_FK:
3658  {
3659  const char *ptr= "???";
3660  uint key_nr= table ? get_dup_key(error) : -1;
3661  if ((int) key_nr >= 0)
3662  ptr= table->key_info[key_nr].name;
3663  my_error(ER_DROP_INDEX_FK, errflag, ptr);
3664  DBUG_VOID_RETURN;
3665  }
3666  case HA_ERR_TABLE_NEEDS_UPGRADE:
3667  textno=ER_TABLE_NEEDS_UPGRADE;
3668  break;
3669  case HA_ERR_NO_PARTITION_FOUND:
3670  textno=ER_WRONG_PARTITION_NAME;
3671  break;
3672  case HA_ERR_TABLE_READONLY:
3673  textno= ER_OPEN_AS_READONLY;
3674  break;
3675  case HA_ERR_AUTOINC_READ_FAILED:
3676  textno= ER_AUTOINC_READ_FAILED;
3677  break;
3678  case HA_ERR_AUTOINC_ERANGE:
3679  textno= ER_WARN_DATA_OUT_OF_RANGE;
3680  break;
3681  case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
3682  textno= ER_TOO_MANY_CONCURRENT_TRXS;
3683  break;
3684  case HA_ERR_INDEX_COL_TOO_LONG:
3685  textno= ER_INDEX_COLUMN_TOO_LONG;
3686  break;
3687  case HA_ERR_NOT_IN_LOCK_PARTITIONS:
3688  textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
3689  break;
3690  case HA_ERR_INDEX_CORRUPT:
3691  textno= ER_INDEX_CORRUPT;
3692  break;
3693  case HA_ERR_UNDO_REC_TOO_BIG:
3694  textno= ER_UNDO_RECORD_TOO_BIG;
3695  break;
3696  case HA_ERR_TABLE_IN_FK_CHECK:
3697  textno= ER_TABLE_IN_FK_CHECK;
3698  break;
3699  case HA_WRONG_CREATE_OPTION:
3700  textno= ER_ILLEGAL_HA;
3701  break;
3702  case HA_ERR_TOO_MANY_FIELDS:
3703  textno= ER_TOO_MANY_FIELDS;
3704  break;
3705  case HA_ERR_INNODB_READ_ONLY:
3706  textno= ER_INNODB_READ_ONLY;
3707  break;
3708  default:
3709  {
3710  /* The error was "unknown" to this function.
3711  Ask handler if it has got a message for this error */
3712  bool temporary= FALSE;
3713  String str;
3714  temporary= get_error_message(error, &str);
3715  if (!str.is_empty())
3716  {
3717  const char* engine= table_type();
3718  if (temporary)
3719  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
3720  else
3721  my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
3722  }
3723  else
3724  my_error(ER_GET_ERRNO,errflag,error);
3725  DBUG_VOID_RETURN;
3726  }
3727  }
3728  if (textno != ER_FILE_NOT_FOUND)
3729  my_error(textno, errflag, table_share->table_name.str, error);
3730  DBUG_VOID_RETURN;
3731 }
3732 
3733 
3743 bool handler::get_error_message(int error, String* buf)
3744 {
3745  return FALSE;
3746 }
3747 
3748 
3759 {
3760  ulong mysql_version= table->s->mysql_version;
3761 
3762  if (mysql_version < 50124)
3763  {
3764  KEY *key= table->key_info;
3765  KEY *key_end= key + table->s->keys;
3766  for (; key < key_end; key++)
3767  {
3768  KEY_PART_INFO *key_part= key->key_part;
3769  KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
3770  for (; key_part < key_part_end; key_part++)
3771  {
3772  if (!key_part->fieldnr)
3773  continue;
3774  Field *field= table->field[key_part->fieldnr - 1];
3775  uint cs_number= field->charset()->number;
3776  if ((mysql_version < 50048 &&
3777  (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
3778  cs_number == 41 || /* latin7_general_ci - bug #29461 */
3779  cs_number == 42 || /* latin7_general_cs - bug #29461 */
3780  cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
3781  cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
3782  cs_number == 22 || /* koi8u_general_ci - bug #29461 */
3783  cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
3784  cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
3785  (mysql_version < 50124 &&
3786  (cs_number == 33 || /* utf8_general_ci - bug #27877 */
3787  cs_number == 35))) /* ucs2_general_ci - bug #27877 */
3788  return HA_ADMIN_NEEDS_UPGRADE;
3789  }
3790  }
3791  }
3792  return 0;
3793 }
3794 
3795 
3796 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
3797 {
3798  int error;
3799  KEY *keyinfo, *keyend;
3800  KEY_PART_INFO *keypart, *keypartend;
3801 
3802  if (!table->s->mysql_version)
3803  {
3804  /* check for blob-in-key error */
3805  keyinfo= table->key_info;
3806  keyend= table->key_info + table->s->keys;
3807  for (; keyinfo < keyend; keyinfo++)
3808  {
3809  keypart= keyinfo->key_part;
3810  keypartend= keypart + keyinfo->user_defined_key_parts;
3811  for (; keypart < keypartend; keypart++)
3812  {
3813  if (!keypart->fieldnr)
3814  continue;
3815  Field *field= table->field[keypart->fieldnr-1];
3816  if (field->type() == MYSQL_TYPE_BLOB)
3817  {
3818  if (check_opt->sql_flags & TT_FOR_UPGRADE)
3819  check_opt->flags= T_MEDIUM;
3820  return HA_ADMIN_NEEDS_CHECK;
3821  }
3822  }
3823  }
3824  }
3825  if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
3826  return HA_ADMIN_NEEDS_ALTER;
3827 
3828  if ((error= check_collation_compatibility()))
3829  return error;
3830 
3831  return check_for_upgrade(check_opt);
3832 }
3833 
3834 
3835 int handler::check_old_types()
3836 {
3837  Field** field;
3838 
3839  for (field= table->field; (*field); field++)
3840  {
3841  if (table->s->mysql_version == 0) // prior to MySQL 5.0
3842  {
3843  /* check for bad DECIMAL field */
3844  if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) // TODO: error? MYSQL_TYPE_DECIMAL?
3845  {
3846  return HA_ADMIN_NEEDS_ALTER;
3847  }
3848  if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
3849  {
3850  return HA_ADMIN_NEEDS_ALTER;
3851  }
3852  }
3853  if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
3854  return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
3855  }
3856  return 0;
3857 }
3858 
3859 
3860 static bool update_frm_version(TABLE *table)
3861 {
3862  char path[FN_REFLEN];
3863  File file;
3864  int result= 1;
3865  DBUG_ENTER("update_frm_version");
3866 
3867  /*
3868  No need to update frm version in case table was created or checked
3869  by server with the same version. This also ensures that we do not
3870  update frm version for temporary tables as this code doesn't support
3871  temporary tables.
3872  */
3873  if (table->s->mysql_version == MYSQL_VERSION_ID)
3874  DBUG_RETURN(0);
3875 
3876  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
3877 
3878  if ((file= mysql_file_open(key_file_frm,
3879  path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
3880  {
3881  uchar version[4];
3882 
3883  int4store(version, MYSQL_VERSION_ID);
3884 
3885  if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
3886  goto err;
3887 
3888  table->s->mysql_version= MYSQL_VERSION_ID;
3889  }
3890 err:
3891  if (file >= 0)
3892  (void) mysql_file_close(file, MYF(MY_WME));
3893  DBUG_RETURN(result);
3894 }
3895 
3896 
3897 
3902 uint handler::get_dup_key(int error)
3903 {
3904  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3905  m_lock_type != F_UNLCK);
3906  DBUG_ENTER("handler::get_dup_key");
3907  table->file->errkey = (uint) -1;
3908  if (error == HA_ERR_FOUND_DUPP_KEY ||
3909  error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
3910  error == HA_ERR_DROP_INDEX_FK)
3911  table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
3912  DBUG_RETURN(table->file->errkey);
3913 }
3914 
3915 
3931 int handler::delete_table(const char *name)
3932 {
3933  int saved_error= 0;
3934  int error= 0;
3935  int enoent_or_zero= ENOENT; // Error if no file was deleted
3936  char buff[FN_REFLEN];
3937  DBUG_ASSERT(m_lock_type == F_UNLCK);
3938 
3939  for (const char **ext=bas_ext(); *ext ; ext++)
3940  {
3941  fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
3942  if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
3943  {
3944  if (my_errno != ENOENT)
3945  {
3946  /*
3947  If error on the first existing file, return the error.
3948  Otherwise delete as much as possible.
3949  */
3950  if (enoent_or_zero)
3951  return my_errno;
3952  saved_error= my_errno;
3953  }
3954  }
3955  else
3956  enoent_or_zero= 0; // No error for ENOENT
3957  error= enoent_or_zero;
3958  }
3959  return saved_error ? saved_error : error;
3960 }
3961 
3962 
3963 int handler::rename_table(const char * from, const char * to)
3964 {
3965  int error= 0;
3966  const char **ext, **start_ext;
3967  start_ext= bas_ext();
3968  for (ext= start_ext; *ext ; ext++)
3969  {
3970  if (rename_file_ext(from, to, *ext))
3971  {
3972  if ((error=my_errno) != ENOENT)
3973  break;
3974  error= 0;
3975  }
3976  }
3977  if (error)
3978  {
3979  /* Try to revert the rename. Ignore errors. */
3980  for (; ext >= start_ext; ext--)
3981  rename_file_ext(to, from, *ext);
3982  }
3983  return error;
3984 }
3985 
3986 
3987 void handler::drop_table(const char *name)
3988 {
3989  close();
3990  delete_table(name);
3991 }
3992 
3993 
4009 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4010 {
4011  int error;
4012  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4013  m_lock_type != F_UNLCK);
4014 
4015  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4016  (check_opt->sql_flags & TT_FOR_UPGRADE))
4017  return 0;
4018 
4019  if (table->s->mysql_version < MYSQL_VERSION_ID)
4020  {
4021  if ((error= check_old_types()))
4022  return error;
4023  error= ha_check_for_upgrade(check_opt);
4024  if (error && (error != HA_ADMIN_NEEDS_CHECK))
4025  return error;
4026  if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
4027  return 0;
4028  }
4029  if ((error= check(thd, check_opt)))
4030  return error;
4031  /* Skip updating frm version if not main handler. */
4032  if (table->file != this)
4033  return error;
4034  return update_frm_version(table);
4035 }
4036 
4042 inline
4043 void
4044 handler::mark_trx_read_write()
4045 {
4046  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4047  /*
4048  When a storage engine method is called, the transaction must
4049  have been started, unless it's a DDL call, for which the
4050  storage engine starts the transaction internally, and commits
4051  it internally, without registering in the ha_list.
4052  Unfortunately here we can't know know for sure if the engine
4053  has registered the transaction or not, so we must check.
4054  */
4055  if (ha_info->is_started())
4056  {
4057  DBUG_ASSERT(has_transactions());
4058  /*
4059  table_share can be NULL in ha_delete_table(). See implementation
4060  of standalone function ha_delete_table() in sql_base.cc.
4061  */
4062  if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4063  ha_info->set_trx_read_write();
4064  }
4065 }
4066 
4067 
4074 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4075 {
4076  int result;
4077  mark_trx_read_write();
4078 
4079  result= repair(thd, check_opt);
4080  DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4081  ha_table_flags() & HA_CAN_REPAIR);
4082 
4083  if (result == HA_ADMIN_OK)
4084  result= update_frm_version(table);
4085  return result;
4086 }
4087 
4088 
4098 {
4099  DBUG_ENTER("handler::ha_start_bulk_insert");
4100  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4101  m_lock_type == F_WRLCK);
4102  estimation_rows_to_insert= rows;
4103  start_bulk_insert(rows);
4104  DBUG_VOID_RETURN;
4105 }
4106 
4107 
4117 {
4118  DBUG_ENTER("handler::ha_end_bulk_insert");
4119  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4120  m_lock_type == F_WRLCK);
4121  estimation_rows_to_insert= 0;
4122  DBUG_RETURN(end_bulk_insert());
4123 }
4124 
4125 
4132 int
4133 handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4134  uint *dup_key_found)
4135 {
4136  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4137  m_lock_type == F_WRLCK);
4138  mark_trx_read_write();
4139 
4140  return bulk_update_row(old_data, new_data, dup_key_found);
4141 }
4142 
4143 
4150 int
4152 {
4153  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4154  m_lock_type == F_WRLCK);
4155  mark_trx_read_write();
4156 
4157  return delete_all_rows();
4158 }
4159 
4160 
4167 int
4169 {
4170  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4171  m_lock_type == F_WRLCK);
4172  mark_trx_read_write();
4173 
4174  return truncate();
4175 }
4176 
4177 
4184 int
4186 {
4187  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4188  m_lock_type == F_WRLCK);
4189  mark_trx_read_write();
4190 
4191  return reset_auto_increment(value);
4192 }
4193 
4194 
4201 int
4202 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4203 {
4204  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4205  m_lock_type == F_WRLCK);
4206  mark_trx_read_write();
4207 
4208  return optimize(thd, check_opt);
4209 }
4210 
4211 
4218 int
4219 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4220 {
4221  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4222  m_lock_type != F_UNLCK);
4223  mark_trx_read_write();
4224 
4225  return analyze(thd, check_opt);
4226 }
4227 
4228 
4235 bool
4237 {
4238  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4239  m_lock_type == F_UNLCK);
4240  mark_trx_read_write();
4241 
4242  return check_and_repair(thd);
4243 }
4244 
4245 
4252 int
4254 {
4255  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4256  m_lock_type != F_UNLCK);
4257  mark_trx_read_write();
4258 
4259  return disable_indexes(mode);
4260 }
4261 
4262 
4269 int
4271 {
4272  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4273  m_lock_type != F_UNLCK);
4274  mark_trx_read_write();
4275 
4276  return enable_indexes(mode);
4277 }
4278 
4279 
4286 int
4288 {
4289  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4290  m_lock_type == F_WRLCK);
4291  mark_trx_read_write();
4292 
4293  return discard_or_import_tablespace(discard);
4294 }
4295 
4296 
4298  Alter_inplace_info *ha_alter_info)
4299 {
4300  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4301  m_lock_type != F_UNLCK);
4302  mark_trx_read_write();
4303 
4304  return prepare_inplace_alter_table(altered_table, ha_alter_info);
4305 }
4306 
4307 
4309  Alter_inplace_info *ha_alter_info,
4310  bool commit)
4311 {
4312  /*
4313  At this point we should have an exclusive metadata lock on the table.
4314  The exception is if we're about to roll back changes (commit= false).
4315  In this case, we might be rolling back after a failed lock upgrade,
4316  so we could be holding the same lock level as for inplace_alter_table().
4317  */
4318  DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4319  table->s->db.str,
4320  table->s->table_name.str,
4321  MDL_EXCLUSIVE) ||
4322  !commit);
4323 
4324  return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4325 }
4326 
4327 
4328 /*
4329  Default implementation to support in-place alter table
4330  and old online add/drop index API
4331 */
4332 
4333 enum_alter_inplace_result
4335  Alter_inplace_info *ha_alter_info)
4336 {
4337  DBUG_ENTER("check_if_supported_alter");
4338 
4339  HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4340 
4341  Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4343  Alter_inplace_info::ALTER_COLUMN_NAME |
4344  Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4345  Alter_inplace_info::CHANGE_CREATE_OPTION |
4346  Alter_inplace_info::ALTER_RENAME;
4347 
4348  /* Is there at least one operation that requires copy algorithm? */
4349  if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4350  DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4351 
4352  /*
4353  ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4354  ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4355  change column charsets and so not supported in-place through
4356  old API.
4357 
4358  Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4359  not supported as in-place operations in old API either.
4360  */
4361  if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4362  HA_CREATE_USED_DEFAULT_CHARSET |
4363  HA_CREATE_USED_PACK_KEYS |
4364  HA_CREATE_USED_MAX_ROWS) ||
4365  (table->s->row_type != create_info->row_type))
4366  DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4367 
4368  uint table_changes= (ha_alter_info->handler_flags &
4370  IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4371  if (table->file->check_if_incompatible_data(create_info, table_changes)
4372  == COMPATIBLE_DATA_YES)
4373  DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
4374 
4375  DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4376 }
4377 
4378 
4379 /*
4380  Default implementation to support in-place alter table
4381  and old online add/drop index API
4382 */
4383 
4385 {
4386  ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
4387 }
4388 
4389 
4390 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4391  const char *try_instead)
4392 {
4393  if (unsupported_reason == NULL)
4394  my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4395  not_supported, try_instead);
4396  else
4397  my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4398  not_supported, unsupported_reason, try_instead);
4399 }
4400 
4401 
4408 int
4409 handler::ha_rename_table(const char *from, const char *to)
4410 {
4411  DBUG_ASSERT(m_lock_type == F_UNLCK);
4412  mark_trx_read_write();
4413 
4414  return rename_table(from, to);
4415 }
4416 
4417 
4424 int
4425 handler::ha_delete_table(const char *name)
4426 {
4427  DBUG_ASSERT(m_lock_type == F_UNLCK);
4428  mark_trx_read_write();
4429 
4430  return delete_table(name);
4431 }
4432 
4433 
4440 void
4441 handler::ha_drop_table(const char *name)
4442 {
4443  DBUG_ASSERT(m_lock_type == F_UNLCK);
4444  mark_trx_read_write();
4445 
4446  return drop_table(name);
4447 }
4448 
4449 
4456 int
4457 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
4458 {
4459  DBUG_ASSERT(m_lock_type == F_UNLCK);
4460  mark_trx_read_write();
4461 
4462  return create(name, form, info);
4463 }
4464 
4465 
4472 int
4473 handler::ha_create_handler_files(const char *name, const char *old_name,
4474  int action_flag, HA_CREATE_INFO *info)
4475 {
4476  /*
4477  Normally this is done when unlocked, but in fast_alter_partition_table,
4478  it is done on an already locked handler when preparing to alter/rename
4479  partitions.
4480  */
4481  DBUG_ASSERT(m_lock_type == F_UNLCK ||
4482  (!old_name && strcmp(name, table_share->path.str)));
4483  mark_trx_read_write();
4484 
4485  return create_handler_files(name, old_name, action_flag, info);
4486 }
4487 
4488 
4495 int
4497  const char *path,
4498  ulonglong * const copied,
4499  ulonglong * const deleted,
4500  const uchar *pack_frm_data,
4501  size_t pack_frm_len)
4502 {
4503  /*
4504  Must have at least RDLCK or be a TMP table. Read lock is needed to read
4505  from current partitions and write lock will be taken on new partitions.
4506  */
4507  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4508  m_lock_type != F_UNLCK);
4509  mark_trx_read_write();
4510 
4511  return change_partitions(create_info, path, copied, deleted,
4512  pack_frm_data, pack_frm_len);
4513 }
4514 
4515 
4522 int
4524 {
4525  DBUG_ASSERT(!table->db_stat);
4526 
4527  mark_trx_read_write();
4528 
4529  return drop_partitions(path);
4530 }
4531 
4532 
4539 int
4541 {
4542  DBUG_ASSERT(!table->db_stat);
4543  mark_trx_read_write();
4544 
4545  return rename_partitions(path);
4546 }
4547 
4548 
4557 int ha_enable_transaction(THD *thd, bool on)
4558 {
4559  int error=0;
4560  DBUG_ENTER("ha_enable_transaction");
4561  DBUG_PRINT("enter", ("on: %d", (int) on));
4562 
4563  if ((thd->transaction.flags.enabled= on))
4564  {
4565  /*
4566  Now all storage engines should have transaction handling enabled.
4567  But some may have it enabled all the time - "disabling" transactions
4568  is an optimization hint that storage engine is free to ignore.
4569  So, let's commit an open transaction (if any) now.
4570  */
4571  if (!(error= ha_commit_trans(thd, 0)))
4572  error= trans_commit_implicit(thd);
4573  }
4574  DBUG_RETURN(error);
4575 }
4576 
4577 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
4578 {
4579  int error;
4580  DBUG_ENTER("index_next_same");
4581  if (!(error=index_next(buf)))
4582  {
4583  my_ptrdiff_t ptrdiff= buf - table->record[0];
4584  uchar *UNINIT_VAR(save_record_0);
4585  KEY *UNINIT_VAR(key_info);
4586  KEY_PART_INFO *UNINIT_VAR(key_part);
4587  KEY_PART_INFO *UNINIT_VAR(key_part_end);
4588 
4589  /*
4590  key_cmp_if_same() compares table->record[0] against 'key'.
4591  In parts it uses table->record[0] directly, in parts it uses
4592  field objects with their local pointers into table->record[0].
4593  If 'buf' is distinct from table->record[0], we need to move
4594  all record references. This is table->record[0] itself and
4595  the field pointers of the fields used in this key.
4596  */
4597  if (ptrdiff)
4598  {
4599  save_record_0= table->record[0];
4600  table->record[0]= buf;
4601  key_info= table->key_info + active_index;
4602  key_part= key_info->key_part;
4603  key_part_end= key_part + key_info->user_defined_key_parts;
4604  for (; key_part < key_part_end; key_part++)
4605  {
4606  DBUG_ASSERT(key_part->field);
4607  key_part->field->move_field_offset(ptrdiff);
4608  }
4609  }
4610 
4611  if (key_cmp_if_same(table, key, active_index, keylen))
4612  {
4613  table->status=STATUS_NOT_FOUND;
4614  error=HA_ERR_END_OF_FILE;
4615  }
4616 
4617  /* Move back if necessary. */
4618  if (ptrdiff)
4619  {
4620  table->record[0]= save_record_0;
4621  for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
4622  key_part->field->move_field_offset(-ptrdiff);
4623  }
4624  }
4625  DBUG_RETURN(error);
4626 }
4627 
4628 
4629 void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
4630  uint part_id)
4631 {
4632  info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
4633  HA_STATUS_NO_LOCK);
4634  stat_info->records= stats.records;
4635  stat_info->mean_rec_length= stats.mean_rec_length;
4636  stat_info->data_file_length= stats.data_file_length;
4637  stat_info->max_data_file_length= stats.max_data_file_length;
4638  stat_info->index_file_length= stats.index_file_length;
4639  stat_info->delete_length= stats.delete_length;
4640  stat_info->create_time= stats.create_time;
4641  stat_info->update_time= stats.update_time;
4642  stat_info->check_time= stats.check_time;
4643  stat_info->check_sum= 0;
4644  if (table_flags() & (ulong) HA_HAS_CHECKSUM)
4645  stat_info->check_sum= checksum();
4646  return;
4647 }
4648 
4649 
4650 /****************************************************************************
4651 ** Some general functions that aren't in the handler class
4652 ****************************************************************************/
4653 
4662 int ha_create_table(THD *thd, const char *path,
4663  const char *db, const char *table_name,
4664  HA_CREATE_INFO *create_info,
4665  bool update_create_info,
4666  bool is_temp_table)
4667 {
4668  int error= 1;
4669  TABLE table;
4670  char name_buff[FN_REFLEN];
4671  const char *name;
4672  TABLE_SHARE share;
4673  bool saved_abort_on_warning;
4674  DBUG_ENTER("ha_create_table");
4675 #ifdef HAVE_PSI_TABLE_INTERFACE
4676  my_bool temp_table= (my_bool)is_temp_table ||
4677  (my_bool)is_prefix(table_name, tmp_file_prefix) ||
4678  (create_info->options & HA_LEX_CREATE_TMP_TABLE ? TRUE : FALSE);
4679 #endif
4680 
4681  init_tmp_table_share(thd, &share, db, 0, table_name, path);
4682  if (open_table_def(thd, &share, 0))
4683  goto err;
4684 
4685 #ifdef HAVE_PSI_TABLE_INTERFACE
4686  share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
4687 #endif
4688 
4689  if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
4690  TRUE))
4691  goto err;
4692 
4693  if (update_create_info)
4694  update_create_info_from_table(create_info, &table);
4695 
4696  name= get_canonical_filename(table.file, share.path.str, name_buff);
4697 
4698  saved_abort_on_warning = thd->abort_on_warning;
4699  thd->abort_on_warning = false;
4700  error= table.file->ha_create(name, &table, create_info);
4701  thd->abort_on_warning = saved_abort_on_warning;
4702  if (error)
4703  {
4704  table.file->print_error(error, MYF(0));
4705 #ifdef HAVE_PSI_TABLE_INTERFACE
4706  PSI_TABLE_CALL(drop_table_share)
4707  (temp_table, db, strlen(db), table_name, strlen(table_name));
4708 #endif
4709  }
4710  (void) closefrm(&table, 0);
4711 err:
4712  free_table_share(&share);
4713  DBUG_RETURN(error != 0);
4714 }
4715 
4729 int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
4730 {
4731  int error;
4732  uchar *frmblob;
4733  size_t frmlen;
4734  char path[FN_REFLEN + 1];
4735  HA_CREATE_INFO create_info;
4736  TABLE table;
4737  TABLE_SHARE share;
4738  DBUG_ENTER("ha_create_table_from_engine");
4739  DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
4740 
4741  memset(&create_info, 0, sizeof(create_info));
4742  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
4743  {
4744  /* Table could not be discovered and thus not created */
4745  DBUG_RETURN(error);
4746  }
4747 
4748  /*
4749  Table exists in handler and could be discovered
4750  frmblob and frmlen are set, write the frm to disk
4751  */
4752 
4753  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
4754  // Save the frm file
4755  error= writefrm(path, frmblob, frmlen);
4756  my_free(frmblob);
4757  if (error)
4758  DBUG_RETURN(2);
4759 
4760  init_tmp_table_share(thd, &share, db, 0, name, path);
4761  if (open_table_def(thd, &share, 0))
4762  {
4763  DBUG_RETURN(3);
4764  }
4765 
4766 #ifdef HAVE_PSI_TABLE_INTERFACE
4767  /*
4768  Table discovery is not instrumented.
4769  Once discovered, the table will be opened normally,
4770  and instrumented normally.
4771  */
4772 #endif
4773 
4774  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
4775  {
4776  free_table_share(&share);
4777  DBUG_RETURN(3);
4778  }
4779 
4780  update_create_info_from_table(&create_info, &table);
4781  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
4782 
4783  get_canonical_filename(table.file, path, path);
4784  error=table.file->ha_create(path, &table, &create_info);
4785  (void) closefrm(&table, 1);
4786 
4787  DBUG_RETURN(error != 0);
4788 }
4789 
4790 
4802 bool
4803 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
4804  bool *exists)
4805 {
4806  uchar *frmblob= NULL;
4807  size_t frmlen;
4808  DBUG_ENTER("ha_check_if_table_exists");
4809 
4810  *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
4811  if (*exists)
4812  my_free(frmblob);
4813 
4814  DBUG_RETURN(FALSE);
4815 }
4816 
4843  const char *table_name)
4844 {
4845  DBUG_ENTER("ha_check_if_supported_system_table");
4846  st_sys_tbl_chk_params check_params;
4847  bool is_system_database= false;
4848  const char **names;
4849  st_system_tablename *systab;
4850 
4851  // Check if we have a system database name in the command.
4852  DBUG_ASSERT(known_system_databases != NULL);
4853  names= known_system_databases;
4854  while (names && *names)
4855  {
4856  if (strcmp(*names, db) == 0)
4857  {
4858  /* Used to compare later, will be faster */
4859  check_params.db= *names;
4860  is_system_database= true;
4861  break;
4862  }
4863  names++;
4864  }
4865  if (!is_system_database)
4866  DBUG_RETURN(true); // It's a user table name.
4867 
4868  // Check if this is SQL layer system tables.
4869  systab= mysqld_system_tables;
4870  check_params.is_sql_layer_system_table= false;
4871  while (systab && systab->db)
4872  {
4873  if (systab->db == check_params.db &&
4874  strcmp(systab->tablename, table_name) == 0)
4875  {
4876  check_params.is_sql_layer_system_table= true;
4877  break;
4878  }
4879  systab++;
4880  }
4881 
4882  // Check if this is a system table and if some engine supports it.
4883  check_params.status= check_params.is_sql_layer_system_table ?
4884  st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE :
4885  st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
4886  check_params.db_type= hton->db_type;
4887  check_params.table_name= table_name;
4888  plugin_foreach(NULL, check_engine_system_table_handlerton,
4889  MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
4890 
4891  // SE does not support this system table.
4892  if (check_params.status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
4893  DBUG_RETURN(false);
4894 
4895  // It's a system table or a valid user table.
4896  DBUG_RETURN(true);
4897 }
4898 
4920 static my_bool check_engine_system_table_handlerton(THD *unused,
4921  plugin_ref plugin,
4922  void *arg)
4923 {
4924  st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
4925  handlerton *hton= plugin_data(plugin, handlerton *);
4926 
4927  // Do we already know that the table is a system table?
4928  if (check_params->status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
4929  {
4930  /*
4931  If this is the same SE specified in the command, we can
4932  simply ask the SE if it supports it stop the search regardless.
4933  */
4934  if (hton->db_type == check_params->db_type)
4935  {
4936  if (hton->is_supported_system_table &&
4937  hton->is_supported_system_table(check_params->db,
4938  check_params->table_name,
4939  check_params->is_sql_layer_system_table))
4940  check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
4941  return TRUE;
4942  }
4943  /*
4944  If this is a different SE, there is no point in asking the SE
4945  since we already know it's a system table and we don't care
4946  if it is supported or not.
4947  */
4948  return FALSE;
4949  }
4950 
4951  /*
4952  We don't yet know if the table is a system table or not.
4953  We therefore must always ask the SE.
4954  */
4955  if (hton->is_supported_system_table &&
4956  hton->is_supported_system_table(check_params->db,
4957  check_params->table_name,
4958  check_params->is_sql_layer_system_table))
4959  {
4960  /*
4961  If this is the same SE specified in the command, we know it's a
4962  supported system table and can stop the search.
4963  */
4964  if (hton->db_type == check_params->db_type)
4965  {
4966  check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
4967  return TRUE;
4968  }
4969  else
4970  check_params->status= st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE;
4971  }
4972 
4973  return FALSE;
4974 }
4975 
4976 /*
4977  Prepare list of all known system database names
4978  current we just have 'mysql' as system database name.
4979 
4980  Later ndbcluster, innodb SE's can define some new database
4981  name which can store system tables specific to SE.
4982 */
4983 const char** ha_known_system_databases(void)
4984 {
4985  list<const char*> found_databases;
4986  const char **databases, **database;
4987 
4988  // Get mysqld system database name.
4989  found_databases.push_back((char*) mysqld_system_database);
4990 
4991  // Get system database names from every specific storage engine.
4992  plugin_foreach(NULL, system_databases_handlerton,
4993  MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
4994 
4995  databases= (const char **) my_once_alloc(sizeof(char *)*
4996  (found_databases.size()+1),
4997  MYF(MY_WME | MY_FAE));
4998  DBUG_ASSERT(databases != NULL);
4999 
5000  list<const char*>::iterator it;
5001  database= databases;
5002  for (it= found_databases.begin(); it != found_databases.end(); it++)
5003  *database++= *it;
5004  *database= 0; // Last element.
5005 
5006  return databases;
5007 }
5008 
5015 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5016  void *arg)
5017 {
5018  list<const char*> *found_databases= (list<const char*> *) arg;
5019  const char *db;
5020 
5021  handlerton *hton= plugin_data(plugin, handlerton *);
5022  if (hton->system_database)
5023  {
5024  db= hton->system_database();
5025  if (db)
5026  found_databases->push_back(db);
5027  }
5028 
5029  return FALSE;
5030 }
5031 
5032 void st_ha_check_opt::init()
5033 {
5034  flags= sql_flags= 0;
5035 }
5036 
5037 
5038 /*****************************************************************************
5039  Key cache handling.
5040 
5041  This code is only relevant for ISAM/MyISAM tables
5042 
5043  key_cache->cache may be 0 only in the case where a key cache is not
5044  initialized or when we were not able to init the key cache in a previous
5045  call to ha_init_key_cache() (probably out of memory)
5046 *****************************************************************************/
5047 
5051 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5052 {
5053  DBUG_ENTER("ha_init_key_cache");
5054 
5055  if (!key_cache->key_cache_inited)
5056  {
5057  mysql_mutex_lock(&LOCK_global_system_variables);
5058  size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5059  uint tmp_block_size= (uint) key_cache->param_block_size;
5060  uint division_limit= key_cache->param_division_limit;
5061  uint age_threshold= key_cache->param_age_threshold;
5062  mysql_mutex_unlock(&LOCK_global_system_variables);
5063  DBUG_RETURN(!init_key_cache(key_cache,
5064  tmp_block_size,
5065  tmp_buff_size,
5066  division_limit, age_threshold));
5067  }
5068  DBUG_RETURN(0);
5069 }
5070 
5071 
5076 {
5077  DBUG_ENTER("ha_resize_key_cache");
5078 
5079  if (key_cache->key_cache_inited)
5080  {
5081  mysql_mutex_lock(&LOCK_global_system_variables);
5082  size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5083  long tmp_block_size= (long) key_cache->param_block_size;
5084  uint division_limit= key_cache->param_division_limit;
5085  uint age_threshold= key_cache->param_age_threshold;
5086  mysql_mutex_unlock(&LOCK_global_system_variables);
5087  DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5088  tmp_buff_size,
5089  division_limit, age_threshold));
5090  }
5091  DBUG_RETURN(0);
5092 }
5093 
5094 
5099 {
5100  if (key_cache->key_cache_inited)
5101  {
5102  mysql_mutex_lock(&LOCK_global_system_variables);
5103  uint division_limit= key_cache->param_division_limit;
5104  uint age_threshold= key_cache->param_age_threshold;
5105  mysql_mutex_unlock(&LOCK_global_system_variables);
5106  change_key_cache_param(key_cache, division_limit, age_threshold);
5107  }
5108  return 0;
5109 }
5110 
5114 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5115  KEY_CACHE *new_key_cache)
5116 {
5117  mi_change_key_cache(old_key_cache, new_key_cache);
5118  return 0;
5119 }
5120 
5121 
5133 {
5134  const char *db;
5135  const char *name;
5136  uchar **frmblob;
5137  size_t *frmlen;
5138 };
5139 
5140 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5141  void *arg)
5142 {
5143  st_discover_args *vargs= (st_discover_args *)arg;
5144  handlerton *hton= plugin_data(plugin, handlerton *);
5145  if (hton->state == SHOW_OPTION_YES && hton->discover &&
5146  (!(hton->discover(hton, thd, vargs->db, vargs->name,
5147  vargs->frmblob,
5148  vargs->frmlen))))
5149  return TRUE;
5150 
5151  return FALSE;
5152 }
5153 
5154 int ha_discover(THD *thd, const char *db, const char *name,
5155  uchar **frmblob, size_t *frmlen)
5156 {
5157  int error= -1; // Table does not exist in any handler
5158  DBUG_ENTER("ha_discover");
5159  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5160  st_discover_args args= {db, name, frmblob, frmlen};
5161 
5162  if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5163  DBUG_RETURN(error);
5164 
5165  if (plugin_foreach(thd, discover_handlerton,
5166  MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5167  error= 0;
5168 
5169  if (!error)
5170  status_var_increment(thd->status_var.ha_discover_count);
5171  DBUG_RETURN(error);
5172 }
5173 
5174 
5181 {
5182  const char *db;
5183  const char *path;
5184  const char *wild;
5185  bool dir;
5186  List<LEX_STRING> *files;
5187 };
5188 
5189 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5190  void *arg)
5191 {
5192  st_find_files_args *vargs= (st_find_files_args *)arg;
5193  handlerton *hton= plugin_data(plugin, handlerton *);
5194 
5195 
5196  if (hton->state == SHOW_OPTION_YES && hton->find_files)
5197  if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5198  vargs->dir, vargs->files))
5199  return TRUE;
5200 
5201  return FALSE;
5202 }
5203 
5204 int
5205 ha_find_files(THD *thd,const char *db,const char *path,
5206  const char *wild, bool dir, List<LEX_STRING> *files)
5207 {
5208  int error= 0;
5209  DBUG_ENTER("ha_find_files");
5210  DBUG_PRINT("enter", ("db: '%s' path: '%s' wild: '%s' dir: %d",
5211  db, path, wild ? wild : "NULL", dir));
5212  st_find_files_args args= {db, path, wild, dir, files};
5213 
5214  plugin_foreach(thd, find_files_handlerton,
5215  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5216  /* The return value is not currently used */
5217  DBUG_RETURN(error);
5218 }
5219 
5230 {
5231  const char *db;
5232  const char *name;
5233  int err;
5234 };
5235 
5236 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5237  void *arg)
5238 {
5240  handlerton *hton= plugin_data(plugin, handlerton *);
5241 
5242  int err= HA_ERR_NO_SUCH_TABLE;
5243 
5244  if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5245  err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5246 
5247  vargs->err = err;
5248  if (vargs->err == HA_ERR_TABLE_EXIST)
5249  return TRUE;
5250 
5251  return FALSE;
5252 }
5253 
5254 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5255 {
5256  DBUG_ENTER("ha_table_exists_in_engine");
5257  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5258  st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5259  plugin_foreach(thd, table_exists_in_engine_handlerton,
5260  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5261  DBUG_PRINT("exit", ("error: %d", args.err));
5262  DBUG_RETURN(args.err);
5263 }
5264 
5270 {
5271  const AQP::Join_plan* plan; // Query plan provided by optimizer
5272  int err; // Error code to return.
5273 };
5274 
5275 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5276  void *arg)
5277 {
5279  handlerton *hton= plugin_data(plugin, handlerton *);
5280 
5281  if (hton && hton->make_pushed_join)
5282  {
5283  const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5284  if (unlikely(error))
5285  {
5286  vargs->err = error;
5287  return TRUE;
5288  }
5289  }
5290  return FALSE;
5291 }
5292 
5293 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5294 {
5295  DBUG_ENTER("ha_make_pushed_joins");
5296  st_make_pushed_join_args args= {plan, 0};
5297  plugin_foreach(thd, make_pushed_join_handlerton,
5298  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5299  DBUG_PRINT("exit", ("error: %d", args.err));
5300  DBUG_RETURN(args.err);
5301 }
5302 
5303 /*
5304  TODO: change this into a dynamic struct
5305  List<handlerton> does not work as
5306  1. binlog_end is called when MEM_ROOT is gone
5307  2. cannot work with thd MEM_ROOT as memory should be freed
5308 */
5309 #define MAX_HTON_LIST_ST 63
5311 {
5312  handlerton *hton[MAX_HTON_LIST_ST];
5313  uint sz;
5314 };
5315 
5317 {
5318  enum_binlog_func fn;
5319  void *arg;
5320 };
5321 
5325 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5326 {
5327  hton_list_st *hton_list= (hton_list_st *)arg;
5328  handlerton *hton= plugin_data(plugin, handlerton *);
5329  if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5330  {
5331  uint sz= hton_list->sz;
5332  if (sz == MAX_HTON_LIST_ST-1)
5333  {
5334  /* list full */
5335  return FALSE;
5336  }
5337  hton_list->hton[sz]= hton;
5338  hton_list->sz= sz+1;
5339  }
5340  return FALSE;
5341 }
5342 
5343 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5344 {
5345  hton_list_st hton_list;
5346  uint i, sz;
5347 
5348  hton_list.sz= 0;
5349  plugin_foreach(thd, binlog_func_list,
5350  MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5351 
5352  for (i= 0, sz= hton_list.sz; i < sz ; i++)
5353  hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5354  return FALSE;
5355 }
5356 
#ifdef HAVE_NDB_BINLOG

/**
  Send BFN_RESET_LOGS to every engine that has a binlog_func hook.

  @return Always 0.
*/
int ha_reset_logs(THD *thd)
{
  binlog_func_st request= {BFN_RESET_LOGS, 0};
  binlog_func_foreach(thd, &request);
  return 0;
}

/** Send BFN_RESET_SLAVE to every engine that has a binlog_func hook. */
void ha_reset_slave(THD* thd)
{
  binlog_func_st request= {BFN_RESET_SLAVE, 0};
  binlog_func_foreach(thd, &request);
}

/** Send BFN_BINLOG_WAIT to every engine that has a binlog_func hook. */
void ha_binlog_wait(THD* thd)
{
  binlog_func_st request= {BFN_BINLOG_WAIT, 0};
  binlog_func_foreach(thd, &request);
}

/**
  Send BFN_BINLOG_PURGE_FILE, with the file name as argument, to every
  engine that has a binlog_func hook.

  @return Always 0.
*/
int ha_binlog_index_purge_file(THD *thd, const char *file)
{
  binlog_func_st request= {BFN_BINLOG_PURGE_FILE, (void *)file};
  binlog_func_foreach(thd, &request);
  return 0;
}

/** Arguments forwarded to handlerton::binlog_log_query. */
struct binlog_log_query_st
{
  enum_binlog_command binlog_command;
  const char *query;
  uint query_length;
  const char *db;
  const char *table_name;
};

/**
  Call the binlog_log_query hook of one engine, if the engine is enabled
  and provides the hook.

  @param thd   Thread handle.
  @param hton  Engine to call.
  @param args  Pointer to a binlog_log_query_st.

  @return Always FALSE.
*/
static my_bool binlog_log_query_handlerton2(THD *thd,
                                            handlerton *hton,
                                            void *args)
{
  struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;

  if (hton->state != SHOW_OPTION_YES || !hton->binlog_log_query)
    return FALSE;

  hton->binlog_log_query(hton, thd,
                         b->binlog_command,
                         b->query,
                         b->query_length,
                         b->db,
                         b->table_name);
  return FALSE;
}

/**
  plugin_foreach() adapter around binlog_log_query_handlerton2().
*/
static my_bool binlog_log_query_handlerton(THD *thd,
                                           plugin_ref plugin,
                                           void *args)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  return binlog_log_query_handlerton2(thd, hton, args);
}

/**
  Pass a query to the binlog_log_query hook of one engine, or of all
  engines when 'hton' is 0.
*/
void ha_binlog_log_query(THD *thd, handlerton *hton,
                         enum_binlog_command binlog_command,
                         const char *query, uint query_length,
                         const char *db, const char *table_name)
{
  struct binlog_log_query_st b;
  b.binlog_command= binlog_command;
  b.query= query;
  b.query_length= query_length;
  b.db= db;
  b.table_name= table_name;

  if (hton != 0)
  {
    binlog_log_query_handlerton2(thd, hton, &b);
    return;
  }

  plugin_foreach(thd, binlog_log_query_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &b);
}
#endif
5434 
5435 int ha_binlog_end(THD* thd)
5436 {
5437  binlog_func_st bfn= {BFN_BINLOG_END, 0};
5438  binlog_func_foreach(thd, &bfn);
5439  return 0;
5440 }
5441 
5463 double handler::index_only_read_time(uint keynr, double records)
5464 {
5465  double read_time;
5466  uint keys_per_block= (stats.block_size/2/
5467  (table_share->key_info[keynr].key_length + ref_length) +
5468  1);
5469  read_time=((double) (records + keys_per_block-1) /
5470  (double) keys_per_block);
5471  return read_time;
5472 }
5473 
5474 
5493 bool key_uses_partial_cols(TABLE *table, uint keyno)
5494 {
5495  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
5496  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
5497  for (; kp != kp_end; kp++)
5498  {
5499  if (!kp->field->part_of_key.is_set(keyno))
5500  return TRUE;
5501  }
5502  return FALSE;
5503 }
5504 
5505 /****************************************************************************
5506  * Default MRR implementation (MRR to non-MRR converter)
5507  ***************************************************************************/
5508 
/*
  Default-implementation row and cost estimate for a multi-range read.

  Walks the range sequence obtained from seq->init()/seq->next(), adds up a
  row estimate per range and, on success, fills in *flags and *cost.

  NOTE(review): this listing skips the line between "ha_rows" and the
  parameter list below (the embedded numbering jumps from 5540 to 5542), so
  the function name and its leading parameters are not visible here. The
  body refers to `keyno` and `seq`, which presumably are declared on that
  line.

  Parameters visible in this listing:
    seq_init_param  Passed through to seq->init().
    n_ranges_arg    NOTE(review): not referenced in the visible body; the
                    local n_ranges (still 0 at that point) is what gets
                    passed to seq->init().
    bufsz    OUT    Set to 0.
    flags    IN/OUT Read for seq->init() and for HA_MRR_INDEX_ONLY; on
                    success HA_MRR_USE_DEFAULT_IMPL and HA_MRR_SUPPORT_SORTED
                    are OR-ed in.
    cost     OUT    Asserted to be zero on entry when the estimate succeeds;
                    receives IO and CPU cost.

  Returns the sum of the per-range estimates, or HA_POS_ERROR when the
  thread is marked killed or records_in_range() returns HA_POS_ERROR.
*/
5540 ha_rows
5542  void *seq_init_param, uint n_ranges_arg,
5543  uint *bufsz, uint *flags,
5544  Cost_estimate *cost)
5545 {
5546  KEY_MULTI_RANGE range;
5547  range_seq_t seq_it;
5548  ha_rows rows, total_rows= 0;
5549  uint n_ranges=0;
5550  THD *thd= current_thd;
5551 
5552  /* Default MRR implementation doesn't need buffer */
5553  *bufsz= 0;
5554 
5555  DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);
5556 
5557  seq_it= seq->init(seq_init_param, n_ranges, *flags);
5558  while (!seq->next(seq_it, &range))
5559  {
     /* Abort the estimate as soon as the thread has been killed. */
5560  if (unlikely(thd->killed != 0))
5561  return HA_POS_ERROR;
5562 
5563  n_ranges++;
5564  key_range *min_endp, *max_endp;
5565  if (range.range_flag & GEOM_FLAG)
5566  {
5567  /* In this case tmp_min_flag contains the handler-read-function */
5568  range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
5569  min_endp= &range.start_key;
5570  max_endp= NULL;
5571  }
5572  else
5573  {
     /* An endpoint with zero length means the range is open on that side. */
5574  min_endp= range.start_key.length? &range.start_key : NULL;
5575  max_endp= range.end_key.length? &range.end_key : NULL;
5576  }
5577  /*
5578  Get the number of rows in the range. This is done by calling
5579  records_in_range() unless:
5580 
5581  1) The range is an equality range and the index is unique.
5582  There cannot be more than one matching row, so 1 is
5583  assumed. Note that it is possible that the correct number
5584  is actually 0, so the row estimate may be too high in this
5585  case. Also note: ranges of the form "x IS NULL" may have more
5586  than 1 matching row so records_in_range() is called for these.
5587  2) a) The range is an equality range but the index is either
5588  not unique or all of the keyparts are not used.
5589  b) The user has requested that index statistics should be used
5590  for equality ranges to avoid the incurred overhead of
5591  index dives in records_in_range().
5592  c) Index statistics is available.
5593  Ranges of the form "x IS NULL" will not use index statistics
5594  because the number of rows with this value are likely to be
5595  very different than the values in the index statistics.
5596  */
5597  int keyparts_used= 0;
5598  if ((range.range_flag & UNIQUE_RANGE) && // 1)
5599  !(range.range_flag & NULL_RANGE))
5600  rows= 1; /* there can be at most one row */
5601  else if ((range.range_flag & EQ_RANGE) && // 2a)
5602  (range.range_flag & USE_INDEX_STATISTICS) && // 2b)
5603  (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
5604  table->key_info[keyno].rec_per_key[keyparts_used-1] && // 2c)
5605  !(range.range_flag & NULL_RANGE))
5606  rows= table->key_info[keyno].rec_per_key[keyparts_used-1];
5607  else
5608  {
5609  DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
5610  DBUG_ASSERT(min_endp || max_endp);
5611  if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
5612  max_endp)))
5613  {
5614  /* Can't scan one range => can't do MRR scan at all */
5615  total_rows= HA_POS_ERROR;
5616  break;
5617  }
5618  }
5619  total_rows += rows;
5620  }
5621 
5622  if (total_rows != HA_POS_ERROR)
5623  {
5624  /* The following calculation is the same as in multi_range_read_info(): */
5625  *flags|= HA_MRR_USE_DEFAULT_IMPL;
5626  *flags|= HA_MRR_SUPPORT_SORTED;
5627 
5628  DBUG_ASSERT(cost->is_zero());
5629  if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
5630  cost->add_io(index_only_read_time(keyno, total_rows) *
     /* NOTE(review): listing line 5631 (the right-hand factor) is missing here. */
5632  else
5633  cost->add_io(read_time(keyno, n_ranges, total_rows) *
     /* NOTE(review): listing line 5634 (the right-hand factor) is missing here. */
5635  cost->add_cpu(total_rows * ROW_EVALUATE_COST + 0.01);
5636  }
5637  return total_rows;
5638 }
5639 
5640 
/*
  Default-implementation cost estimate for a multi-range read when only the
  number of ranges and rows is known.

  Parameters:
    keyno     Index number, passed to the read-time estimators.
    n_ranges  Number of ranges, passed to read_time().
    n_rows    Estimated number of rows to read.
    bufsz     OUT    Set to 0 (no buffer is needed).
    flags     IN/OUT HA_MRR_USE_DEFAULT_IMPL and HA_MRR_SUPPORT_SORTED are
                     OR-ed in; HA_MRR_INDEX_ONLY selects the estimator.
    cost      OUT    Asserted to be zero on entry; receives the IO cost.

  Always returns 0.
*/
5675 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
5676  uint *bufsz, uint *flags,
5677  Cost_estimate *cost)
5678 {
5679  *bufsz= 0; /* Default implementation doesn't need a buffer */
5680 
5681  *flags|= HA_MRR_USE_DEFAULT_IMPL;
5682  *flags|= HA_MRR_SUPPORT_SORTED;
5683 
5684  DBUG_ASSERT(cost->is_zero());
5685 
5686  /* Produce the same cost as non-MRR code does */
5687  if (*flags & HA_MRR_INDEX_ONLY)
5688  cost->add_io(index_only_read_time(keyno, n_rows) *
     /* NOTE(review): listing line 5689 (the right-hand factor) is missing here. */
5690  else
5691  cost->add_io(read_time(keyno, n_ranges, n_rows) *
     /* NOTE(review): listing line 5692 (the right-hand factor) is missing here. */
5693  return 0;
5694 }
5695 
5696 
5738 int
5739 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
5740  uint n_ranges, uint mode, HANDLER_BUFFER *buf)
5741 {
5742  DBUG_ENTER("handler::multi_range_read_init");
5743  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
5744  mrr_funcs= *seq_funcs;
5745  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
5746  mrr_have_range= FALSE;
5747  DBUG_RETURN(0);
5748 }
5749 
5750 
/*
  Default implementation: fetch the next row of a multi-range read.

  On the first call after initialisation (mrr_have_range is false) control
  jumps straight to fetching the first range. On later calls the current
  range is continued with read_range_next(), unless its range_flag is
  exactly UNIQUE_RANGE | EQ_RANGE. Whenever the current range reports
  HA_ERR_END_OF_FILE, further ranges are taken from the sequence and opened
  with read_range_first() until one yields a different result or the
  sequence is exhausted.

  Parameters:
    range_info  OUT  Receives mrr_cur_range.ptr.

  Returns the result of the last read call; HA_ERR_END_OF_FILE is the
  initial value and is returned when nothing else replaces it.
*/
5764 int handler::multi_range_read_next(char **range_info)
5765 {
5766  int result= HA_ERR_END_OF_FILE;
5767  int range_res;
5768  DBUG_ENTER("handler::multi_range_read_next");
5769 
5770  if (!mrr_have_range)
5771  {
     /* First call: there is no current range to continue. */
5772  mrr_have_range= TRUE;
5773  goto start;
5774  }
5775 
5776  do
5777  {
5778  /* Save a call if there can be only one row in range. */
5779  if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
5780  {
5781  result= read_range_next();
5782  /* On success or non-EOF errors jump to the end. */
5783  if (result != HA_ERR_END_OF_FILE)
5784  break;
5785  }
5786  else
5787  {
     /*
       NOTE(review): listing line 5788 is missing here; presumably it holds
       the condition guarding the goto below. Confirm against the original
       file.
     */
5789  goto scan_it_again;
5790  }
5791 
5792 start:
5793  /* Try the next range(s) until one matches a record. */
5794  while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
5795  {
5796 scan_it_again:
     /* An endpoint with an empty keypart_map is passed as "no key". */
5797  result= read_range_first(mrr_cur_range.start_key.keypart_map ?
5798  &mrr_cur_range.start_key : 0,
5799  mrr_cur_range.end_key.keypart_map ?
5800  &mrr_cur_range.end_key : 0,
5801  test(mrr_cur_range.range_flag & EQ_RANGE),
5802  mrr_is_output_sorted);
5803  if (result != HA_ERR_END_OF_FILE)
5804  break;
5805  }
5806  }
5807  while ((result == HA_ERR_END_OF_FILE) && !range_res);
5808 
5809  *range_info= mrr_cur_range.ptr;
5810  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
5811  DBUG_RETURN(result);
5812 }
5813 
5814 
5815 /****************************************************************************
5816  * DS-MRR implementation
5817  ***************************************************************************/
5818 
/*
  DS-MRR implementation: initialise a multi-range read scan.

  NOTE(review): the first line of this definition is missing from the
  listing (the embedded numbering starts at 5857). The DBUG_ENTER tag names
  the function DsMrr_impl::dsmrr_init, and the body uses `h_arg` and
  `seq_funcs`, which presumably are declared on the missing line.

  Outline of the visible body:
    - Falls back to handler::multi_range_read_init() on `h` when the MRR
      optimizer switch is off or `mode` has HA_MRR_USE_DEFAULT_IMPL or
      HA_MRR_SORTED set.
    - Otherwise trims the rowid buffer to a whole number of elements, clones
      the primary handler into h2 if there is no h2 yet, opens the index
      scan on h2, closes any open index scan on h, initialises the default
      MRR on h2, fills the rowid buffer and puts h into rnd mode.

  Parameters visible in this listing:
    seq_init_param  Opaque argument for the range sequence.
    n_ranges        Number of ranges.
    mode            HA_MRR_* flags.
    buf             Buffer used for rowids; end_of_used_area is updated when
                    the whole sequence was consumed by the first fill.

  Returns 0 on success and non-zero on failure. Failures that jump to the
  `error` label close and delete h2; the two early DBUG_RETURN(1) exits in
  the clone step return before h2 has been assigned.
*/
5857  void *seq_init_param, uint n_ranges, uint mode,
5858  HANDLER_BUFFER *buf)
5859 {
5860  uint elem_size;
5861  int retval= 0;
5862  DBUG_ENTER("DsMrr_impl::dsmrr_init");
5863  THD *thd= h_arg->table->in_use; // current THD
5864 
5865  /*
5866  index_merge may invoke a scan on an object for which dsmrr_info[_const]
5867  has not been called, so set the owner handler here as well.
5868  */
5869  h= h_arg;
5870  if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
5871  mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
5872  {
5873  use_default_impl= TRUE;
5874  retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
5875  n_ranges, mode, buf);
5876  DBUG_RETURN(retval);
5877  }
5878 
5879  /*
5880  This assert will hit if we have pushed an index condition to the
5881  primary key index and then "change our mind" and use a different
5882  index for retrieving data with MRR. One of the following criteria
5883  must be true:
5884  1. We have not pushed an index condition on this handler.
5885  2. We have pushed an index condition and this is on the currently used
5886  index.
5887  3. We have pushed an index condition but this is not for the primary key.
5888  4. We have pushed an index condition and this has been transferred to
5889  the clone (h2) of the handler object.
5890  */
5891  DBUG_ASSERT(!h->pushed_idx_cond ||
5892  h->pushed_idx_cond_keyno == h->active_index ||
5893  h->pushed_idx_cond_keyno != table->s->primary_key ||
5894  (h2 && h->pushed_idx_cond_keyno == h2->active_index));
5895 
5896  rowids_buf= buf->buffer;
5897 
5898  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
5899 
5900  if (is_mrr_assoc)
5901  status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
5902 
     /*
       Each buffer element is a rowid, followed by a range pointer when
       association is enabled. Round the usable area down to a whole number
       of elements.
     */
5903  rowids_buf_end= buf->buffer_end;
5904  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
5905  rowids_buf_last= rowids_buf +
5906  ((rowids_buf_end - rowids_buf)/ elem_size)*
5907  elem_size;
5908  rowids_buf_end= rowids_buf_last;
5909 
5910  /*
5911  The DS-MRR scan uses a second handler object (h2) for doing the
5912  index scan. Create this by cloning the primary handler
5913  object. The h2 handler object is deleted when DsMrr_impl::reset()
5914  is called.
5915  */
5916  if (!h2)
5917  {
5918  handler *new_h2;
5919  /*
5920  ::clone() takes up a lot of stack, especially on 64 bit platforms.
5921  The constant 5 is an empiric result.
5922  @todo Is this still the case? Leave it as it is for now but could
5923  likely be removed?
5924  */
5925  if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
5926  DBUG_RETURN(1);
5927 
5928  if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
5929  DBUG_RETURN(1);
5930  h2= new_h2; /* Ok, now can put it into h2 */
5931  table->prepare_for_position();
5932  }
5933 
5934  /*
5935  Open the index scan on h2 using the key from the primary handler.
5936  */
5937  if (h2->active_index == MAX_KEY)
5938  {
5939  DBUG_ASSERT(h->active_index != MAX_KEY);
5940  const uint mrr_keyno= h->active_index;
5941 
5942  if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
5943  goto error;
5944 
5945  if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
5946  goto error;
5947 
5948  if ((retval= h2->ha_index_init(mrr_keyno, false)))
5949  goto error;
5950 
5951  // Transfer ICP from h to h2
5952  if (mrr_keyno == h->pushed_idx_cond_keyno)
5953  {
5954  if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
5955  {
5956  retval= 1;
5957  goto error;
5958  }
5959  }
5960  else
5961  {
5962  // Cancel any potentially previously pushed index conditions
5963  h2->cancel_pushed_idx_cond();
5964  }
5965  }
5966  else
5967  {
5968  /*
5969  h2 has already an open index. This happens when the DS-MRR scan
5970  is re-started without closing it first. In this case the primary
5971  handler must be used for reading records from the table, ie. it
5972  must not be opened for doing a new range scan. In this case
5973  the active_index must either not be set or be the primary key.
5974  */
5975  DBUG_ASSERT(h->inited == handler::RND);
5976  DBUG_ASSERT(h->active_index == MAX_KEY ||
5977  h->active_index == table->s->primary_key);
5978  }
5979 
5980  /*
5981  The index scan is now transferred to h2 and we can close the open
5982  index scan on the primary handler.
5983  */
5984  if (h->inited == handler::INDEX)
5985  {
5986  /*
5987  Calling h->ha_index_end() will invoke dsmrr_close() for this object,
5988  which will close the index scan on h2. We need to keep it open, so
5989  temporarily move h2 out of the DsMrr object.
5990  */
5991  handler *save_h2= h2;
5992  h2= NULL;
5993  retval= h->ha_index_end();
5994  h2= save_h2;
5995  if (retval)
5996  goto error;
5997  }
5998 
5999  /*
6000  Verify consistency between h and h2.
6001  */
6002  DBUG_ASSERT(h->inited != handler::INDEX);
6003  DBUG_ASSERT(h->active_index == MAX_KEY ||
6004  h->active_index == table->s->primary_key);
6005  DBUG_ASSERT(h2->inited == handler::INDEX);
6006  DBUG_ASSERT(h2->active_index != MAX_KEY);
6007  DBUG_ASSERT(h->m_lock_type == h2->m_lock_type);
6008 
6009  if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
6010  n_ranges, mode, buf)))
6011  goto error;
6012 
6013  if ((retval= dsmrr_fill_buffer()))
6014  goto error;
6015 
6016  /*
6017  If the above call has scanned through all intervals in *seq, then
6018  adjust *buf to indicate that the remaining buffer space will not be used.
6019  */
6020  if (dsmrr_eof)
6021  buf->end_of_used_area= rowids_buf_last;
6022 
6023  /*
6024  h->inited == INDEX may occur when 'range checked for each record' is
6025  used.
6026  */
6027  if ((h->inited != handler::RND) &&
6028  ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
6029  (h->ha_rnd_init(FALSE))))
6030  {
6031  retval= 1;
6032  goto error;
6033  }
6034 
6035  use_default_impl= FALSE;
6036  h->mrr_funcs= *seq_funcs;
6037 
6038  DBUG_RETURN(0);
6039 error:
     /* Common failure exit: tear down and discard the secondary handler. */
6040  h2->ha_index_or_rnd_end();
6041  h2->ha_external_lock(thd, F_UNLCK);
6042  h2->close();
6043  delete h2;
6044  h2= NULL;
6045  DBUG_ASSERT(retval != 0);
6046  DBUG_RETURN(retval);
6047 }
6048 
6049 
6050 void DsMrr_impl::dsmrr_close()
6051 {
6052  DBUG_ENTER("DsMrr_impl::dsmrr_close");
6053 
6054  // If there is an open index on h2, then close it
6055  if (h2 && h2->active_index != MAX_KEY)
6056  {
6057  h2->ha_index_or_rnd_end();
6058  h2->ha_external_lock(current_thd, F_UNLCK);
6059  }
6060  use_default_impl= true;
6061  DBUG_VOID_RETURN;
6062 }
6063 
6064 
/*
  Close any DS-MRR scan in progress and dispose of the secondary handler.

  NOTE(review): the declarator line of this definition is missing from the
  listing (embedded line 6065); the DBUG_ENTER tag names it
  DsMrr_impl::reset.

  Does nothing when there is no h2. Otherwise calls dsmrr_close(), then
  closes and deletes h2 and clears the pointer.
*/
6066 {
6067  DBUG_ENTER("DsMrr_impl::reset");
6068 
6069  if (h2)
6070  {
6071  // Close any ongoing DS-MRR scan
6072  dsmrr_close();
6073 
6074  // Close and delete the h2 handler
6075  h2->close();
6076  delete h2;
6077  h2= NULL;
6078  }
6079  DBUG_VOID_RETURN;
6080 }
6081 
6082 
6083 static int rowid_cmp(void *h, uchar *a, uchar *b)
6084 {
6085  return ((handler*)h)->cmp_ref(a, b);
6086 }
6087 
6088 
/*
  Fill the rowid buffer from the index scan on h2 and sort it by rowid.

  NOTE(review): the declarator line of this definition is missing from the
  listing; the DBUG_ENTER tag names it DsMrr_impl::dsmrr_fill_buffer.

  Reads rows through h2's default multi_range_read_next() until the buffer
  is full or the read returns non-zero. For every row that is not rejected
  by the optional skip_index_tuple callback, the rowid (and, when
  is_mrr_assoc is set, the range_info pointer) is appended to the buffer.

  Sets dsmrr_eof according to whether the scan ended with
  HA_ERR_END_OF_FILE, and leaves rowids_buf_cur at the start and
  rowids_buf_last just past the last stored element.

  Returns 0 on success, including end of file, or the error code from the
  index read.
*/
6107 {
6108  char *range_info;
6109  int res= 0;
6110  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
6111  DBUG_ASSERT(rowids_buf < rowids_buf_end);
6112 
6113  rowids_buf_cur= rowids_buf;
6114  while ((rowids_buf_cur < rowids_buf_end) &&
6115  !(res= h2->handler::multi_range_read_next(&range_info)))
6116  {
6117  KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
     /* Let the sequence owner reject this index tuple, if it wants to. */
6118  if (h2->mrr_funcs.skip_index_tuple &&
6119  h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6120  continue;
6121 
6122  /* Put rowid, or {rowid, range_id} pair into the buffer */
6123  h2->position(table->record[0]);
6124  memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6125  rowids_buf_cur += h2->ref_length;
6126 
6127  if (is_mrr_assoc)
6128  {
6129  memcpy(rowids_buf_cur, &range_info, sizeof(void*));
6130  rowids_buf_cur += sizeof(void*);
6131  }
6132  }
6133 
     /* Anything other than success or end of file is a real error. */
6134  if (res && res != HA_ERR_END_OF_FILE)
6135  DBUG_RETURN(res);
6136  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
6137 
6138  /* Sort the buffer contents by rowid */
6139  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6140  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
6141 
6142  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
6143  (void*)h);
     /* Rewind the cursor; rowids_buf_last now marks the end of valid data. */
6144  rowids_buf_last= rowids_buf_cur;
6145  rowids_buf_cur= rowids_buf;
6146  DBUG_RETURN(0);
6147 }
6148 
6149 
6150 /*
6151  DS-MRR implementation: multi_range_read_next() function
6152 */
6153 
6154 int DsMrr_impl::dsmrr_next(char **range_info)
6155 {
6156  int res;
6157  uchar *cur_range_info= 0;
6158  uchar *rowid;
6159 
6160  if (use_default_impl)
6161  return h->handler::multi_range_read_next(range_info);
6162 
6163  do
6164  {
6165  if (rowids_buf_cur == rowids_buf_last)
6166  {
6167  if (dsmrr_eof)
6168  {
6169  res= HA_ERR_END_OF_FILE;
6170  goto end;
6171  }
6172 
6173  res= dsmrr_fill_buffer();
6174  if (res)
6175  goto end;
6176  }
6177 
6178  /* return eof if there are no rowids in the buffer after re-fill attempt */
6179  if (rowids_buf_cur == rowids_buf_last)
6180  {
6181  res= HA_ERR_END_OF_FILE;
6182  goto end;
6183  }
6184  rowid= rowids_buf_cur;
6185 
6186  if (is_mrr_assoc)
6187  memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));
6188 
6189  rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
6190  if (h2->mrr_funcs.skip_record &&
6191  h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
6192  continue;
6193  res= h->rnd_pos(table->record[0], rowid);
6194  break;
6195  } while (true);
6196 
6197  if (is_mrr_assoc)
6198  {
6199  memcpy(range_info, rowid + h->ref_length, sizeof(void*));
6200  }
6201 end:
6202  return res;
6203 }
6204 
6205 
6206 /*
6207  DS-MRR implementation: multi_range_read_info() function
6208 */
6209 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6210  uint *bufsz, uint *flags, Cost_estimate *cost)
6211 {
6212  ha_rows res;
6213  uint def_flags= *flags;
6214  uint def_bufsz= *bufsz;
6215 
6216  /* Get cost/flags/mem_usage of default MRR implementation */
6217  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6218  &def_flags, cost);
6219  DBUG_ASSERT(!res);
6220 
6221  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6222  choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6223  {
6224  /* Default implementation is choosen */
6225  DBUG_PRINT("info", ("Default MRR implementation choosen"));
6226  *flags= def_flags;
6227  *bufsz= def_bufsz;
6228  DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6229  }
6230  else
6231  {
6232  /* *flags and *bufsz were set by choose_mrr_impl */
6233  DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6234  }
6235  return 0;
6236 }
6237 
6238 
6239 /*
6240  DS-MRR Implementation: multi_range_read_info_const() function
6241 */
6242 
6243 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6244  void *seq_init_param, uint n_ranges,
6245  uint *bufsz, uint *flags, Cost_estimate *cost)
6246 {
6247  ha_rows rows;
6248  uint def_flags= *flags;
6249  uint def_bufsz= *bufsz;
6250  /* Get cost/flags/mem_usage of default MRR implementation */
6251  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
6252  n_ranges, &def_bufsz,
6253  &def_flags, cost);
6254  if (rows == HA_POS_ERROR)
6255  {
6256  /* Default implementation can't perform MRR scan => we can't either */
6257  return rows;
6258  }
6259 
6260  /*
6261  If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6262  use the default MRR implementation (we need it for UPDATE/DELETE).
6263  Otherwise, make a choice based on cost and mrr* flags of
6264  @@optimizer_switch.
6265  */
6266  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6267  choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6268  {
6269  DBUG_PRINT("info", ("Default MRR implementation choosen"));
6270  *flags= def_flags;
6271  *bufsz= def_bufsz;
6272  DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6273  }
6274  else
6275  {
6276  /* *flags and *bufsz were set by choose_mrr_impl */
6277  DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6278  }
6279  return rows;
6280 }
6281 
6282 
6306 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
6307  uint *bufsz, Cost_estimate *cost)
6308 {
6309  bool res;
6310  THD *thd= current_thd;
6311  if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
6312  *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
6313  (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
6314  key_uses_partial_cols(table, keyno))
6315  {
6316  /* Use the default implementation, don't modify args: See comments */
6317  return TRUE;
6318  }
6319 
6320  /*
6321  If @@optimizer_switch has "mrr_cost_based" on, we should avoid
6322  using DS-MRR for queries where it is likely that the records are
6323  stored in memory. Since there is currently no way to determine
6324  this, we use a heuristic:
6325  a) if the storage engine has a memory buffer, DS-MRR is only
6326  considered if the table size is bigger than the buffer.
6327  b) if the storage engine does not have a memory buffer, DS-MRR is
6328  only considered if the table size is bigger than 100MB.
6329  c) Since there is an initial setup cost of DS-MRR, so it is only
6330  considered if at least 50 records will be read.
6331  */
6332  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))
6333  {
6334  /*
6335  If the storage engine has a database buffer we use this as the
6336  minimum size the table should have before considering DS-MRR.
6337  */
6338  longlong min_file_size= table->file->get_memory_buffer_size();
6339  if (min_file_size == -1)
6340  {
6341  // No estimate for database buffer
6342  min_file_size= 100 * 1024 * 1024; // 100 MB
6343  }
6344 
6345  if (table->file->stats.data_file_length <
6346  static_cast<ulonglong>(min_file_size) ||
6347  rows <= 50)
6348  return true; // Use the default implementation
6349  }
6350 
6351  Cost_estimate dsmrr_cost;
6352  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
6353  return TRUE;
6354 
6355  bool force_dsmrr;
6356  /*
6357  If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
6358  of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
6359  allows one to force use of DS-MRR whenever it is applicable without
6360  affecting other cost-based choices.
6361  */
6362  if ((force_dsmrr=
6363  (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) &&
6364  !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))) &&
6365  dsmrr_cost.total_cost() > cost->total_cost())
6366  dsmrr_cost= *cost;
6367 
6368  if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
6369  {
6370  *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
6371  *flags &= ~HA_MRR_SUPPORT_SORTED; /* We can't provide ordered output */
6372  *cost= dsmrr_cost;
6373  res= FALSE;
6374  }
6375  else
6376  {
6377  /* Use the default MRR implementation */
6378  res= TRUE;
6379  }
6380  return res;
6381 }
6382 
6383 
6384 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
6385  Cost_estimate *cost);
6386 
6387 
6402 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
6403  uint *buffer_size,
6404  Cost_estimate *cost)
6405 {
6406  ha_rows rows_in_last_step;
6407  uint n_full_steps;
6408  double index_read_cost;
6409 
6410  const uint elem_size= h->ref_length +
6411  sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
6412  const ha_rows max_buff_entries= *buffer_size / elem_size;
6413 
6414  if (!max_buff_entries)
6415  return TRUE; /* Buffer has not enough space for even 1 rowid */
6416 
6417  /* Number of iterations we'll make with full buffer */
6418  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
6419 
6420  /*
6421  Get numbers of rows we'll be processing in last iteration, with
6422  non-full buffer
6423  */
6424  rows_in_last_step= rows % max_buff_entries;
6425 
6426  DBUG_ASSERT(cost->is_zero());
6427 
6428  if (n_full_steps)
6429  {
6430  get_sort_and_sweep_cost(table, max_buff_entries, cost);
6431  cost->multiply(n_full_steps);
6432  }
6433  else
6434  {
6435  /*
6436  Adjust buffer size since only parts of the buffer will be used:
6437  1. Adjust record estimate for the last scan to reduce likelyhood
6438  of needing more than one scan by adding 20 percent to the
6439  record estimate and by ensuring this is at least 100 records.
6440  2. If the estimated needed buffer size is lower than suggested by
6441  the caller then set it to the estimated buffer size.
6442  */
6443  const ha_rows keys_in_buffer=
6444  max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
6445  *buffer_size= min<ulong>(*buffer_size,
6446  static_cast<ulong>(keys_in_buffer) * elem_size);
6447  }
6448 
6449  Cost_estimate last_step_cost;
6450  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
6451  (*cost)+= last_step_cost;
6452 
6453  /*
6454  Cost of memory is not included in the total_cost() function and
6455  thus will not be considered when comparing costs. Still, we
6456  record it in the cost estimate object for future use.
6457  */
6458  cost->add_mem(*buffer_size);
6459 
6460  /* Total cost of all index accesses */
6461  index_read_cost= h->index_only_read_time(keynr, rows);
6462  cost->add_io(index_read_cost * Cost_estimate::IO_BLOCK_READ_COST());
6463 
6464  /*
6465  Add CPU cost for processing records (see
6466  @handler::multi_range_read_info_const()).
6467  */
6468  cost->add_cpu(rows * ROW_EVALUATE_COST);
6469  return FALSE;
6470 }
6471 
6472 
6473 /*
6474  Get cost of one sort-and-sweep step
6475 
6476  SYNOPSIS
6477  get_sort_and_sweep_cost()
6478  table Table being accessed
6479  nrows Number of rows to be sorted and retrieved
6480  cost OUT The cost
6481 
6482  DESCRIPTION
6483  Get cost of these operations:
6484  - sort an array of #nrows ROWIDs using qsort
6485  - read #nrows records from table in a sweep.
6486 */
6487 
6488 static
6489 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
6490 {
6491  DBUG_ASSERT(cost->is_zero());
6492  if (nrows)
6493  {
6494  get_sweep_read_cost(table, nrows, FALSE, cost);
6495 
6496  /*
6497  Constant for the cost of doing one key compare operation in the
6498  sort operation. We should have used the existing
6499  ROWID_COMPARE_COST constant here but this would make the cost
6500  estimate of sorting very high for queries accessing many
6501  records. Until this constant is adjusted we introduce a constant
6502  that is more realistic. @todo: Replace this with
6503  ROWID_COMPARE_COST when this have been given a realistic value.
6504  */
6505  const double ROWID_COMPARE_SORT_COST = 0.01;
6506 
6507  /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
6508  const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
6509  cost->add_cpu(cpu_sort);
6510  }
6511 }
6512 
6513 
6557 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
6558  Cost_estimate *cost)
6559 {
6560  DBUG_ENTER("get_sweep_read_cost");
6561 
6562  DBUG_ASSERT(cost->is_zero());
6563  if(nrows > 0)
6564  {
6565  double n_blocks=
6566  ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
6567  if (n_blocks < 1.0) // When data_file_length is 0
6568  n_blocks= 1.0;
6569  double busy_blocks=
6570  n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
6571  if (busy_blocks < 1.0)
6572  busy_blocks= 1.0;
6573 
6574  DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
6575  busy_blocks));
6576  if (interrupted)
6577  cost->add_io(busy_blocks * Cost_estimate::IO_BLOCK_READ_COST());
6578  else
6579  /* Assume reading is done in one 'sweep' */
6580  cost->add_io(busy_blocks *
6581  (DISK_SEEK_BASE_COST +
6582  DISK_SEEK_PROP_COST * n_blocks / busy_blocks));
6583  }
6584  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
6585  DBUG_VOID_RETURN;
6586 }
6587 
6588 
6589 /****************************************************************************
6590  * DS-MRR implementation ends
6591  ***************************************************************************/
6592 
/*
  Read the first row of a range.

  NOTE(review): the first line of this definition is missing from the
  listing (the embedded numbering starts at 6613); the DBUG_ENTER tag names
  it handler::read_range_first. The body uses `start_key`, which presumably
  is declared on the missing line.

  Parameters visible in this listing:
    end_key       End of the range; handed to set_end_range().
    eq_range_arg  Stored in eq_range.
    sorted        Ignored.

  Returns 0 when a row within the range was read, HA_ERR_END_OF_FILE when
  no row was found (HA_ERR_KEY_NOT_FOUND is mapped to it) or the row read
  lies beyond the end of the range, and any other index read error
  unchanged.
*/
6613  const key_range *end_key,
6614  bool eq_range_arg,
6615  bool sorted /* ignored */)
6616 {
6617  int result;
6618  DBUG_ENTER("handler::read_range_first");
6619 
6620  eq_range= eq_range_arg;
6621  set_end_range(end_key, RANGE_SCAN_ASC);
6622 
6623  range_key_part= table->key_info[active_index].key_part;
6624 
6625  if (!start_key) // Read first record
6626  result= ha_index_first(table->record[0]);
6627  else
6628  result= ha_index_read_map(table->record[0],
6629  start_key->key,
6630  start_key->keypart_map,
6631  start_key->flag);
     /* Report "key not found" as end of file; pass other errors through. */
6632  if (result)
6633  DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
6634  ? HA_ERR_END_OF_FILE
6635  : result);
6636 
6637  if (compare_key(end_range) <= 0)
6638  {
6639  DBUG_RETURN(0);
6640  }
6641  else
6642  {
6643  /*
6644  The last read row does not fall in the range. So request
6645  storage engine to release row lock if possible.
6646  */
6647  unlock_row();
6648  DBUG_RETURN(HA_ERR_END_OF_FILE);
6649  }
6650 }
6651 
6652 
/*
  Read the next row of the range started by read_range_first().

  NOTE(review): the declarator line of this definition is missing from the
  listing; the DBUG_ENTER tag names it handler::read_range_next.

  For an equality range the result of ha_index_next_same() is returned
  directly. Otherwise the next index entry is read and compared against
  end_range.

  Returns 0 when a row within the range was read, HA_ERR_END_OF_FILE when
  the row read lies beyond the end of the range, or the error from the
  index read.
*/
6667 {
6668  int result;
6669  DBUG_ENTER("handler::read_range_next");
6670 
6671  if (eq_range)
6672  {
6673  /* We trust that index_next_same always gives a row in range */
6674  DBUG_RETURN(ha_index_next_same(table->record[0],
6675  end_range->key,
6676  end_range->length));
6677  }
6678  result= ha_index_next(table->record[0]);
6679  if (result)
6680  DBUG_RETURN(result);
6681 
6682  if (compare_key(end_range) <= 0)
6683  {
6684  DBUG_RETURN(0);
6685  }
6686  else
6687  {
6688  /*
6689  The last read row does not fall in the range. So request
6690  storage engine to release row lock if possible.
6691  */
6692  unlock_row();
6693  DBUG_RETURN(HA_ERR_END_OF_FILE);
6694  }
6695 }
6696 
6697 
/*
  Record the end of the range for the scan and the scan direction.

  NOTE(review): the first line of this definition is missing from the
  listing (the embedded numbering starts at 6699). Presumably this is
  handler::set_end_range(), which read_range_first() calls with a key_range
  pointer and a direction; the body uses `range`, which presumably is
  declared on the missing line. Confirm against the original file.

  With a non-NULL range, a private copy is stored in save_end_range and
  end_range points at that copy. key_compare_result_on_equal is set to 1
  for HA_READ_BEFORE_KEY, -1 for HA_READ_AFTER_KEY and 0 otherwise. With a
  NULL range, end_range is cleared. The direction is stored in both cases.
*/
6699  enum_range_scan_direction direction)
6700 {
6701  if (range)
6702  {
6703  save_end_range= *range;
6704  end_range= &save_end_range;
6705  range_key_part= table->key_info[active_index].key_part;
6706  key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
6707  (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
6708  }
6709  else
6710  end_range= NULL;
6711 
6712  range_scan_direction= direction;
6713 }
6714 
6715 
/*
  Compare the key of the current row with the end of a range.

  NOTE(review): the declarator line of this definition is missing from the
  listing. Presumably this is handler::compare_key(), taking a key_range
  pointer named `range`; read_range_first() and read_range_next() call
  compare_key(end_range). Confirm against the original file.

  Returns 0 when there is no range or when in_range_check_pushed_down is
  set. Otherwise returns the key_cmp() result, with an exact match replaced
  by key_compare_result_on_equal.
*/
6732 {
6733  int cmp;
6734  if (!range || in_range_check_pushed_down)
6735  return 0; // No max range
6736  cmp= key_cmp(range_key_part, range->key, range->length);
6737  if (!cmp)
6738  cmp= key_compare_result_on_equal;
6739  return cmp;
6740 }
6741 
6742 
6743 /*
6744  Compare if a found key (in row) is within the range.
6745 
6746  This function is similar to compare_key() but checks the range scan
6747  direction to determine if this is a descending scan. This function
6748  is used by the index condition pushdown implementation to determine
6749  if the read record is within the range scan.
6750 
6751  @param range Range to compare to row. May be NULL for no range.
6752 
6753  @seealso
6754  handler::compare_key()
6755 
6756  @return Returns whether the key is within the range
6757 
6758  - 0 : Key is equal to range or 'range' == 0 (no range)
6759  - -1 : Key is within the current range
6760  - 1 : Key is outside the current range
6761 */
6762 
6763 int handler::compare_key_icp(const key_range *range) const
6764 {
6765  int cmp;
6766  if (!range)
6767  return 0; // no max range
6768  cmp= key_cmp(range_key_part, range->key, range->length);
6769  if (!cmp)
6770  cmp= key_compare_result_on_equal;
6771  if (range_scan_direction == RANGE_SCAN_DESC)
6772  cmp= -cmp;
6773  return cmp;
6774 }
6775 
6776 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
6777  key_part_map keypart_map,
6778  enum ha_rkey_function find_flag)
6779 {
6780  int error, error1;
6781  error= index_init(index, 0);
6782  if (!error)
6783  {
6784  error= index_read_map(buf, key, keypart_map, find_flag);
6785  error1= index_end();
6786  }
6787  return error ? error : error1;
6788 }
6789 
6790 
6801 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
6802  void *arg)
6803 {
6804  List<char> *found_exts= (List<char> *) arg;
6805  handlerton *hton= plugin_data(plugin, handlerton *);
6806  handler *file;
6807  if (hton->state == SHOW_OPTION_YES && hton->create &&
6808  (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
6809  {
6810  List_iterator_fast<char> it(*found_exts);
6811  const char **ext, *old_ext;
6812 
6813  for (ext= file->bas_ext(); *ext; ext++)
6814  {
6815  while ((old_ext= it++))
6816  {
6817  if (!strcmp(old_ext, *ext))
6818  break;
6819  }
6820  if (!old_ext)
6821  found_exts->push_back((char *) *ext);
6822 
6823  it.rewind();
6824  }
6825  delete file;
6826  }
6827  return FALSE;
6828 }
6829 
6830 TYPELIB* ha_known_exts()
6831 {
6832  TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
6833  known_extensions->name= "known_exts";
6834  known_extensions->type_lengths= NULL;
6835 
6836  List<char> found_exts;
6837  const char **ext, *old_ext;
6838 
6839  found_exts.push_back((char*) TRG_EXT);
6840  found_exts.push_back((char*) TRN_EXT);
6841 
6842  plugin_foreach(NULL, exts_handlerton,
6843  MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
6844 
6845  size_t arr_length= sizeof(char *)* (found_exts.elements+1);
6846  ext= (const char **) sql_alloc(arr_length);
6847 
6848  DBUG_ASSERT(NULL != ext);
6849  known_extensions->count= found_exts.elements;
6850  known_extensions->type_names= ext;
6851 
6852  List_iterator_fast<char> it(found_exts);
6853  while ((old_ext= it++))
6854  *ext++= old_ext;
6855  *ext= NULL;
6856  return known_extensions;
6857 }
6858 
6859 
6860 static bool stat_print(THD *thd, const char *type, uint type_len,
6861  const char *file, uint file_len,
6862  const char *status, uint status_len)
6863 {
6864  Protocol *protocol= thd->protocol;
6865  protocol->prepare_for_resend();
6866  protocol->store(type, type_len, system_charset_info);
6867  protocol->store(file, file_len, system_charset_info);
6868  protocol->store(status, status_len, system_charset_info);
6869  if (protocol->write())
6870  return TRUE;
6871  return FALSE;
6872 }
6873 
6874 
6875 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
6876  void *arg)
6877 {
6878  enum ha_stat_type stat= *(enum ha_stat_type *) arg;
6879  handlerton *hton= plugin_data(plugin, handlerton *);
6880  if (hton->state == SHOW_OPTION_YES && hton->show_status &&
6881  hton->show_status(hton, thd, stat_print, stat))
6882  return TRUE;
6883  return FALSE;
6884 }
6885 
6886 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
6887 {
6888  List<Item> field_list;
6889  Protocol *protocol= thd->protocol;
6890  bool result;
6891 
6892  field_list.push_back(new Item_empty_string("Type",10));
6893  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
6894  field_list.push_back(new Item_empty_string("Status",10));
6895 
6896  if (protocol->send_result_set_metadata(&field_list,
6897  Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
6898  return TRUE;
6899 
6900  if (db_type == NULL)
6901  {
6902  result= plugin_foreach(thd, showstat_handlerton,
6903  MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
6904  }
6905  else
6906  {
6907  if (db_type->state != SHOW_OPTION_YES)
6908  {
6909  const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
6910  result= stat_print(thd, name->str, name->length,
6911  "", 0, "DISABLED", 8) ? 1 : 0;
6912  }
6913  else
6914  result= db_type->show_status &&
6915  db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
6916  }
6917 
6918  if (!result)
6919  my_eof(thd);
6920  return result;
6921 }
6922 
/*
  Check whether the conditions for row-based binary logging are met
  for the table.

  A row in the given table should be replicated if:
  - Row-based replication is enabled in the current thread
  - The binlog is enabled
  - It is not a temporary table
  - The binary log is open
  - The database the table resides in shall be binlogged (binlog_*_db rules)
  - table is not mysql.event
*/
6935 
6936 static bool check_table_binlog_row_based(THD *thd, TABLE *table)
6937 {
6938  if (table->s->cached_row_logging_check == -1)
6939  {
6940  int const check(table->s->tmp_table == NO_TMP_TABLE &&
6941  ! table->no_replicate &&
6942  binlog_filter->db_ok(table->s->db.str));
6943  table->s->cached_row_logging_check= check;
6944  }
6945 
6946  DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
6947  table->s->cached_row_logging_check == 1);
6948 
6949  return (thd->is_current_stmt_binlog_format_row() &&
6950  table->s->cached_row_logging_check &&
6951  (thd->variables.option_bits & OPTION_BIN_LOG) &&
6952  mysql_bin_log.is_open());
6953 }
6954 
6955 
6976 static int write_locked_table_maps(THD *thd)
6977 {
6978  DBUG_ENTER("write_locked_table_maps");
6979  DBUG_PRINT("enter", ("thd: 0x%lx thd->lock: 0x%lx "
6980  "thd->extra_lock: 0x%lx",
6981  (long) thd, (long) thd->lock, (long) thd->extra_lock));
6982 
6983  DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
6984 
6985  if (thd->get_binlog_table_maps() == 0)
6986  {
6987  MYSQL_LOCK *locks[2];
6988  locks[0]= thd->extra_lock;
6989  locks[1]= thd->lock;
6990  for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
6991  {
6992  MYSQL_LOCK const *const lock= locks[i];
6993  if (lock == NULL)
6994  continue;
6995 
6996  bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
6997  TABLE **const end_ptr= lock->table + lock->table_count;
6998  for (TABLE **table_ptr= lock->table ;
6999  table_ptr != end_ptr ;
7000  ++table_ptr)
7001  {
7002  TABLE *const table= *table_ptr;
7003  DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
7004  if (table->current_lock == F_WRLCK &&
7005  check_table_binlog_row_based(thd, table))
7006  {
7007  /*
7008  We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7009  (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7010  compatible behavior with the STMT based replication even when
7011  the table is not transactional. In other words, if the operation
7012  fails while executing the insert phase nothing is written to the
7013  binlog.
7014 
7015  Note that at this point, we check the type of a set of tables to
7016  create the table map events. In the function binlog_log_row(),
7017  which calls the current function, we check the type of the table
7018  of the current row.
7019  */
7020  bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7021  table->file->has_transactions();
7022  int const error= thd->binlog_write_table_map(table, has_trans,
7023  need_binlog_rows_query);
7024  /* Binlog Rows_query log event once for one statement which updates
7025  two or more tables.*/
7026  if (need_binlog_rows_query)
7027  need_binlog_rows_query= FALSE;
7028  /*
7029  If an error occurs, it is the responsibility of the caller to
7030  roll back the transaction.
7031  */
7032  if (unlikely(error))
7033  DBUG_RETURN(1);
7034  }
7035  }
7036  }
7037  }
7038  DBUG_RETURN(0);
7039 }
7040 
7041 
7042 typedef bool Log_func(THD*, TABLE*, bool,
7043  const uchar*, const uchar*);
7044 
7046  const uchar *before_record,
7047  const uchar *after_record,
7048  Log_func *log_func)
7049 {
7050  bool error= 0;
7051  THD *const thd= table->in_use;
7052 
7053  if (check_table_binlog_row_based(thd, table))
7054  {
7055  DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
7056  (table->s->fields + 7) / 8);
7057 
7058  /*
7059  If there are no table maps written to the binary log, this is
7060  the first row handled in this statement. In that case, we need
7061  to write table maps for all locked tables to the binary log.
7062  */
7063  if (likely(!(error= write_locked_table_maps(thd))))
7064  {
7065  /*
7066  We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7067  (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7068  compatible behavior with the STMT based replication even when
7069  the table is not transactional. In other words, if the operation
7070  fails while executing the insert phase nothing is written to the
7071  binlog.
7072  */
7073  bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7074  table->file->has_transactions();
7075  error=
7076  (*log_func)(thd, table, has_trans, before_record, after_record);
7077  }
7078  }
7079  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
7080 }
7081 
7082 int handler::ha_external_lock(THD *thd, int lock_type)
7083 {
7084  int error;
7085  DBUG_ENTER("handler::ha_external_lock");
7086  /*
7087  Whether this is lock or unlock, this should be true, and is to verify that
7088  if get_auto_increment() was called (thus may have reserved intervals or
7089  taken a table lock), ha_release_auto_increment() was too.
7090  */
7091  DBUG_ASSERT(next_insert_id == 0);
7092  /* Consecutive calls for lock without unlocking in between is not allowed */
7093  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7094  ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
7095  lock_type == F_UNLCK));
7096  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
7097  DBUG_ASSERT(inited == NONE || table->open_by_handler);
7098 
7099  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
7100  MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
7101  MYSQL_HANDLER_UNLOCK_START_ENABLED())
7102  {
7103  if (lock_type == F_RDLCK)
7104  {
7105  MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
7106  table_share->table_name.str);
7107  }
7108  else if (lock_type == F_WRLCK)
7109  {
7110  MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
7111  table_share->table_name.str);
7112  }
7113  else if (lock_type == F_UNLCK)
7114  {
7115  MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
7116  table_share->table_name.str);
7117  }
7118  }
7119 
7120  ha_statistic_increment(&SSV::ha_external_lock_count);
7121 
7122  MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
7123  { error= external_lock(thd, lock_type); })
7124 
7125  /*
7126  We cache the table flags if the locking succeeded. Otherwise, we
7127  keep them as they were when they were fetched in ha_open().
7128  */
7129 
7130  if (error == 0)
7131  {
7132  /*
7133  The lock type is needed by MRR when creating a clone of this handler
7134  object.
7135  */
7136  m_lock_type= lock_type;
7137  cached_table_flags= table_flags();
7138  }
7139 
7140  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
7141  MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
7142  MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
7143  {
7144  if (lock_type == F_RDLCK)
7145  {
7146  MYSQL_HANDLER_RDLOCK_DONE(error);
7147  }
7148  else if (lock_type == F_WRLCK)
7149  {
7150  MYSQL_HANDLER_WRLOCK_DONE(error);
7151  }
7152  else if (lock_type == F_UNLCK)
7153  {
7154  MYSQL_HANDLER_UNLOCK_DONE(error);
7155  }
7156  }
7157  DBUG_RETURN(error);
7158 }
7159 
7160 
7167 {
7168  DBUG_ENTER("handler::ha_reset");
7169  /* Check that we have called all proper deallocation functions */
7170  DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
7171  table->s->column_bitmap_size ==
7172  (uchar*) table->def_write_set.bitmap);
7173  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
7174  DBUG_ASSERT(table->key_read == 0);
7175  /* ensure that ha_index_end / ha_rnd_end has been called */
7176  DBUG_ASSERT(inited == NONE);
7177  /* Free cache used by filesort */
7178  free_io_cache(table);
7179  /* reset the bitmaps to point to defaults */
7180  table->default_column_bitmaps();
7181  /* Reset information about pushed engine conditions */
7182  pushed_cond= NULL;
7183  /* Reset information about pushed index conditions */
7185 
7186  const int retval= reset();
7187  DBUG_RETURN(retval);
7188 }
7189 
7190 
7191 int handler::ha_write_row(uchar *buf)
7192 {
7193  int error;
7194  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
7195  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7196  m_lock_type == F_WRLCK);
7197 
7198  DBUG_ENTER("handler::ha_write_row");
7199  DBUG_EXECUTE_IF("inject_error_ha_write_row",
7200  DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
7201 
7202  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
7203  mark_trx_read_write();
7204 
7205  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
7206  { error= write_row(buf); })
7207 
7208  MYSQL_INSERT_ROW_DONE(error);
7209  if (unlikely(error))
7210  DBUG_RETURN(error);
7211 
7212  if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
7213  DBUG_RETURN(error); /* purecov: inspected */
7214 
7215  DEBUG_SYNC_C("ha_write_row_end");
7216  DBUG_RETURN(0);
7217 }
7218 
7219 
7220 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
7221 {
7222  int error;
7223  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7224  m_lock_type == F_WRLCK);
7225  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
7226 
7227  /*
7228  Some storage engines require that the new record is in record[0]
7229  (and the old record is in record[1]).
7230  */
7231  DBUG_ASSERT(new_data == table->record[0]);
7232  DBUG_ASSERT(old_data == table->record[1]);
7233 
7234  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
7235  mark_trx_read_write();
7236 
7237  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
7238  { error= update_row(old_data, new_data);})
7239 
7240  MYSQL_UPDATE_ROW_DONE(error);
7241  if (unlikely(error))
7242  return error;
7243  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
7244  return error;
7245  return 0;
7246 }
7247 
7248 int handler::ha_delete_row(const uchar *buf)
7249 {
7250  int error;
7251  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7252  m_lock_type == F_WRLCK);
7253  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
7254  /*
7255  Normally table->record[0] is used, but sometimes table->record[1] is used.
7256  */
7257  DBUG_ASSERT(buf == table->record[0] ||
7258  buf == table->record[1]);
7259  DBUG_EXECUTE_IF("inject_error_ha_delete_row",
7260  return HA_ERR_INTERNAL_ERROR; );
7261 
7262  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
7263  mark_trx_read_write();
7264 
7265  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_DELETE_ROW, active_index, 0,
7266  { error= delete_row(buf);})
7267 
7268  MYSQL_DELETE_ROW_DONE(error);
7269  if (unlikely(error))
7270  return error;
7271  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
7272  return error;
7273  return 0;
7274 }
7275 
7276 
7277 
7284 {
7285  /* fallback to use all columns in the table to identify row */
7286  table->use_all_columns();
7287 }
7288 
7289 
7302 {
7303  DBUG_ENTER("handler::get_ha_share_ptr");
7304  DBUG_ASSERT(ha_share && table_share);
7305 
7306 #ifndef DBUG_OFF
7307  if (table_share->tmp_table == NO_TMP_TABLE)
7308  mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7309 #endif
7310 
7311  DBUG_RETURN(*ha_share);
7312 }
7313 
7314 
7325 {
7326  DBUG_ENTER("handler::set_ha_share_ptr");
7327  DBUG_ASSERT(ha_share);
7328 #ifndef DBUG_OFF
7329  if (table_share->tmp_table == NO_TMP_TABLE)
7330  mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7331 #endif
7332 
7333  *ha_share= arg_ha_share;
7334  DBUG_VOID_RETURN;
7335 }
7336 
7337 
7343 {
7344  DBUG_ASSERT(table_share);
7345  if (table_share->tmp_table == NO_TMP_TABLE)
7346  mysql_mutex_lock(&table_share->LOCK_ha_data);
7347 }
7348 
7349 
7355 {
7356  DBUG_ASSERT(table_share);
7357  if (table_share->tmp_table == NO_TMP_TABLE)
7358  mysql_mutex_unlock(&table_share->LOCK_ha_data);
7359 }
7360 
7361 
7366 void signal_log_not_needed(struct handlerton, char *log_file)
7367 {
7368  DBUG_ENTER("signal_log_not_needed");
7369  DBUG_PRINT("enter", ("logfile '%s'", log_file));
7370  DBUG_VOID_RETURN;
7371 }
7372 
7373 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
7374 /*
7375  Example of transaction log management functions based on assumption that logs
7376  placed into a directory
7377 */
7378 #include <my_dir.h>
7379 #include <my_sys.h>
7380 int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
7381 {
7382  void *buffer;
7383  int res= 1;
7384  struct handler_iterator iterator;
7385  struct handler_log_file_data data;
7386 
7387  if (!hton->create_iterator)
7388  return 1; /* iterator creator is not supported */
7389 
7390  if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
7391  HA_ITERATOR_OK)
7392  {
7393  /* error during creation of log iterator or iterator is not supported */
7394  return 1;
7395  }
7396  while((*iterator.next)(&iterator, (void*)&data) == 0)
7397  {
7398  printf("%s\n", data.filename.str);
7399  if (data.status == HA_LOG_STATUS_FREE &&
7400  mysql_file_delete(INSTRUMENT_ME,
7401  data.filename.str, MYF(MY_WME)))
7402  goto err;
7403  }
7404  res= 0;
7405 err:
7406  (*iterator.destroy)(&iterator);
7407  return res;
7408 }
7409 
7410 
/*
  Here we should get info from the handler about where it saves its logs,
  but this is just an example, so we use a constant.
  IMHO FN_ROOTDIR ("/") is safe enough for an example, because nobody has
  rights on it except root and it consists of directories only, at least for
  *nix (sorry, can't find a Windows-safe solution here, but it is only an
  example).
*/
7418 #define fl_dir FN_ROOTDIR
7419 
7420 
7426 enum log_status fl_get_log_status(char *log)
7427 {
7428  MY_STAT stat_buff;
7429  if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
7430  return HA_LOG_STATUS_INUSE;
7431  return HA_LOG_STATUS_NOSUCHLOG;
7432 }
7433 
7434 
7435 struct fl_buff
7436 {
7437  LEX_STRING *names;
7438  enum log_status *statuses;
7439  uint32 entries;
7440  uint32 current;
7441 };
7442 
7443 
7444 int fl_log_iterator_next(struct handler_iterator *iterator,
7445  void *iterator_object)
7446 {
7447  struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
7448  struct handler_log_file_data *data=
7449  (struct handler_log_file_data *) iterator_object;
7450  if (buff->current >= buff->entries)
7451  return 1;
7452  data->filename= buff->names[buff->current];
7453  data->status= buff->statuses[buff->current];
7454  buff->current++;
7455  return 0;
7456 }
7457 
7458 
7459 void fl_log_iterator_destroy(struct handler_iterator *iterator)
7460 {
7461  my_free(iterator->buffer);
7462 }
7463 
7464 
7468 enum handler_create_iterator_result
7469 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
7470 {
7471  MY_DIR *dirp;
7472  struct fl_buff *buff;
7473  char *name_ptr;
7474  uchar *ptr;
7475  FILEINFO *file;
7476  uint32 i;
7477 
7478  /* to be able to make my_free without crash in case of error */
7479  iterator->buffer= 0;
7480 
7481  if (!(dirp = my_dir(fl_dir, MYF(0))))
7482  {
7483  return HA_ITERATOR_ERROR;
7484  }
7485  if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
7486  ((ALIGN_SIZE(sizeof(LEX_STRING)) +
7487  sizeof(enum log_status) +
7488  + FN_REFLEN + 1) *
7489  (uint) dirp->number_off_files),
7490  MYF(0))) == 0)
7491  {
7492  return HA_ITERATOR_ERROR;
7493  }
7494  buff= (struct fl_buff *)ptr;
7495  buff->entries= buff->current= 0;
7496  ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
7497  buff->names= (LEX_STRING*) (ptr);
7498  ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
7499  (uint) dirp->number_off_files));
7500  buff->statuses= (enum log_status *)(ptr);
7501  name_ptr= (char *)(ptr + (sizeof(enum log_status) *
7502  (uint) dirp->number_off_files));
7503  for (i=0 ; i < (uint) dirp->number_off_files ; i++)
7504  {
7505  enum log_status st;
7506  file= dirp->dir_entry + i;
7507  if ((file->name[0] == '.' &&
7508  ((file->name[1] == '.' && file->name[2] == '\0') ||
7509  file->name[1] == '\0')))
7510  continue;
7511  if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
7512  continue;
7513  name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
7514  FN_REFLEN, fl_dir, file->name, NullS);
7515  buff->names[buff->entries].length= (name_ptr -
7516  buff->names[buff->entries].str);
7517  buff->statuses[buff->entries]= st;
7518  buff->entries++;
7519  }
7520 
7521  iterator->buffer= buff;
7522  iterator->next= &fl_log_iterator_next;
7523  iterator->destroy= &fl_log_iterator_destroy;
7524  return HA_ITERATOR_OK;
7525 }
7526 
7527 
7528 /* An example of a iterator creator */
7529 enum handler_create_iterator_result
7530 fl_create_iterator(enum handler_iterator_type type,
7531  struct handler_iterator *iterator)
7532 {
7533  switch(type) {
7534  case HA_TRANSACTLOG_ITERATOR:
7535  return fl_log_iterator_buffer_init(iterator);
7536  default:
7537  return HA_ITERATOR_UNSUPPORTED;
7538  }
7539 }
7540 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/