MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ha_partition.cc
1 /*
2  Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 /*
19  This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
20  It is an abstraction layer on top of other handlers such as MyISAM,
21  InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
22  be handled by a storage engine. The current example of this is NDB
23  Cluster that has internally handled partitioning. This has benefits in
24  that many loops needed in the partition handler can be avoided.
25 
26  Partitioning has an inherent feature which in some cases is positive and
27  in some cases is negative. It splits the data into chunks. This makes
28  the data more manageable, queries can easily be parallelised towards the
29  parts and indexes are split such that there are less levels in the
30  index trees. The inherent disadvantage is that to use a split index
31  one has to scan all index parts which is ok for large queries but for
32  small queries it can be a disadvantage.
33 
34  Partitioning lays the foundation for more manageable databases that are
35  extremely large. It does also lay the foundation for more parallelism
36  in the execution of queries. This functionality will grow with later
37  versions of MySQL.
38 
39  You can enable it in your build by doing the following during your build
40  process:
41  ./configure --with-partition
42 
43  The partition is set up to use table locks. It implements a partition "SHARE"
44  that is inserted into a hash by table name. You can use this to store
45  information of state that any partition handler object will be able to see
46  if it is using the same table.
47 
48  Please read the object definition in ha_partition.h before reading the rest
49  of this file.
50 */
51 
52 #include "sql_priv.h"
53 #include "sql_parse.h" // append_file_to_dir
54 #include "binlog.h" // mysql_bin_log
55 
56 #ifdef WITH_PARTITION_STORAGE_ENGINE
57 #include "ha_partition.h"
58 #include "sql_table.h" // tablename_to_filename
59 #include "key.h"
60 #include "sql_plugin.h"
61 #include "sql_partition.h"
62 #include "sql_show.h" // append_identifier
63 #include "sql_admin.h" // SQL_ADMIN_MSG_TEXT_SIZE
64 
65 #include "debug_sync.h"
66 
67 using std::min;
68 using std::max;
69 
70 
71 /* First 4 bytes in the .par file is the number of 32-bit words in the file */
72 #define PAR_WORD_SIZE 4
73 /* offset to the .par file checksum */
74 #define PAR_CHECKSUM_OFFSET 4
75 /* offset to the total number of partitions */
76 #define PAR_NUM_PARTS_OFFSET 8
77 /* offset to the engines array */
78 #define PAR_ENGINES_OFFSET 12
/* Table flags the partition handler always exposes, regardless of engines. */
79 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
80  HA_REC_NOT_IN_SEQ | \
81  HA_CAN_REPAIR)
/*
  Table flags the partition handler never exposes, even if the underlying
  engines would support them.
*/
82 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
83  HA_CAN_FULLTEXT | \
84  HA_DUPLICATE_POS | \
85  HA_CAN_SQL_HANDLER | \
86  HA_CAN_INSERT_DELAYED | \
87  HA_READ_BEFORE_WRITE_REMOVAL)
/* File extension of the partition meta data (.par) file. */
88 static const char *ha_par_ext= ".par";
89 
90 /****************************************************************************
91  MODULE create/delete handler object
92 ****************************************************************************/
93 
/* Forward declarations of the handlerton callbacks installed below. */
94 static handler *partition_create_handler(handlerton *hton,
95  TABLE_SHARE *share,
96  MEM_ROOT *mem_root);
97 static uint partition_flags();
98 static uint alter_table_flags(uint flags);
99 
100 #ifdef HAVE_PSI_INTERFACE
/* Performance schema key instrumenting Partition_share::auto_inc_mutex. */
101 PSI_mutex_key key_partition_auto_inc_mutex;
102 
/* All mutexes this plugin registers with the performance schema. */
103 static PSI_mutex_info all_partition_mutexes[]=
104 {
105  { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0}
106 };
107 
108 static void init_partition_psi_keys(void)
109 {
110  const char* category= "partition";
111  int count;
112 
113  count= array_elements(all_partition_mutexes);
114  mysql_mutex_register(category, all_partition_mutexes, count);
115 }
116 #endif /* HAVE_PSI_INTERFACE */
117 
118 static int partition_initialize(void *p)
119 {
120 
121  handlerton *partition_hton;
122  partition_hton= (handlerton *)p;
123 
124  partition_hton->state= SHOW_OPTION_YES;
125  partition_hton->db_type= DB_TYPE_PARTITION_DB;
126  partition_hton->create= partition_create_handler;
127  partition_hton->partition_flags= partition_flags;
128  partition_hton->alter_table_flags= alter_table_flags;
129  partition_hton->flags= HTON_NOT_USER_SELECTABLE |
130  HTON_HIDDEN |
131  HTON_TEMPORARY_NOT_SUPPORTED;
132 #ifdef HAVE_PSI_INTERFACE
133  init_partition_psi_keys();
134 #endif
135  return 0;
136 }
137 
138 
149 bool Partition_share::init(uint num_parts)
150 {
151  DBUG_ENTER("Partition_share::init");
152  mysql_mutex_init(key_partition_auto_inc_mutex,
154  MY_MUTEX_INIT_FAST);
155  auto_inc_initialized= false;
160  DBUG_RETURN(true);
161  if (partitions_share_refs->init(num_parts))
162  {
163  delete partitions_share_refs;
164  DBUG_RETURN(true);
165  }
166  DBUG_RETURN(false);
167 }
168 
169 
170 /*
171  Create new partition handler
172 
173  SYNOPSIS
174  partition_create_handler()
175  table Table object
176 
177  RETURN VALUE
178  New partition object
179 */
180 
181 static handler *partition_create_handler(handlerton *hton,
182  TABLE_SHARE *share,
183  MEM_ROOT *mem_root)
184 {
185  ha_partition *file= new (mem_root) ha_partition(hton, share);
186  if (file && file->initialize_partition(mem_root))
187  {
188  delete file;
189  file= 0;
190  }
191  return file;
192 }
193 
194 /*
195  HA_CAN_PARTITION:
196  Used by storage engines that can handle partitioning without this
197  partition handler
198  (Partition, NDB)
199 
200  HA_CAN_UPDATE_PARTITION_KEY:
201  Set if the handler can update fields that are part of the partition
202  function.
203 
204  HA_CAN_PARTITION_UNIQUE:
205  Set if the handler can handle unique indexes where the fields of the
206  unique key are not part of the fields of the partition function. Thus
207  a unique key can be set on all fields.
208 
209  HA_USE_AUTO_PARTITION
210  Set if the handler sets all tables to be partitioned by default.
211 */
212 
/* Partitioning capabilities of this handler; see the comment block above. */
213 static uint partition_flags()
214 {
215  return HA_CAN_PARTITION;
216 }
217 
/*
  ALTER TABLE capabilities: a partition function is supported and
  partitions can be added/dropped without copying the whole table.
*/
218 static uint alter_table_flags(uint flags __attribute__((unused)))
219 {
220  return (HA_PARTITION_FUNCTION_SUPPORTED |
221  HA_FAST_CHANGE_PARTITION);
222 }
223 
/* Sentinel value meaning "no partition is currently selected". */
224 const uint32 ha_partition::NO_CURRENT_PART_ID= NOT_A_PARTITION_ID;
225 
226 /*
227  Constructor method
228 
229  SYNOPSIS
230  ha_partition()
231  table Table object
232 
233  RETURN VALUE
234  NONE
235 */
236 
237 ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
238  :handler(hton, share)
239 {
240  DBUG_ENTER("ha_partition::ha_partition(table)");
 /* Put every member into a defined state; partition info arrives later. */
241  init_handler_variables();
242  DBUG_VOID_RETURN;
243 }
244 
245 
246 /*
247  Constructor method
248 
249  SYNOPSIS
250  ha_partition()
251  part_info Partition info
252 
253  RETURN VALUE
254  NONE
255 */
256 
257 ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
258  :handler(hton, NULL)
259 {
260  DBUG_ENTER("ha_partition::ha_partition(part_info)");
261  DBUG_ASSERT(part_info);
262  init_handler_variables();
 /* Creation path: partition info is known up front, no TABLE_SHARE yet. */
263  m_part_info= part_info;
264  m_create_handler= TRUE;
265  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
266  DBUG_VOID_RETURN;
267 }
268 
/*
  Clone constructor: builds a handler that reuses meta data (partition
  share, partition count, clustered-key property) from an already open
  ha_partition instance; see m_is_clone_of handling in open().
*/
281 ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
282  partition_info *part_info_arg,
283  ha_partition *clone_arg,
284  MEM_ROOT *clone_mem_root_arg)
285  :handler(hton, share)
286 {
287  DBUG_ENTER("ha_partition::ha_partition(clone)");
288  init_handler_variables();
289  m_part_info= part_info_arg;
290  m_create_handler= TRUE;
291  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
292  m_is_clone_of= clone_arg;
293  m_clone_mem_root= clone_mem_root_arg;
294  part_share= clone_arg->part_share;
295  m_tot_parts= clone_arg->m_tot_parts;
296  m_pkey_is_clustered= clone_arg->primary_key_is_clustered();
297  DBUG_VOID_RETURN;
298 }
299 
300 /*
301  Initialize handler object
302 
303  SYNOPSIS
304  init_handler_variables()
305 
306  RETURN VALUE
307  NONE
308 */
309 
/* Reset every member to its "not yet opened" default. Called by all ctors. */
310 void ha_partition::init_handler_variables()
311 {
312  active_index= MAX_KEY;
313  m_mode= 0;
314  m_open_test_lock= 0;
 /* .par file buffer and the arrays derived from it. */
315  m_file_buffer= NULL;
316  m_name_buffer_ptr= NULL;
317  m_engine_array= NULL;
318  m_file= NULL;
319  m_file_tot_parts= 0;
 /* Arrays used during ALTER TABLE partition management. */
320  m_reorged_file= NULL;
321  m_new_file= NULL;
322  m_reorged_parts= 0;
323  m_added_file= NULL;
324  m_tot_parts= 0;
325  m_pkey_is_clustered= 0;
326  m_part_spec.start_part= NO_CURRENT_PART_ID;
327  m_scan_value= 2;
328  m_ref_length= 0;
329  m_part_spec.end_part= NO_CURRENT_PART_ID;
330  m_index_scan_type= partition_no_index_scan;
331  m_start_key.key= NULL;
332  m_start_key.length= 0;
333  m_myisam= FALSE;
334  m_innodb= FALSE;
 /* Extra-cache state forwarded to the underlying handlers. */
335  m_extra_cache= FALSE;
336  m_extra_cache_size= 0;
337  m_extra_prepare_for_update= FALSE;
338  m_extra_cache_part_id= NO_CURRENT_PART_ID;
339  m_handler_status= handler_not_initialized;
340  m_low_byte_first= 1;
341  m_part_field_array= NULL;
342  m_ordered_rec_buffer= NULL;
343  m_top_entry= NO_CURRENT_PART_ID;
344  m_rec_length= 0;
345  m_last_part= 0;
346  m_rec0= 0;
347  m_err_rec= NULL;
348  m_curr_key_info[0]= NULL;
349  m_curr_key_info[1]= NULL;
350  m_part_func_monotonicity_info= NON_MONOTONIC;
351  auto_increment_lock= FALSE;
352  auto_increment_safe_stmt_log_lock= FALSE;
353  /*
354  this allows blackhole to work properly
355  */
356  m_num_locks= 0;
357  m_part_info= NULL;
358  m_create_handler= FALSE;
359  m_is_sub_partitioned= 0;
360  m_is_clone_of= NULL;
361  m_clone_mem_root= NULL;
362  part_share= NULL;
363  m_new_partitions_share_refs.empty();
364  m_part_ids_sorted_by_num_of_records= NULL;
365 
366 #ifdef DONT_HAVE_TO_BE_INITALIZED
367  m_start_key.flag= 0;
368  m_ordered= TRUE;
369 #endif
370 }
371 
372 
/* Name of the underlying storage engine (identical for all partitions). */
373 const char *ha_partition::table_type() const
374 {
375  // we can do this since we only support a single engine type
376  return m_file[0]->table_type();
377 }
378 
379 
380 /*
381  Destructor method
382 
383  SYNOPSIS
384  ~ha_partition()
385 
386  RETURN VALUE
387  NONE
388 */
389 
390 ha_partition::~ha_partition()
391 {
392  DBUG_ENTER("ha_partition::~ha_partition()");
393  if (m_new_partitions_share_refs.elements)
394  m_new_partitions_share_refs.delete_elements();
 /* Delete each per-partition handler; m_tot_parts entries when allocated. */
395  if (m_file != NULL)
396  {
397  uint i;
398  for (i= 0; i < m_tot_parts; i++)
399  delete m_file[i];
400  }
401  destroy_record_priority_queue();
 /* my_free() accepts NULL, so no guard is needed. */
402  my_free(m_part_ids_sorted_by_num_of_records);
403 
404  clear_handler_file();
405  DBUG_VOID_RETURN;
406 }
407 
408 
409 /*
410  Initialize partition handler object
411 
412  SYNOPSIS
413  initialize_partition()
414  mem_root Allocate memory through this
415 
416  RETURN VALUE
417  1 Error
418  0 Success
419 
420  DESCRIPTION
421 
422  The partition handler is only a layer on top of other engines. Thus it
423  can't really perform anything without the underlying handlers. Thus we
424  add this method as part of the allocation of a handler object.
425 
426  1) Allocation of underlying handlers
427  If we have access to the partition info we will allocate one handler
428  instance for each partition.
429  2) Allocation without partition info
430  The cases where we don't have access to this information is when called
431  in preparation for delete_table and rename_table and in that case we
432  only need to set HA_FILE_BASED. In that case we will use the .par file
433  that contains information about the partitions and their engines and
434  the names of each partition.
435  3) Table flags initialisation
436  We need also to set table flags for the partition handler. This is not
437  static since it depends on what storage engines are used as underlying
438  handlers.
439  The table flags is set in this routine to simulate the behaviour of a
440  normal storage engine
441  The flag HA_FILE_BASED will be set independent of the underlying handlers
442  4) Index flags initialisation
443  When knowledge exists on the indexes it is also possible to initialize the
444  index flags. Again the index flags must be initialized by using the under-
445  lying handlers since this is storage engine dependent.
446  The flag HA_READ_ORDER will be reset for the time being to indicate no
447  ordered output is available from partition handler indexes. Later a merge
448  sort will be performed using the underlying handlers.
449  5) primary_key_is_clustered, has_transactions and low_byte_first is
450  calculated here.
451 
452 */
453 
454 bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
455 {
456  handler **file_array, *file;
457  ulonglong check_table_flags;
458  DBUG_ENTER("ha_partition::initialize_partition");
459 
460  if (m_create_handler)
461  {
 /* CREATE/ALTER path: partition info is known, build handlers from it. */
462  m_tot_parts= m_part_info->get_tot_partitions();
463  DBUG_ASSERT(m_tot_parts > 0);
464  if (new_handlers_from_part_info(mem_root))
465  DBUG_RETURN(1);
466  }
467  else if (!table_share || !table_share->normalized_path.str)
468  {
469  /*
470  Called with dummy table share (delete, rename and alter table).
471  Don't need to set-up anything.
472  */
473  DBUG_RETURN(0);
474  }
475  else if (get_from_handler_file(table_share->normalized_path.str,
476  mem_root, false))
477  {
478  my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
479  DBUG_RETURN(1);
480  }
481  /*
482  We create all underlying table handlers here. We do it in this special
483  method to be able to report allocation errors.
484 
485  Set up low_byte_first, primary_key_is_clustered and
486  has_transactions since they are called often in all kinds of places,
487  other parameters are calculated on demand.
488  Verify that all partitions have the same table_flags.
489  */
490  check_table_flags= m_file[0]->ha_table_flags();
491  m_low_byte_first= m_file[0]->low_byte_first();
492  m_pkey_is_clustered= TRUE;
493  file_array= m_file;
494  do
495  {
496  file= *file_array;
497  if (m_low_byte_first != file->low_byte_first())
498  {
499  // Cannot have handlers with different endian
500  my_error(ER_MIX_HANDLER_ERROR, MYF(0));
501  DBUG_RETURN(1);
502  }
503  if (!file->primary_key_is_clustered())
504  m_pkey_is_clustered= FALSE;
505  if (check_table_flags != file->ha_table_flags())
506  {
507  my_error(ER_MIX_HANDLER_ERROR, MYF(0));
508  DBUG_RETURN(1);
509  }
 /* The loop condition relies on m_file being NULL-terminated. */
510  } while (*(++file_array));
511  m_handler_status= handler_initialized;
512  DBUG_RETURN(0);
513 }
514 
515 /****************************************************************************
516  MODULE meta data changes
517 ****************************************************************************/
518 /*
519  Delete a table
520 
521  SYNOPSIS
522  delete_table()
523  name Full path of table name
524 
525  RETURN VALUE
526  >0 Error
527  0 Success
528 
529  DESCRIPTION
530  Used to delete a table. By the time delete_table() has been called all
531  opened references to this table will have been closed (and your globally
532  shared references released. The variable name will just be the name of
533  the table. You will need to remove any files you have created at this
534  point.
535 
536  If you do not implement this, the default delete_table() is called from
537  handler.cc and it will delete all files with the file extentions returned
538  by bas_ext().
539 
540  Called from handler.cc by delete_table and ha_create_table(). Only used
541  during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
542  the storage engine.
543 */
544 
545 int ha_partition::delete_table(const char *name)
546 {
547  DBUG_ENTER("ha_partition::delete_table");
548 
 /* Shared delete/rename implementation; NULL "to" argument means delete. */
549  DBUG_RETURN(del_ren_table(name, NULL));
550 }
551 
552 
553 /*
554  Rename a table
555 
556  SYNOPSIS
557  rename_table()
558  from Full path of old table name
559  to Full path of new table name
560 
561  RETURN VALUE
562  >0 Error
563  0 Success
564 
565  DESCRIPTION
566  Renames a table from one name to another from alter table call.
567 
568  If you do not implement this, the default rename_table() is called from
569  handler.cc and it will rename all files with the file extentions returned
570  by bas_ext().
571 
572  Called from sql_table.cc by mysql_rename_table().
573 */
574 
575 int ha_partition::rename_table(const char *from, const char *to)
576 {
577  DBUG_ENTER("ha_partition::rename_table");
578 
 /* Shared delete/rename implementation; non-NULL "to" means rename. */
579  DBUG_RETURN(del_ren_table(from, to));
580 }
581 
582 
583 /*
584  Create the handler file (.par-file)
585 
586  SYNOPSIS
587  create_handler_files()
588  name Full path of table name
589  create_info Create info generated for CREATE TABLE
590 
591  RETURN VALUE
592  >0 Error
593  0 Success
594 
595  DESCRIPTION
596  create_handler_files is called to create any handler specific files
597  before opening the file with openfrm to later call ::create on the
598  file object.
599  In the partition handler this is used to store the names of partitions
600  and types of engines in the partitions.
601 */
602 
603 int ha_partition::create_handler_files(const char *path,
604  const char *old_path,
605  int action_flag,
606  HA_CREATE_INFO *create_info)
607 {
608  DBUG_ENTER("ha_partition::create_handler_files()");
609 
610  /*
611  We need to update total number of parts since we might write the handler
612  file as part of a partition management command
613  */
614  if (action_flag == CHF_DELETE_FLAG ||
615  action_flag == CHF_RENAME_FLAG)
616  {
617  char name[FN_REFLEN];
618  char old_name[FN_REFLEN];
619 
620  strxmov(name, path, ha_par_ext, NullS);
621  strxmov(old_name, old_path, ha_par_ext, NullS);
622  if ((action_flag == CHF_DELETE_FLAG &&
623  mysql_file_delete(key_file_partition, name, MYF(MY_WME))) ||
624  (action_flag == CHF_RENAME_FLAG &&
625  mysql_file_rename(key_file_partition, old_name, name, MYF(MY_WME))))
626  {
627  DBUG_RETURN(TRUE);
628  }
629  }
630  else if (action_flag == CHF_CREATE_FLAG)
631  {
632  if (create_handler_file(path))
633  {
634  my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
635  DBUG_RETURN(1);
636  }
637  }
638  DBUG_RETURN(0);
639 }
640 
641 
642 /*
643  Create a partitioned table
644 
645  SYNOPSIS
646  create()
647  name Full path of table name
648  table_arg Table object
649  create_info Create info generated for CREATE TABLE
650 
651  RETURN VALUE
652  >0 Error
653  0 Success
654 
655  DESCRIPTION
656  create() is called to create a table. The variable name will have the name
657  of the table. When create() is called you do not need to worry about
658  opening the table. Also, the FRM file will have already been created so
659  adjusting create_info will not do you any good. You can overwrite the frm
660  file at this point if you wish to change the table definition, but there
661  are no methods currently provided for doing that.
662 
663  Called from handler.cc by ha_create_table().
664 */
665 
666 int ha_partition::create(const char *name, TABLE *table_arg,
667  HA_CREATE_INFO *create_info)
668 {
669  int error;
670  char name_buff[FN_REFLEN], name_lc_buff[FN_REFLEN];
671  char *name_buffer_ptr;
672  const char *path;
673  uint i;
674  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
675  partition_element *part_elem;
676  handler **file, **abort_file;
677  DBUG_ENTER("ha_partition::create");
678 
679  DBUG_ASSERT(*fn_rext((char*)name) == '\0');
680 
681  /* Not allowed to create temporary partitioned tables */
682  if (create_info && create_info->options & HA_LEX_CREATE_TMP_TABLE)
683  {
684  my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
685  DBUG_RETURN(TRUE);
686  }
687 
688  if (get_from_handler_file(name, ha_thd()->mem_root, false))
689  DBUG_RETURN(TRUE);
690  DBUG_ASSERT(m_file_buffer);
691  DBUG_PRINT("enter", ("name: (%s)", name));
 /* Walk the NUL-separated partition-name buffer in step with m_file. */
692  name_buffer_ptr= m_name_buffer_ptr;
693  file= m_file;
694  /*
695  Since ha_partition has HA_FILE_BASED, it must alter underlying table names
696  if they do not have HA_FILE_BASED and lower_case_table_names == 2.
697  See Bug#37402, for Mac OS X.
698  The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
699  Using the first partitions handler, since mixing handlers is not allowed.
700  */
701  path= get_canonical_filename(*file, name, name_lc_buff);
702  for (i= 0; i < m_part_info->num_parts; i++)
703  {
704  part_elem= part_it++;
705  if (m_is_sub_partitioned)
706  {
707  uint j;
708  List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
709  for (j= 0; j < m_part_info->num_subparts; j++)
710  {
711  part_elem= sub_it++;
712  create_partition_name(name_buff, path, name_buffer_ptr,
713  NORMAL_PART_NAME, FALSE);
714  if ((error= set_up_table_before_create(table_arg, name_buff,
715  create_info, part_elem)) ||
716  ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
717  goto create_error;
718 
719  name_buffer_ptr= strend(name_buffer_ptr) + 1;
720  file++;
721  }
722  }
723  else
724  {
725  create_partition_name(name_buff, path, name_buffer_ptr,
726  NORMAL_PART_NAME, FALSE);
727  if ((error= set_up_table_before_create(table_arg, name_buff,
728  create_info, part_elem)) ||
729  ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
730  goto create_error;
731 
732  name_buffer_ptr= strend(name_buffer_ptr) + 1;
733  file++;
734  }
735  }
736  DBUG_RETURN(0);
737 
738 create_error:
 /* Roll back: delete every partition table created before the failure. */
739  name_buffer_ptr= m_name_buffer_ptr;
740  for (abort_file= file, file= m_file; file < abort_file; file++)
741  {
742  create_partition_name(name_buff, path, name_buffer_ptr, NORMAL_PART_NAME,
743  FALSE);
744  (void) (*file)->ha_delete_table((const char*) name_buff);
745  name_buffer_ptr= strend(name_buffer_ptr) + 1;
746  }
 /* Also remove the files the partition handler itself created (.par). */
747  handler::delete_table(name);
748  DBUG_RETURN(error);
749 }
750 
751 
752 /*
753  Drop partitions as part of ALTER TABLE of partitions
754 
755  SYNOPSIS
756  drop_partitions()
757  path Complete path of db and table name
758 
759  RETURN VALUE
760  >0 Failure
761  0 Success
762 
763  DESCRIPTION
764  Use part_info object on handler object to deduce which partitions to
765  drop (each partition has a state attached to it)
766 */
767 
768 int ha_partition::drop_partitions(const char *path)
769 {
770  List_iterator<partition_element> part_it(m_part_info->partitions);
771  char part_name_buff[FN_REFLEN];
772  uint num_parts= m_part_info->partitions.elements;
773  uint num_subparts= m_part_info->num_subparts;
774  uint i= 0;
775  uint name_variant;
776  int ret_error;
777  int error= 0;
778  DBUG_ENTER("ha_partition::drop_partitions");
779 
780  /*
781  Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
782  We use m_file[0] as long as all partitions have the same storage engine.
783  */
784  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
785  part_name_buff)));
786  do
787  {
788  partition_element *part_elem= part_it++;
789  if (part_elem->part_state == PART_TO_BE_DROPPED)
790  {
791  handler *file;
792  /*
793  This part is to be dropped, meaning the part or all its subparts.
794  */
795  name_variant= NORMAL_PART_NAME;
796  if (m_is_sub_partitioned)
797  {
798  List_iterator<partition_element> sub_it(part_elem->subpartitions);
799  uint j= 0, part;
800  do
801  {
802  partition_element *sub_elem= sub_it++;
 /* Subpartition handlers are laid out as i * num_subparts + j. */
803  part= i * num_subparts + j;
804  create_subpartition_name(part_name_buff, path,
805  part_elem->partition_name,
806  sub_elem->partition_name, name_variant);
807  file= m_file[part];
808  DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
 /* Record the error but keep dropping the remaining (sub)partitions. */
809  if ((ret_error= file->ha_delete_table(part_name_buff)))
810  error= ret_error;
811  if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
812  error= 1;
813  } while (++j < num_subparts);
814  }
815  else
816  {
817  create_partition_name(part_name_buff, path,
818  part_elem->partition_name, name_variant,
819  TRUE);
820  file= m_file[i];
821  DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
822  if ((ret_error= file->ha_delete_table(part_name_buff)))
823  error= ret_error;
824  if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
825  error= 1;
826  }
827  if (part_elem->part_state == PART_IS_CHANGED)
828  part_elem->part_state= PART_NORMAL;
829  else
830  part_elem->part_state= PART_IS_DROPPED;
831  }
832  } while (++i < num_parts);
 /* Persist the DDL log state changes made above. */
833  (void) sync_ddl_log();
834  DBUG_RETURN(error);
835 }
836 
837 
838 /*
839  Rename partitions as part of ALTER TABLE of partitions
840 
841  SYNOPSIS
842  rename_partitions()
843  path Complete path of db and table name
844 
845  RETURN VALUE
846  TRUE Failure
847  FALSE Success
848 
849  DESCRIPTION
850  When reorganising partitions, adding hash partitions and coalescing
851  partitions it can be necessary to rename partitions while holding
852  an exclusive lock on the table.
853  Which partitions to rename is given by state of partitions found by the
854  partition info struct referenced from the handler object
855 */
856 
857 int ha_partition::rename_partitions(const char *path)
858 {
859  List_iterator<partition_element> part_it(m_part_info->partitions);
860  List_iterator<partition_element> temp_it(m_part_info->temp_partitions);
861  char part_name_buff[FN_REFLEN];
862  char norm_name_buff[FN_REFLEN];
863  uint num_parts= m_part_info->partitions.elements;
864  uint part_count= 0;
865  uint num_subparts= m_part_info->num_subparts;
866  uint i= 0;
867  uint j= 0;
868  int error= 0;
869  int ret_error;
870  uint temp_partitions= m_part_info->temp_partitions.elements;
871  handler *file;
872  partition_element *part_elem, *sub_elem;
873  DBUG_ENTER("ha_partition::rename_partitions");
874 
875  /*
876  Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
877  We use m_file[0] as long as all partitions have the same storage engine.
878  */
879  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
880  norm_name_buff)));
881 
882  DEBUG_SYNC(ha_thd(), "before_rename_partitions");
883  if (temp_partitions)
884  {
885  /*
886  These are the reorganised partitions that have already been copied.
887  We delete the partitions and log the delete by inactivating the
888  delete log entry in the table log. We only need to synchronise
889  these writes before moving to the next loop since there is no
890  interaction among reorganised partitions, they cannot have the
891  same name.
892  */
893  do
894  {
895  part_elem= temp_it++;
896  if (m_is_sub_partitioned)
897  {
898  List_iterator<partition_element> sub_it(part_elem->subpartitions);
899  j= 0;
900  do
901  {
902  sub_elem= sub_it++;
903  file= m_reorged_file[part_count++];
904  create_subpartition_name(norm_name_buff, path,
905  part_elem->partition_name,
906  sub_elem->partition_name,
907  NORMAL_PART_NAME);
908  DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
 /* On failure keep going; record error and leave log_entry set. */
909  if ((ret_error= file->ha_delete_table(norm_name_buff)))
910  error= ret_error;
911  else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
912  error= 1;
913  else
914  sub_elem->log_entry= NULL; /* Indicate success */
915  } while (++j < num_subparts);
916  }
917  else
918  {
919  file= m_reorged_file[part_count++];
920  create_partition_name(norm_name_buff, path,
921  part_elem->partition_name, NORMAL_PART_NAME,
922  TRUE);
923  DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
924  if ((ret_error= file->ha_delete_table(norm_name_buff)))
925  error= ret_error;
926  else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
927  error= 1;
928  else
929  part_elem->log_entry= NULL; /* Indicate success */
930  }
931  } while (++i < temp_partitions);
932  (void) sync_ddl_log();
933  }
934  i= 0;
935  do
936  {
937  /*
938  When state is PART_IS_CHANGED it means that we have created a new
939  TEMP partition that is to be renamed to normal partition name and
940  we are to delete the old partition with currently the normal name.
941 
942  We perform this operation by
943  1) Delete old partition with normal partition name
944  2) Signal this in table log entry
945  3) Synch table log to ensure we have consistency in crashes
946  4) Rename temporary partition name to normal partition name
947  5) Signal this to table log entry
948  It is not necessary to synch the last state since a new rename
949  should not corrupt things if there was no temporary partition.
950 
951  The only other parts we need to cater for are new parts that
952  replace reorganised parts. The reorganised parts were deleted
953  by the code above that goes through the temp_partitions list.
954  Thus the synch above makes it safe to simply perform step 4 and 5
955  for those entries.
956  */
957  part_elem= part_it++;
958  if (part_elem->part_state == PART_IS_CHANGED ||
959  part_elem->part_state == PART_TO_BE_DROPPED ||
960  (part_elem->part_state == PART_IS_ADDED && temp_partitions))
961  {
962  if (m_is_sub_partitioned)
963  {
964  List_iterator<partition_element> sub_it(part_elem->subpartitions);
965  uint part;
966 
967  j= 0;
968  do
969  {
970  sub_elem= sub_it++;
971  part= i * num_subparts + j;
972  create_subpartition_name(norm_name_buff, path,
973  part_elem->partition_name,
974  sub_elem->partition_name,
975  NORMAL_PART_NAME);
976  if (part_elem->part_state == PART_IS_CHANGED)
977  {
 /* Step 1-3: delete old subpartition, log it, sync the DDL log. */
978  file= m_reorged_file[part_count++];
979  DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
980  if ((ret_error= file->ha_delete_table(norm_name_buff)))
981  error= ret_error;
982  else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
983  error= 1;
984  (void) sync_ddl_log();
985  }
 /* Step 4-5: rename TEMP subpartition to its normal name and log it. */
986  file= m_new_file[part];
987  create_subpartition_name(part_name_buff, path,
988  part_elem->partition_name,
989  sub_elem->partition_name,
990  TEMP_PART_NAME);
991  DBUG_PRINT("info", ("Rename subpartition from %s to %s",
992  part_name_buff, norm_name_buff));
993  if ((ret_error= file->ha_rename_table(part_name_buff,
994  norm_name_buff)))
995  error= ret_error;
996  else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
997  error= 1;
998  else
999  sub_elem->log_entry= NULL;
1000  } while (++j < num_subparts);
1001  }
1002  else
1003  {
1004  create_partition_name(norm_name_buff, path,
1005  part_elem->partition_name, NORMAL_PART_NAME,
1006  TRUE);
1007  if (part_elem->part_state == PART_IS_CHANGED)
1008  {
1009  file= m_reorged_file[part_count++];
1010  DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
1011  if ((ret_error= file->ha_delete_table(norm_name_buff)))
1012  error= ret_error;
1013  else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
1014  error= 1;
1015  (void) sync_ddl_log();
1016  }
1017  file= m_new_file[i];
1018  create_partition_name(part_name_buff, path,
1019  part_elem->partition_name, TEMP_PART_NAME,
1020  TRUE);
1021  DBUG_PRINT("info", ("Rename partition from %s to %s",
1022  part_name_buff, norm_name_buff));
1023  if ((ret_error= file->ha_rename_table(part_name_buff,
1024  norm_name_buff)))
1025  error= ret_error;
1026  else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
1027  error= 1;
1028  else
1029  part_elem->log_entry= NULL;
1030  }
1031  }
1032  } while (++i < num_parts);
1033  (void) sync_ddl_log();
1034  DBUG_RETURN(error);
1035 }
1036 
1037 
/* Operation codes dispatched through handle_opt_partitions(). */
1038 #define OPTIMIZE_PARTS 1
1039 #define ANALYZE_PARTS 2
1040 #define CHECK_PARTS 3
1041 #define REPAIR_PARTS 4
1042 #define ASSIGN_KEYCACHE_PARTS 5
1043 #define PRELOAD_KEYS_PARTS 6
1044 
/* Operation names indexed by the codes above (index 0 is unused). */
1045 static const char *opt_op_name[]= {NULL,
1046  "optimize", "analyze", "check", "repair",
1047  "assign_to_keycache", "preload_keys"};
1048 
1049 /*
1050  Optimize table
1051 
1052  SYNOPSIS
1053  optimize()
1054  thd Thread object
1055  check_opt Check/analyze/repair/optimize options
1056 
1057  RETURN VALUES
1058  >0 Error
1059  0 Success
1060 */
1061 
1062 int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
1063 {
1064  DBUG_ENTER("ha_partition::optimize");
1065 
1066  DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
1067 }
1068 
1069 
1070 /*
1071  Analyze table
1072 
1073  SYNOPSIS
1074  analyze()
1075  thd Thread object
1076  check_opt Check/analyze/repair/optimize options
1077 
1078  RETURN VALUES
1079  >0 Error
1080  0 Success
1081 */
1082 
1083 int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
1084 {
1085  DBUG_ENTER("ha_partition::analyze");
1086 
1087  DBUG_RETURN(handle_opt_partitions(thd, check_opt, ANALYZE_PARTS));
1088 }
1089 
1090 
1091 /*
1092  Check table
1093 
1094  SYNOPSIS
1095  check()
1096  thd Thread object
1097  check_opt Check/analyze/repair/optimize options
1098 
1099  RETURN VALUES
1100  >0 Error
1101  0 Success
1102 */
1103 
1104 int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
1105 {
1106  DBUG_ENTER("ha_partition::check");
1107 
1108  DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
1109 }
1110 
1111 
1112 /*
1113  Repair table
1114 
1115  SYNOPSIS
1116  repair()
1117  thd Thread object
1118  check_opt Check/analyze/repair/optimize options
1119 
1120  RETURN VALUES
1121  >0 Error
1122  0 Success
1123 */
1124 
1125 int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
1126 {
1127  DBUG_ENTER("ha_partition::repair");
1128 
1129  DBUG_RETURN(handle_opt_partitions(thd, check_opt, REPAIR_PARTS));
1130 }
1131 
1143 int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
1144 {
1145  DBUG_ENTER("ha_partition::assign_to_keycache");
1146 
1147  DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
1148 }
1149 
1150 
1162 int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
1163 {
1164  DBUG_ENTER("ha_partition::preload_keys");
1165 
1166  DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
1167 }
1168 
1169 
1170 /*
1171  Handle optimize/analyze/check/repair of one partition
1172 
1173  SYNOPSIS
1174  handle_opt_part()
1175  thd Thread object
1176  check_opt Options
1177  file Handler object of partition
1178  flag Optimize/Analyze/Check/Repair flag
1179 
1180  RETURN VALUE
1181  >0 Failure
1182  0 Success
1183 */
1184 
1185 int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
1186  uint part_id, uint flag)
1187 {
1188  int error;
1189  handler *file= m_file[part_id];
1190  DBUG_ENTER("handle_opt_part");
1191  DBUG_PRINT("enter", ("flag = %u", flag));
1192 
1193  if (flag == OPTIMIZE_PARTS)
1194  error= file->ha_optimize(thd, check_opt);
1195  else if (flag == ANALYZE_PARTS)
1196  error= file->ha_analyze(thd, check_opt);
1197  else if (flag == CHECK_PARTS)
1198  {
1199  error= file->ha_check(thd, check_opt);
1200  if (!error ||
1201  error == HA_ADMIN_ALREADY_DONE ||
1202  error == HA_ADMIN_NOT_IMPLEMENTED)
1203  {
1204  if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1205  error= check_misplaced_rows(part_id, false);
1206  }
1207  }
1208  else if (flag == REPAIR_PARTS)
1209  {
1210  error= file->ha_repair(thd, check_opt);
1211  if (!error ||
1212  error == HA_ADMIN_ALREADY_DONE ||
1213  error == HA_ADMIN_NOT_IMPLEMENTED)
1214  {
1215  if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1216  error= check_misplaced_rows(part_id, true);
1217  }
1218  }
1219  else if (flag == ASSIGN_KEYCACHE_PARTS)
1220  error= file->assign_to_keycache(thd, check_opt);
1221  else if (flag == PRELOAD_KEYS_PARTS)
1222  error= file->preload_keys(thd, check_opt);
1223  else
1224  {
1225  DBUG_ASSERT(FALSE);
1226  error= 1;
1227  }
1228  if (error == HA_ADMIN_ALREADY_DONE)
1229  error= 0;
1230  DBUG_RETURN(error);
1231 }
1232 
1233 
1234 /*
1235  print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE
1236  (modelled after mi_check_print_msg)
1237  TODO: move this into the handler, or rewrite mysql_admin_table.
1238 */
/*
  Send one admin-result row (db.table, operation, message type, message) to
  the client, or log it to the error log if no client connection is usable.

  @param thd         Thread handle.
  @param len         Size of the message buffer to allocate; the formatted
                     message must fit in len - 1 bytes or the row is dropped.
  @param msg_type    Message class, e.g. "error".
  @param db_name     Database name for the "Table" column.
  @param table_name  Table name for the "Table" column.
  @param op_name     Operation name (see opt_op_name[]).
  @param fmt         printf-style format for the message text.

  @return true on failure (OOM, truncation, or network write error),
          false on success.
*/
static bool print_admin_msg(THD* thd, uint len,
                            const char* msg_type,
                            const char* db_name, const char* table_name,
                            const char* op_name, const char *fmt, ...)
  ATTRIBUTE_FORMAT(printf, 7, 8);
static bool print_admin_msg(THD* thd, uint len,
                            const char* msg_type,
                            const char* db_name, const char* table_name,
                            const char* op_name, const char *fmt, ...)
{
  va_list args;
  Protocol *protocol= thd->protocol;
  uint length;
  uint msg_length;
  char name[NAME_LEN*2+2];
  char *msgbuf;
  bool error= true;

  if (!(msgbuf= (char*) my_malloc(len, MYF(0))))
    return true;
  va_start(args, fmt);
  msg_length= my_vsnprintf(msgbuf, len, fmt, args);
  va_end(args);
  /* A truncated message is treated as a failure, not sent partially. */
  if (msg_length >= (len - 1))
    goto err;
  msgbuf[len - 1] = 0; // healthy paranoia


  /* No usable client connection: fall back to the server error log. */
  if (!thd->vio_ok())
  {
    sql_print_error("%s", msgbuf);
    goto err;
  }

  length=(uint) (strxmov(name, db_name, ".", table_name,NullS) - name);
  /*
    TODO: switch from protocol to push_warning here. The main reason we didn't
    it yet is parallel repair. Due to following trace:
    mi_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.

    Also we likely need to lock mutex here (in both cases with protocol and
    push_warning).
  */
  DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name,
                     msg_type, msgbuf));
  protocol->prepare_for_resend();
  protocol->store(name, length, system_charset_info);
  protocol->store(op_name, system_charset_info);
  protocol->store(msg_type, system_charset_info);
  protocol->store(msgbuf, msg_length, system_charset_info);
  if (protocol->write())
  {
    sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
                    msgbuf);
    goto err;
  }
  error= false;
err:
  my_free(msgbuf);
  return error;
}
1300 
1301 
1302 /*
1303  Handle optimize/analyze/check/repair of partitions
1304 
1305  SYNOPSIS
1306  handle_opt_partitions()
1307  thd Thread object
1308  check_opt Options
1309  flag Optimize/Analyze/Check/Repair flag
1310 
1311  RETURN VALUE
1312  >0 Failure
1313  0 Success
1314 */
1315 
int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
                                        uint flag)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  int error;
  DBUG_ENTER("ha_partition::handle_opt_partitions");
  DBUG_PRINT("enter", ("flag= %u", flag));

  do
  {
    partition_element *part_elem= part_it++;
    /*
      when ALTER TABLE <CMD> PARTITION ...
      it should only do named partitions, otherwise all partitions
    */
    if (!(thd->lex->alter_info.flags & Alter_info::ALTER_ADMIN_PARTITION) ||
        part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        /* Subpartitioned: operate on each subpartition of this partition. */
        List_iterator<partition_element> subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* Linearized handler index of subpartition j of partition i. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if ((error= handle_opt_part(thd, check_opt, part, flag)))
          {
            /* print a line which partition the error belongs to */
            if (error != HA_ADMIN_NOT_IMPLEMENTED &&
                error != HA_ADMIN_ALREADY_DONE &&
                error != HA_ADMIN_TRY_ALTER)
            {
              print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                              table_share->db.str, table->alias,
                              opt_op_name[flag],
                              "Subpartition %s returned error",
                              sub_elem->partition_name);
            }
            /* reset part_state for the remaining partitions */
            do
            {
              if (part_elem->part_state == PART_ADMIN)
                part_elem->part_state= PART_NORMAL;
            } while ((part_elem= part_it++));
            DBUG_RETURN(error);
          }
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
                            part_elem->partition_name));
        if ((error= handle_opt_part(thd, check_opt, i, flag)))
        {
          /* print a line which partition the error belongs to */
          if (error != HA_ADMIN_NOT_IMPLEMENTED &&
              error != HA_ADMIN_ALREADY_DONE &&
              error != HA_ADMIN_TRY_ALTER)
          {
            print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[flag], "Partition %s returned error",
                            part_elem->partition_name);
          }
          /* reset part_state for the remaining partitions */
          do
          {
            if (part_elem->part_state == PART_ADMIN)
              part_elem->part_state= PART_NORMAL;
          } while ((part_elem= part_it++));
          DBUG_RETURN(error);
        }
      }
      /* This partition processed successfully; clear its admin marker. */
      part_elem->part_state= PART_NORMAL;
    }
  } while (++i < num_parts);
  DBUG_RETURN(FALSE);
}
1401 
1402 
1414 bool ha_partition::check_and_repair(THD *thd)
1415 {
1416  handler **file= m_file;
1417  DBUG_ENTER("ha_partition::check_and_repair");
1418 
1419  do
1420  {
1421  if ((*file)->ha_check_and_repair(thd))
1422  DBUG_RETURN(TRUE);
1423  } while (*(++file));
1424  DBUG_RETURN(FALSE);
1425 }
1426 
1427 
1435 bool ha_partition::auto_repair() const
1436 {
1437  DBUG_ENTER("ha_partition::auto_repair");
1438 
1439  /*
1440  As long as we only support one storage engine per table,
1441  we can use the first partition for this function.
1442  */
1443  DBUG_RETURN(m_file[0]->auto_repair());
1444 }
1445 
1446 
1454 bool ha_partition::is_crashed() const
1455 {
1456  handler **file= m_file;
1457  DBUG_ENTER("ha_partition::is_crashed");
1458 
1459  do
1460  {
1461  if ((*file)->is_crashed())
1462  DBUG_RETURN(TRUE);
1463  } while (*(++file));
1464  DBUG_RETURN(FALSE);
1465 }
1466 
1467 
1468 /*
1469  Prepare by creating a new partition
1470 
1471  SYNOPSIS
1472  prepare_new_partition()
1473  table Table object
1474  create_info Create info from CREATE TABLE
1475  file Handler object of new partition
1476  part_name partition name
1477 
1478  RETURN VALUE
1479  >0 Error
1480  0 Success
1481 */
1482 
int ha_partition::prepare_new_partition(TABLE *tbl,
                                        HA_CREATE_INFO *create_info,
                                        handler *file, const char *part_name,
                                        partition_element *p_elem)
{
  int error;
  DBUG_ENTER("prepare_new_partition");

  /*
    This call to set_up_table_before_create() is done for an alter table.
    So this may be the second time around for this partition_element,
    depending on how many partitions and subpartitions there were before,
    and how many there are now.
    The first time, on the CREATE, data_file_name and index_file_name
    came from the parser. They did not have the file name attached to
    the end. But if this partition is less than the total number of
    previous partitions, it's data_file_name has the filename attached.
    So we need to take the partition filename off if it exists.
    That file name may be different from part_name, which will be
    attached in append_file_to_dir().
  */
  truncate_partition_filename(p_elem->data_file_name);
  truncate_partition_filename(p_elem->index_file_name);

  if ((error= set_up_table_before_create(tbl, part_name, create_info, p_elem)))
    goto error_create;

  if ((error= file->ha_create(part_name, tbl, create_info)))
  {
    /*
      Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
      if the table/partition already exists.
      If we return that error code, then print_error would try to
      get_dup_key on a non-existing partition.
      So return a more reasonable error code.
    */
    if (error == HA_ERR_FOUND_DUPP_KEY)
      error= HA_ERR_TABLE_EXIST;
    goto error_create;
  }
  DBUG_PRINT("info", ("partition %s created", part_name));
  if ((error= file->ha_open(tbl, part_name, m_mode,
                            m_open_test_lock | HA_OPEN_NO_PSI_CALL)))
    goto error_open;
  DBUG_PRINT("info", ("partition %s opened", part_name));
  /*
    Note: if you plan to add another call that may return failure,
    better to do it before external_lock() as cleanup_new_partition()
    assumes that external_lock() is last call that may fail here.
    Otherwise see description for cleanup_new_partition().
  */
  if ((error= file->ha_external_lock(ha_thd(), F_WRLCK)))
    goto error_external_lock;
  DBUG_PRINT("info", ("partition %s external locked", part_name));

  DBUG_RETURN(0);
/* Unwind in reverse acquisition order: lock -> open -> created file. */
error_external_lock:
  (void) file->ha_close();
error_open:
  (void) file->ha_delete_table(part_name);
error_create:
  DBUG_RETURN(error);
}
1546 
1547 
1548 /*
1549  Cleanup by removing all created partitions after error
1550 
1551  SYNOPSIS
1552  cleanup_new_partition()
1553  part_count Number of partitions to remove
1554 
1555  RETURN VALUE
1556  NONE
1557 
1558  DESCRIPTION
1559  This function is called immediately after prepare_new_partition() in
1560  case the latter fails.
1561 
1562  In prepare_new_partition() last call that may return failure is
1563  external_lock(). That means if prepare_new_partition() fails,
1564  partition does not have external lock. Thus no need to call
1565  external_lock(F_UNLCK) here.
1566 
1567  TODO:
1568  We must ensure that in the case that we get an error during the process
1569  that we call external_lock with F_UNLCK, close the table and delete the
1570  table in the case where we have been successful with prepare_handler.
1571  We solve this by keeping an array of successful calls to prepare_handler
1572  which can then be used to undo the call.
1573 */
1574 
1575 void ha_partition::cleanup_new_partition(uint part_count)
1576 {
1577  DBUG_ENTER("ha_partition::cleanup_new_partition");
1578 
1579  if (m_added_file)
1580  {
1581  THD *thd= ha_thd();
1582  handler **file= m_added_file;
1583  while ((part_count > 0) && (*file))
1584  {
1585  (*file)->ha_external_lock(thd, F_UNLCK);
1586  (*file)->ha_close();
1587 
1588  /* Leave the (*file)->ha_delete_table(part_name) to the ddl-log */
1589 
1590  file++;
1591  part_count--;
1592  }
1593  m_added_file= NULL;
1594  }
1595  DBUG_VOID_RETURN;
1596 }
1597 
1598 /*
1599  Implement the partition changes defined by ALTER TABLE of partitions
1600 
1601  SYNOPSIS
1602  change_partitions()
1603  create_info HA_CREATE_INFO object describing all
1604  fields and indexes in table
1605  path Complete path of db and table name
1606  out: copied Output parameter where number of copied
1607  records are added
1608  out: deleted Output parameter where number of deleted
1609  records are added
1610  pack_frm_data Reference to packed frm file
1611  pack_frm_len Length of packed frm file
1612 
1613  RETURN VALUE
1614  >0 Failure
1615  0 Success
1616 
1617  DESCRIPTION
1618  Add and copy if needed a number of partitions, during this operation
1619  no other operation is ongoing in the server. This is used by
1620  ADD PARTITION all types as well as by REORGANIZE PARTITION. For
1621  one-phased implementations it is used also by DROP and COALESCE
1622  PARTITIONs.
1623  One-phased implementation needs the new frm file, other handlers will
1624  get zero length and a NULL reference here.
1625 */
1626 
int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
                                    const char *path,
                                    ulonglong * const copied,
                                    ulonglong * const deleted,
                                    const uchar *pack_frm_data
                                    __attribute__((unused)),
                                    size_t pack_frm_len
                                    __attribute__((unused)))
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator <partition_element> t_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint num_remain_partitions, part_count, orig_count;
  handler **new_file_array;
  int error= 1;
  bool first;
  uint temp_partitions= m_part_info->temp_partitions.elements;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::change_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  m_reorged_parts= 0;
  /* Without subpartitioning each partition counts as one handler slot. */
  if (!m_part_info->is_sub_partitioned())
    num_subparts= 1;

  /*
    Step 1:
    Calculate number of reorganised partitions and allocate space for
    their handler references.
  */
  if (temp_partitions)
  {
    m_reorged_parts= temp_partitions * num_subparts;
  }
  else
  {
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        m_reorged_parts+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /* Extra slot keeps the array NULL-terminated. */
  if (m_reorged_parts &&
      !(m_reorged_file= (handler**)sql_calloc(sizeof(handler*)*
                                              (m_reorged_parts + 1))))
  {
    mem_alloc_error(sizeof(handler*)*(m_reorged_parts+1));
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  /*
    Step 2:
    Calculate number of partitions after change and allocate space for
    their handler references.
  */
  num_remain_partitions= 0;
  if (temp_partitions)
  {
    num_remain_partitions= num_parts * num_subparts;
  }
  else
  {
    part_it.rewind();
    i= 0;
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_NORMAL ||
          part_elem->part_state == PART_TO_BE_ADDED ||
          part_elem->part_state == PART_CHANGED)
      {
        num_remain_partitions+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /*
    Allocate twice the needed size: the first half holds all remaining
    partitions, the second half (m_added_file) only the newly added ones.
  */
  if (!(new_file_array= (handler**)sql_calloc(sizeof(handler*)*
                                              (2*(num_remain_partitions + 1)))))
  {
    mem_alloc_error(sizeof(handler*)*2*(num_remain_partitions+1));
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  m_added_file= &new_file_array[num_remain_partitions + 1];

  /*
    Step 3:
    Fill m_reorged_file with handler references and NULL at the end
  */
  if (m_reorged_parts)
  {
    i= 0;
    part_count= 0;
    first= TRUE;
    part_it.rewind();
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        memcpy((void*)&m_reorged_file[part_count],
               (void*)&m_file[i*num_subparts],
               sizeof(handler*)*num_subparts);
        part_count+= num_subparts;
      }
      else if (first && temp_partitions &&
               part_elem->part_state == PART_TO_BE_ADDED)
      {
        /*
          When doing an ALTER TABLE REORGANIZE PARTITION a number of
          partitions is to be reorganised into a set of new partitions.
          The reorganised partitions are in this case in the temp_partitions
          list. We copy all of them in one batch and thus we only do this
          until we find the first partition with state PART_TO_BE_ADDED
          since this is where the new partitions go in and where the old
          ones used to be.
        */
        first= FALSE;
        DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts);
        memcpy((void*)m_reorged_file, &m_file[i*num_subparts],
               sizeof(handler*)*m_reorged_parts);
      }
    } while (++i < num_parts);
  }

  /*
    Step 4:
    Fill new_array_file with handler references. Create the handlers if
    needed.
  */
  i= 0;
  part_count= 0;
  orig_count= 0;
  first= TRUE;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_NORMAL)
    {
      /* Unchanged partition: reuse the existing handler objects. */
      DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts);
      memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count],
             sizeof(handler*)*num_subparts);
      part_count+= num_subparts;
      orig_count+= num_subparts;
    }
    else if (part_elem->part_state == PART_CHANGED ||
             part_elem->part_state == PART_TO_BE_ADDED)
    {
      uint j= 0;
      Parts_share_refs *p_share_refs;
      /*
        The Handler_shares for each partition's handler can be allocated
        within this handler, since there will not be any more instances of the
        new partitions, until the table is reopened after the ALTER succeeded.
      */
      p_share_refs= new Parts_share_refs;
      if (!p_share_refs)
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (p_share_refs->init(num_subparts))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (m_new_partitions_share_refs.push_back(p_share_refs))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      do
      {
        /* Create a fresh handler object for each new (sub)partition. */
        handler **new_file= &new_file_array[part_count++];
        if (!(*new_file=
              get_new_handler(table->s,
                              thd->mem_root,
                              part_elem->engine_type)))
        {
          mem_alloc_error(sizeof(handler));
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
        if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j]))
        {
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
      } while (++j < num_subparts);
      if (part_elem->part_state == PART_CHANGED)
        orig_count+= num_subparts;
      else if (temp_partitions && first)
      {
        /* Skip over the old (reorganised) handlers in one batch. */
        orig_count+= (num_subparts * temp_partitions);
        first= FALSE;
      }
    }
  } while (++i < num_parts);
  first= FALSE;
  /*
    Step 5:
    Create the new partitions and also open, lock and call external_lock
    on them to prepare them for copy phase and also for later close
    calls
  */
  i= 0;
  part_count= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED ||
        part_elem->part_state == PART_CHANGED)
    {
      /*
        A new partition needs to be created PART_TO_BE_ADDED means an
        entirely new partition and PART_CHANGED means a changed partition
        that will still exist with either more or less data in it.
      */
      uint name_variant= NORMAL_PART_NAME;
      if (part_elem->part_state == PART_CHANGED ||
          (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
        name_variant= TEMP_PART_NAME;
      if (m_part_info->is_sub_partitioned())
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          create_subpartition_name(part_name_buff, path,
                                   part_elem->partition_name,
                                   sub_elem->partition_name,
                                   name_variant);
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
          if ((error= prepare_new_partition(table, create_info,
                                            new_file_array[part],
                                            (const char *)part_name_buff,
                                            sub_elem)))
          {
            /* Undo the partitions prepared so far, then bail out. */
            cleanup_new_partition(part_count);
            DBUG_RETURN(error);
          }
          m_added_file[part_count++]= new_file_array[part];
        } while (++j < num_subparts);
      }
      else
      {
        create_partition_name(part_name_buff, path,
                              part_elem->partition_name, name_variant,
                              TRUE);
        DBUG_PRINT("info", ("Add partition %s", part_name_buff));
        if ((error= prepare_new_partition(table, create_info,
                                          new_file_array[i],
                                          (const char *)part_name_buff,
                                          part_elem)))
        {
          /* Undo the partitions prepared so far, then bail out. */
          cleanup_new_partition(part_count);
          DBUG_RETURN(error);
        }
        m_added_file[part_count++]= new_file_array[i];
      }
    }
  } while (++i < num_parts);

  /*
    Step 6:
    State update to prepare for next write of the frm file.
  */
  i= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED)
      part_elem->part_state= PART_IS_ADDED;
    else if (part_elem->part_state == PART_CHANGED)
      part_elem->part_state= PART_IS_CHANGED;
    else if (part_elem->part_state == PART_REORGED_DROPPED)
      part_elem->part_state= PART_TO_BE_DROPPED;
  } while (++i < num_parts);
  for (i= 0; i < temp_partitions; i++)
  {
    partition_element *part_elem= t_it++;
    DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
    part_elem->part_state= PART_TO_BE_DROPPED;
  }
  m_new_file= new_file_array;
  if ((error= copy_partitions(copied, deleted)))
  {
    /*
      Close and unlock the new temporary partitions.
      They will later be deleted through the ddl-log.
    */
    cleanup_new_partition(part_count);
  }
  DBUG_RETURN(error);
}
1927 
1928 
1929 /*
1930  Copy partitions as part of ALTER TABLE of partitions
1931 
1932  SYNOPSIS
1933  copy_partitions()
1934  out:copied Number of records copied
1935  out:deleted Number of records deleted
1936 
1937  RETURN VALUE
1938  >0 Error code
1939  0 Success
1940 
1941  DESCRIPTION
1942  change_partitions has done all the preparations, now it is time to
1943  actually copy the data from the reorganised partitions to the new
1944  partitions.
1945 */
1946 
int ha_partition::copy_partitions(ulonglong * const copied,
                                  ulonglong * const deleted)
{
  uint reorg_part= 0;
  int result= 0;
  longlong func_value;
  DBUG_ENTER("ha_partition::copy_partitions");

  /*
    For LINEAR HASH/KEY partitioning the hash mask must reflect the new
    partition count before get_partition_id() is used below.
  */
  if (m_part_info->linear_hash_ind)
  {
    if (m_part_info->part_type == HASH_PARTITION)
      set_linear_hash_mask(m_part_info, m_part_info->num_parts);
    else
      set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
  }

  /* Scan each reorganised partition and route its rows to new partitions. */
  while (reorg_part < m_reorged_parts)
  {
    handler *file= m_reorged_file[reorg_part];
    uint32 new_part;

    late_extra_cache(reorg_part);
    if ((result= file->ha_rnd_init(1)))
      goto init_error;
    while (TRUE)
    {
      if ((result= file->ha_rnd_next(m_rec0)))
      {
        if (result == HA_ERR_RECORD_DELETED)
          continue;                              //Probably MyISAM
        if (result != HA_ERR_END_OF_FILE)
          goto error;
        /*
          End-of-file reached, break out to continue with next partition or
          end the copy process.
        */
        break;
      }
      /* Found record to insert into new handler */
      if (m_part_info->get_partition_id(m_part_info, &new_part,
                                        &func_value))
      {
        /*
          This record is in the original table but will not be in the new
          table since it doesn't fit into any partition any longer due to
          changed partitioning ranges or list values.
        */
        (*deleted)++;
      }
      else
      {
        THD *thd= ha_thd();
        /* Copy record to new handler */
        (*copied)++;
        tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
        result= m_new_file[new_part]->ha_write_row(m_rec0);
        reenable_binlog(thd);
        if (result)
          goto error;
      }
    }
    late_extra_no_cache(reorg_part);
    file->ha_rnd_end();
    reorg_part++;
  }
  DBUG_RETURN(FALSE);
/* error: scan was initialized, so end it before returning. */
error:
  m_reorged_file[reorg_part]->ha_rnd_end();
init_error:
  DBUG_RETURN(result);
}
2018 
2019 /*
2020  Update create info as part of ALTER TABLE
2021 
2022  SYNOPSIS
2023  update_create_info()
2024  create_info Create info from ALTER TABLE
2025 
2026  RETURN VALUE
2027  NONE
2028 
2029  DESCRIPTION
2030  Forward this handler call to the storage engine foreach
2031  partition handler. The data_file_name for each partition may
2032  need to be reset if the tablespace was moved. Use a dummy
2033  HA_CREATE_INFO structure and transfer necessary data.
2034 */
2035 
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE);

  info(HA_STATUS_AUTO);

  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
  */
  /* (const char*) -1 is used by ALTER as a sentinel; see caller. */
  my_bool from_alter = (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name = NULL;

  /*
    We do not need to update the individual partition DATA DIRECTORY settings
    since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object. Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts = num_subparts ? m_file_tot_parts / num_subparts
                                : m_file_tot_parts;
  HA_CREATE_INFO dummy_info;
  memset(&dummy_info, 0, sizeof(dummy_info));

  /*
    Since update_create_info() can be called from mysql_prepare_alter_table()
    when not all handlers are set up, we look for that condition first.
    If all handlers are not available, do not call update_create_info for any.
  */
  uint i, j, part;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /* Second pass: all handlers verified present; now collect directories. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    DBUG_ASSERT(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        DBUG_ASSERT(sub_elem);
        part= i * num_subparts + j;
        DBUG_ASSERT(part < m_file_tot_parts && m_file[part]);
        /* Only InnoDB partitions are consulted here. */
        if (ha_legacy_type(m_file[part]->ht) == DB_TYPE_INNODB)
        {
          dummy_info.data_file_name= dummy_info.index_file_name = NULL;
          m_file[part]->update_create_info(&dummy_info);

          if (dummy_info.data_file_name || sub_elem->data_file_name)
          {
            sub_elem->data_file_name = (char*) dummy_info.data_file_name;
          }
          if (dummy_info.index_file_name || sub_elem->index_file_name)
          {
            sub_elem->index_file_name = (char*) dummy_info.index_file_name;
          }
        }
      }
    }
    else
    {
      DBUG_ASSERT(m_file[i]);
      if (ha_legacy_type(m_file[i]->ht) == DB_TYPE_INNODB)
      {
        dummy_info.data_file_name= dummy_info.index_file_name= NULL;
        m_file[i]->update_create_info(&dummy_info);
        if (dummy_info.data_file_name || part_elem->data_file_name)
        {
          part_elem->data_file_name = (char*) dummy_info.data_file_name;
        }
        if (dummy_info.index_file_name || part_elem->index_file_name)
        {
          part_elem->index_file_name = (char*) dummy_info.index_file_name;
        }
      }
    }
  }
  DBUG_VOID_RETURN;
}
2162 
2163 
2174 void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2175 {
2176  handler **file_array;
2177  table= table_arg;
2178  table_share= share;
2179  /*
2180  m_file can be NULL when using an old cached table in DROP TABLE, when the
2181  table just has REMOVED PARTITIONING, see Bug#42438
2182  */
2183  if (m_file)
2184  {
2185  file_array= m_file;
2186  DBUG_ASSERT(*file_array);
2187  do
2188  {
2189  (*file_array)->change_table_ptr(table_arg, share);
2190  } while (*(++file_array));
2191  }
2192 
2193  if (m_added_file && m_added_file[0])
2194  {
2195  /* if in middle of a drop/rename etc */
2196  file_array= m_added_file;
2197  do
2198  {
2199  (*file_array)->change_table_ptr(table_arg, share);
2200  } while (*(++file_array));
2201  }
2202 }
2203 
2204 /*
2205  Change comments specific to handler
2206 
2207  SYNOPSIS
2208  update_table_comment()
2209  comment Original comment
2210 
2211  RETURN VALUE
2212  new comment
2213 
2214  DESCRIPTION
2215  No comment changes so far
2216 */
2217 
2218 char *ha_partition::update_table_comment(const char *comment)
2219 {
2220  return (char*) comment; /* Nothing to change */
2221 }
2222 
2223 
/**
  Delete or rename all underlying partition tables plus the .par file.

  @param from  Path of the table (without extension) to delete or rename.
  @param to    New path for a rename, or NULL for a delete.

  @return 0 on success; HA_ERR_NO_SUCH_TABLE when the .par file is missing
          (signals the caller that the .frm can be removed); otherwise a
          handler error code.

  On a failed rename, already-renamed partitions are renamed back
  (best effort). On delete, the last per-partition error is remembered but
  the loop continues over all partitions.
*/
int ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error= HA_ERR_INTERNAL_ERROR;
  char from_buff[FN_REFLEN], to_buff[FN_REFLEN], from_lc_buff[FN_REFLEN],
       to_lc_buff[FN_REFLEN], buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::del_ren_table");

  fn_format(buff,from, "", ha_par_ext, MY_APPEND_EXT);
  /* Check if the par file exists */
  if (my_access(buff,F_OK))
  {
    /*
      If the .par file does not exist, return HA_ERR_NO_SUCH_TABLE,
      This will signal to the caller that it can remove the .frm
      file.
    */
    error= HA_ERR_NO_SUCH_TABLE;
    DBUG_RETURN(error);
  }

  /* Load partition names/engines from the .par file into m_file_buffer. */
  if (get_from_handler_file(from, ha_thd()->mem_root, false))
    DBUG_RETURN(error);
  DBUG_ASSERT(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  i= 0;
  /* Walk the NULL-terminated handler array; one name per partition. */
  do
  {
    create_partition_name(from_buff, from_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);

    if (to != NULL)
    {                                           // Rename branch
      create_partition_name(to_buff, to_path, name_buffer_ptr,
                            NORMAL_PART_NAME, FALSE);
      error= (*file)->ha_rename_table(from_buff, to_buff);
      if (error)
        goto rename_error;
    }
    else                                        // delete branch
    {
      error= (*file)->ha_delete_table(from_buff);
    }
    /* Names are stored NUL-separated; advance to the next one. */
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (error)
      save_error= error;
    i++;
  } while (*(++file));

  if (to == NULL)
  {
    DBUG_EXECUTE_IF("crash_before_deleting_par_file", DBUG_SUICIDE(););

    /* Delete the .par file. If error, break.*/
    if ((error= handler::delete_table(from)))
      DBUG_RETURN(error);

    DBUG_EXECUTE_IF("crash_after_deleting_par_file", DBUG_SUICIDE(););
  }

  if (to != NULL)
  {
    /* Rename the .par file itself. */
    if ((error= handler::rename_table(from, to)))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }
  DBUG_RETURN(save_error);
rename_error:
  /* Undo the renames done so far, partitions [m_file, file). */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    create_partition_name(from_buff, from_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);
    create_partition_name(to_buff, to_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);
    /* Ignore error here */
    (void) (*file)->ha_rename_table(to_buff, from_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
2344 
2345 
2365 int ha_partition::set_up_table_before_create(TABLE *tbl,
2366  const char *partition_name_with_path,
2367  HA_CREATE_INFO *info,
2368  partition_element *part_elem)
2369 {
2370  int error= 0;
2371  const char *partition_name;
2372  THD *thd= ha_thd();
2373  DBUG_ENTER("set_up_table_before_create");
2374 
2375  DBUG_ASSERT(part_elem);
2376 
2377  if (!part_elem)
2378  DBUG_RETURN(1);
2379  tbl->s->max_rows= part_elem->part_max_rows;
2380  tbl->s->min_rows= part_elem->part_min_rows;
2381  partition_name= strrchr(partition_name_with_path, FN_LIBCHAR);
2382  if ((part_elem->index_file_name &&
2383  (error= append_file_to_dir(thd,
2384  (const char**)&part_elem->index_file_name,
2385  partition_name+1))) ||
2386  (part_elem->data_file_name &&
2387  (error= append_file_to_dir(thd,
2388  (const char**)&part_elem->data_file_name,
2389  partition_name+1))))
2390  {
2391  DBUG_RETURN(error);
2392  }
2393  info->index_file_name= part_elem->index_file_name;
2394  info->data_file_name= part_elem->data_file_name;
2395  DBUG_RETURN(0);
2396 }
2397 
2398 
2399 /*
2400  Add two names together
2401 
2402  SYNOPSIS
2403  name_add()
2404  out:dest Destination string
2405  first_name First name
2406  sec_name Second name
2407 
2408  RETURN VALUE
2409  >0 Error
2410  0 Success
2411 
2412  DESCRIPTION
2413  Routine used to add two names with '_' in between then. Service routine
2414  to create_handler_file
2415  Include the NULL in the count of characters since it is needed as separator
2416  between the partition names.
2417 */
2418 
2419 static uint name_add(char *dest, const char *first_name, const char *sec_name)
2420 {
2421  return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
2422 }
2423 
2424 
/**
  Write the .par meta-data file for a partitioned table.

  The file records, for each partition in state PART_NORMAL,
  PART_TO_BE_ADDED or PART_CHANGED: its legacy engine type and its
  filename-encoded (sub)partition name. A first pass computes sizes, a
  second pass fills the buffer, then the checksummed buffer is written to
  "<name>.par".

  @param name  Path of the table (without extension).

  @return TRUE on failure (allocation, create or write error), FALSE on
          success.
*/
bool ha_partition::create_handler_file(const char *name)
{
  partition_element *part_elem, *subpart_elem;
  uint i, j, part_name_len, subpart_name_len;
  uint tot_partition_words, tot_name_len, num_parts;
  uint tot_parts= 0;
  uint tot_len_words, tot_len_byte, chksum, tot_name_words;
  char *name_buffer_ptr;
  uchar *file_buffer, *engine_array;
  bool result= TRUE;
  char file_name[FN_REFLEN];
  char part_name[FN_REFLEN];
  char subpart_name[FN_REFLEN];
  File file;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("create_handler_file");

  num_parts= m_part_info->partitions.elements;
  DBUG_PRINT("info", ("table name = %s, num_parts = %u", name,
                      num_parts));
  tot_name_len= 0;
  /* Pass 1: count partitions and total bytes needed for the name part. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    tablename_to_filename(part_elem->partition_name, part_name,
                          FN_REFLEN);
    part_name_len= strlen(part_name);
    if (!m_is_sub_partitioned)
    {
      /* +1 for the NUL separator after each name. */
      tot_name_len+= part_name_len + 1;
      tot_parts++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(subpart_elem->partition_name,
                              subpart_name,
                              FN_REFLEN);
        subpart_name_len= strlen(subpart_name);
        /* +5: four chars for "#SP#" plus the NUL separator (see name_add). */
        tot_name_len+= part_name_len + subpart_name_len + 5;
        tot_parts++;
      }
    }
  }
  /*
    File format:
    Length in words              4 byte
    Checksum                     4 byte
    Total number of partitions   4 byte
    Array of engine types        n * 4 bytes where
    n = (m_tot_parts + 3)/4
    Length of name part in bytes 4 bytes
    (Names in filename format)
    Name part                    m * 4 bytes where
    m = ((length_name_part + 3)/4)*4

    All padding bytes are zeroed
  */
  tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  /* 4 static words (tot words, checksum, tot partitions, name length) */
  tot_len_words= 4 + tot_partition_words + tot_name_words;
  tot_len_byte= PAR_WORD_SIZE * tot_len_words;
  /* MY_ZEROFILL guarantees the padding bytes are zero, as the format needs. */
  if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
    DBUG_RETURN(TRUE);
  engine_array= (file_buffer + PAR_ENGINES_OFFSET);
  name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                            + PAR_WORD_SIZE);
  part_it.rewind();
  /* Pass 2: fill in engine bytes and NUL-separated names. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    if (!m_is_sub_partitioned)
    {
      tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
      name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1;
      *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
      DBUG_PRINT("info", ("engine: %u", *engine_array));
      engine_array++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(part_elem->partition_name, part_name,
                              FN_REFLEN);
        tablename_to_filename(subpart_elem->partition_name, subpart_name,
                              FN_REFLEN);
        /* Stores "<part>#SP#<subpart>" and advances past its NUL. */
        name_buffer_ptr+= name_add(name_buffer_ptr,
                                   part_name,
                                   subpart_name);
        *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
        DBUG_PRINT("info", ("engine: %u", *engine_array));
        engine_array++;
      }
    }
  }
  chksum= 0;
  int4store(file_buffer, tot_len_words);
  int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
  int4store(file_buffer + PAR_ENGINES_OFFSET +
            (tot_partition_words * PAR_WORD_SIZE),
            tot_name_len);
  /* XOR of all words; read_par_file verifies the same XOR comes out zero. */
  for (i= 0; i < tot_len_words; i++)
    chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
  int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
  /*
    Add .par extension to the file name.
    Create and write and close file
    to be used at open, delete_table and rename_table
  */
  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
  if ((file= mysql_file_create(key_file_partition,
                               file_name, CREATE_MODE, O_RDWR | O_TRUNC,
                               MYF(MY_WME))) >= 0)
  {
    result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                             MYF(MY_WME | MY_NABP)) != 0;
    (void) mysql_file_close(file, MYF(0));
  }
  else
    result= TRUE;
  my_free(file_buffer);
  DBUG_RETURN(result);
}
2577 
2578 
2583 void ha_partition::clear_handler_file()
2584 {
2585  if (m_engine_array)
2586  {
2587  plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
2588  my_free(m_engine_array);
2589  m_engine_array= NULL;
2590  }
2591  if (m_file_buffer)
2592  {
2593  my_free(m_file_buffer);
2594  m_file_buffer= NULL;
2595  }
2596 }
2597 
2598 
2609 bool ha_partition::create_handlers(MEM_ROOT *mem_root)
2610 {
2611  uint i;
2612  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2613  handlerton *hton0;
2614  DBUG_ENTER("create_handlers");
2615 
2616  if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2617  DBUG_RETURN(TRUE);
2618  m_file_tot_parts= m_tot_parts;
2619  memset(m_file, 0, alloc_len);
2620  for (i= 0; i < m_tot_parts; i++)
2621  {
2622  handlerton *hton= plugin_data(m_engine_array[i], handlerton*);
2623  if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
2624  DBUG_RETURN(TRUE);
2625  DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
2626  }
2627  /* For the moment we only support partition over the same table engine */
2628  hton0= plugin_data(m_engine_array[0], handlerton*);
2629  if (hton0 == myisam_hton)
2630  {
2631  DBUG_PRINT("info", ("MyISAM"));
2632  m_myisam= TRUE;
2633  }
2634  /* INNODB may not be compiled in... */
2635  else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
2636  {
2637  DBUG_PRINT("info", ("InnoDB"));
2638  m_innodb= TRUE;
2639  }
2640  DBUG_RETURN(FALSE);
2641 }
2642 
2643 
2644 /*
2645  Create underlying handler objects from partition info
2646 
2647  SYNOPSIS
2648  new_handlers_from_part_info()
2649  mem_root Allocate memory through this
2650 
2651  RETURN VALUE
2652  TRUE Error
2653  FALSE Success
2654 */
2655 
bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
{
  uint i, j, part_count;
  partition_element *part_elem;
  /* +1 so the handler array stays NULL-terminated. */
  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("ha_partition::new_handlers_from_part_info");

  if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
  {
    mem_alloc_error(alloc_len);
    goto error_end;
  }
  m_file_tot_parts= m_tot_parts;
  memset(m_file, 0, alloc_len);
  DBUG_ASSERT(m_part_info->num_parts > 0);

  i= 0;
  part_count= 0;
  /*
    Don't know the size of the underlying storage engine, invent a number of
    bytes allocated for error message if allocation fails
  */
  do
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      /* One handler per subpartition, all using the parent's engine type. */
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                    part_elem->engine_type)))
          goto error;
        DBUG_PRINT("info", ("engine_type: %u",
                   (uint) ha_legacy_type(part_elem->engine_type)));
      }
    }
    else
    {
      if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                  part_elem->engine_type)))
        goto error;
      DBUG_PRINT("info", ("engine_type: %u",
                 (uint) ha_legacy_type(part_elem->engine_type)));
    }
  } while (++i < m_part_info->num_parts);
  /*
    All partitions share one engine, so checking the last element's engine
    is sufficient to detect MyISAM.
  */
  if (part_elem->engine_type == myisam_hton)
  {
    DBUG_PRINT("info", ("MyISAM"));
    m_myisam= TRUE;
  }
  DBUG_RETURN(FALSE);
error:
  mem_alloc_error(sizeof(handler));
error_end:
  DBUG_RETURN(TRUE);
}
2713 
2714 
/**
  Read and validate the .par meta-data file into m_file_buffer.

  Reads the length word first, then the whole file, verifies the XOR
  checksum and the internal length consistency, and sets m_tot_parts and
  m_name_buffer_ptr from the buffer. A no-op if the buffer is already
  loaded.

  @param name  Path of the table (without extension).

  @return false on success, true on open/read/validation failure.
*/
bool ha_partition::read_par_file(const char *name)
{
  char buff[FN_REFLEN], *tot_name_len_offset, *buff_p= buff;
  File file;
  char *file_buffer;
  uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
  DBUG_ENTER("ha_partition::read_par_file");
  DBUG_PRINT("enter", ("table name: '%s'", name));

  /* Already loaded once; the buffer is kept until clear_handler_file(). */
  if (m_file_buffer)
    DBUG_RETURN(false);
  fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);

  /* Following could be done with mysql_file_stat to read in whole file */
  if ((file= mysql_file_open(key_file_partition,
                             buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
    DBUG_RETURN(TRUE);
  /* First word of the file is its total length in 4-byte words. */
  if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
    goto err1;
  len_words= uint4korr(buff_p);
  len_bytes= PAR_WORD_SIZE * len_words;
  if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
    goto err1;
  if (!(file_buffer= (char*) my_malloc(len_bytes, MYF(0))))
    goto err1;
  if (mysql_file_read(file, (uchar *) file_buffer, len_bytes, MYF(MY_NABP)))
    goto err2;

  /*
    The stored checksum word was chosen so the XOR over all words is zero;
    any non-zero result means corruption.
  */
  chksum= 0;
  for (i= 0; i < len_words; i++)
    chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
  if (chksum)
    goto err2;
  m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
  DBUG_PRINT("info", ("No of parts = %u", m_tot_parts));
  tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;

  tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
                       PAR_WORD_SIZE * tot_partition_words;
  tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
                  PAR_WORD_SIZE;
  /*
    Verify the total length = tot size word, checksum word, num parts word +
    engines array + name length word + name array.
  */
  if (len_words != (tot_partition_words + tot_name_words + 4))
    goto err2;
  (void) mysql_file_close(file, MYF(0));
  m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
  m_name_buffer_ptr= tot_name_len_offset + PAR_WORD_SIZE;

  DBUG_RETURN(false);

err2:
  my_free(file_buffer);
err1:
  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(true);
}
2787 
2788 
2799 bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
2800 {
2801  uint i;
2802  uchar *buff;
2803  handlerton **engine_array, *first_engine;
2804  enum legacy_db_type db_type, first_db_type;
2805 
2806  DBUG_ASSERT(!m_file);
2807  DBUG_ENTER("ha_partition::setup_engine_array");
2808  engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
2809  if (!engine_array)
2810  DBUG_RETURN(true);
2811 
2812  buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
2813  first_db_type= (enum legacy_db_type) buff[0];
2814  first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
2815  if (!first_engine)
2816  goto err;
2817 
2818  if (!(m_engine_array= (plugin_ref*)
2819  my_malloc(m_tot_parts * sizeof(plugin_ref), MYF(MY_WME))))
2820  goto err;
2821 
2822  for (i= 0; i < m_tot_parts; i++)
2823  {
2824  db_type= (enum legacy_db_type) buff[i];
2825  if (db_type != first_db_type)
2826  {
2827  DBUG_PRINT("error", ("partition %u engine %d is not same as "
2828  "first partition %d", i, db_type,
2829  (int) first_db_type));
2830  DBUG_ASSERT(0);
2831  clear_handler_file();
2832  goto err;
2833  }
2834  m_engine_array[i]= ha_lock_engine(NULL, first_engine);
2835  if (!m_engine_array[i])
2836  {
2837  clear_handler_file();
2838  goto err;
2839  }
2840  }
2841 
2842  my_afree((gptr) engine_array);
2843 
2844  if (create_handlers(mem_root))
2845  {
2846  clear_handler_file();
2847  DBUG_RETURN(true);
2848  }
2849 
2850  DBUG_RETURN(false);
2851 
2852 err:
2853  my_afree((gptr) engine_array);
2854  DBUG_RETURN(true);
2855 }
2856 
2857 
2873 bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
2874  bool is_clone)
2875 {
2876  DBUG_ENTER("ha_partition::get_from_handler_file");
2877  DBUG_PRINT("enter", ("table name: '%s'", name));
2878 
2879  if (m_file_buffer)
2880  DBUG_RETURN(false);
2881 
2882  if (read_par_file(name))
2883  DBUG_RETURN(true);
2884 
2885  if (!is_clone && setup_engine_array(mem_root))
2886  DBUG_RETURN(true);
2887 
2888  DBUG_RETURN(false);
2889 }
2890 
2891 
2892 /****************************************************************************
2893  MODULE open/close object
2894 ****************************************************************************/
2895 
/**
  Hash callback: return the key (partition name) and its length for an entry
  in partition_name_hash.
*/
static uchar *get_part_name(PART_NAME_DEF *part, size_t *length,
                            my_bool not_used __attribute__((unused)))
{
  /* Length was precomputed at insert time to avoid strlen() per lookup. */
  *length= part->length;
  return part->partition_name;
}
2911 
2912 
/**
  Insert one (sub)partition name -> partition id mapping into the shared
  partition_name_hash.

  @param name        Partition or subpartition name.
  @param part_id     Partition id the name maps to.
  @param is_subpart  True if the name belongs to a subpartition.

  @return false on success, true on allocation or hash-insert failure.
*/
bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id,
                                                 bool is_subpart)
{
  PART_NAME_DEF *part_def;
  uchar *part_name;
  uint part_name_length;
  DBUG_ENTER("ha_partition::insert_partition_name_in_hash");
  /*
    Calculate and store the length here, to avoid doing it when
    searching the hash.
  */
  part_name_length= strlen(name);
  /*
    Must use memory that lives as long as table_share.
    Freed in the Partition_share destructor.
    Since we use my_multi_malloc, then my_free(part_def) will also free
    part_name, as a part of my_hash_free.
  */
  if (!my_multi_malloc(MY_WME,
                       &part_def, sizeof(PART_NAME_DEF),
                       &part_name, part_name_length + 1,
                       NULL))
    DBUG_RETURN(true);
  /* Copy including the terminating NUL. */
  memcpy(part_name, name, part_name_length + 1);
  part_def->partition_name= part_name;
  part_def->length= part_name_length;
  part_def->part_id= part_id;
  part_def->is_subpart= is_subpart;
  if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def))
  {
    /* Not inserted, so the hash will not free it; free it here. */
    my_free(part_def);
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
2960 
2961 
2966 bool ha_partition::populate_partition_name_hash()
2967 {
2968  List_iterator<partition_element> part_it(m_part_info->partitions);
2969  uint num_parts= m_part_info->num_parts;
2970  uint num_subparts= m_is_sub_partitioned ? m_part_info->num_subparts : 1;
2971  uint tot_names;
2972  uint i= 0;
2973  DBUG_ASSERT(part_share);
2974 
2975  DBUG_ENTER("ha_partition::populate_partition_name_hash");
2976 
2977  /*
2978  partition_name_hash is only set once and never changed
2979  -> OK to check without locking.
2980  */
2981 
2982  if (part_share->partition_name_hash_initialized)
2983  DBUG_RETURN(false);
2985  if (part_share->partition_name_hash_initialized)
2986  {
2988  DBUG_RETURN(false);
2989  }
2990  tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts;
2991  if (my_hash_init(&part_share->partition_name_hash,
2992  system_charset_info, tot_names, 0, 0,
2993  (my_hash_get_key) get_part_name,
2994  my_free, HASH_UNIQUE))
2995  {
2997  DBUG_RETURN(TRUE);
2998  }
2999 
3000  do
3001  {
3002  partition_element *part_elem= part_it++;
3003  DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
3004  if (part_elem->part_state == PART_NORMAL)
3005  {
3006  if (insert_partition_name_in_hash(part_elem->partition_name,
3007  i * num_subparts, false))
3008  goto err;
3009  if (m_is_sub_partitioned)
3010  {
3012  subpart_it(part_elem->subpartitions);
3013  partition_element *sub_elem;
3014  uint j= 0;
3015  do
3016  {
3017  sub_elem= subpart_it++;
3018  if (insert_partition_name_in_hash(sub_elem->partition_name,
3019  i * num_subparts + j, true))
3020  goto err;
3021 
3022  } while (++j < num_subparts);
3023  }
3024  }
3025  } while (++i < num_parts);
3026 
3027  part_share->partition_name_hash_initialized= true;
3029 
3030  DBUG_RETURN(FALSE);
3031 err:
3032  my_hash_free(&part_share->partition_name_hash);
3034 
3035  DBUG_RETURN(TRUE);
3036 }
3037 
3038 
3051 bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
3052 {
3053  Handler_share **ha_shares;
3054  uint i;
3055  DBUG_ENTER("ha_partition::set_ha_share_ref");
3056 
3057  DBUG_ASSERT(!part_share);
3058  DBUG_ASSERT(table_share);
3059  DBUG_ASSERT(!m_is_clone_of);
3060  DBUG_ASSERT(m_tot_parts);
3061  if (handler::set_ha_share_ref(ha_share_arg))
3062  DBUG_RETURN(true);
3063  if (!(part_share= get_share()))
3064  DBUG_RETURN(true);
3065  DBUG_ASSERT(part_share->partitions_share_refs);
3066  DBUG_ASSERT(part_share->partitions_share_refs->num_parts >= m_tot_parts);
3067  ha_shares= part_share->partitions_share_refs->ha_shares;
3068  for (i= 0; i < m_tot_parts; i++)
3069  {
3070  if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
3071  DBUG_RETURN(true);
3072  }
3073  DBUG_RETURN(false);
3074 }
3075 
3076 
3088 Partition_share *ha_partition::get_share()
3089 {
3090  Partition_share *tmp_share;
3091  DBUG_ENTER("ha_partition::get_share");
3092  DBUG_ASSERT(table_share);
3093 
3095  if (!(tmp_share= static_cast<Partition_share*>(get_ha_share_ptr())))
3096  {
3097  tmp_share= new Partition_share;
3098  if (!tmp_share)
3099  goto err;
3100  if (tmp_share->init(m_tot_parts))
3101  {
3102  delete tmp_share;
3103  tmp_share= NULL;
3104  goto err;
3105  }
3106  set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
3107  }
3108 err:
3110  DBUG_RETURN(tmp_share);
3111 }
3112 
3113 
3114 
/**
  Free all bitmaps allocated by init_partition_bitmaps().
  Must only be called when all four bitmaps were successfully initialized.
*/
void ha_partition::free_partition_bitmaps()
{
  /* Free the bitmaps in the same set as init_partition_bitmaps() creates */
  bitmap_free(&m_bulk_insert_started);
  bitmap_free(&m_locked_partitions);
  bitmap_free(&m_partitions_to_reset);
  bitmap_free(&m_key_not_found_partitions);
}
3127 
3128 
3133 bool ha_partition::init_partition_bitmaps()
3134 {
3135  DBUG_ENTER("ha_partition::init_partition_bitmaps");
3136  /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
3137  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
3138  DBUG_RETURN(true);
3139  bitmap_clear_all(&m_bulk_insert_started);
3140 
3141  /* Initialize the bitmap we use to keep track of locked partitions */
3142  if (bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
3143  {
3144  bitmap_free(&m_bulk_insert_started);
3145  DBUG_RETURN(true);
3146  }
3147  bitmap_clear_all(&m_locked_partitions);
3148 
3149  /*
3150  Initialize the bitmap we use to keep track of partitions which may have
3151  something to reset in ha_reset().
3152  */
3153  if (bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
3154  {
3155  bitmap_free(&m_bulk_insert_started);
3156  bitmap_free(&m_locked_partitions);
3157  DBUG_RETURN(true);
3158  }
3159  bitmap_clear_all(&m_partitions_to_reset);
3160 
3161  /*
3162  Initialize the bitmap we use to keep track of partitions which returned
3163  HA_ERR_KEY_NOT_FOUND from index_read_map.
3164  */
3165  if (bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, FALSE))
3166  {
3167  bitmap_free(&m_bulk_insert_started);
3168  bitmap_free(&m_locked_partitions);
3169  bitmap_free(&m_partitions_to_reset);
3170  DBUG_RETURN(true);
3171  }
3172  bitmap_clear_all(&m_key_not_found_partitions);
3173  m_key_not_found= false;
3174  /* Initialize the bitmap for read/lock_partitions */
3175  if (!m_is_clone_of)
3176  {
3177  DBUG_ASSERT(!m_clone_mem_root);
3178  if (m_part_info->set_partition_bitmaps(NULL))
3179  {
3180  free_partition_bitmaps();
3181  DBUG_RETURN(true);
3182  }
3183  }
3184  DBUG_RETURN(false);
3185 }
3186 
3187 
3188 /*
3189  Open handler object
3190 
3191  SYNOPSIS
3192  open()
3193  name Full path of table name
3194  mode Open mode flags
3195  test_if_locked ?
3196 
3197  RETURN VALUE
3198  >0 Error
3199  0 Success
3200 
3201  DESCRIPTION
3202  Used for opening tables. The name will be the name of the file.
3203  A table is opened when it needs to be opened. For instance
3204  when a request comes in for a select on the table (tables are not
3205  open and closed for each request, they are cached).
3206 
3207  Called from handler.cc by handler::ha_open(). The server opens all tables
3208  by calling ha_open() which then calls the handler specific open().
3209 */
3210 
int ha_partition::open(const char *name, int mode, uint test_if_locked)
{
  char *name_buffer_ptr;
  int error= HA_ERR_INITIALIZATION;
  handler **file;
  char name_buff[FN_REFLEN];
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::open");

  DBUG_ASSERT(table->s == table_share);
  ref_length= 0;
  m_mode= mode;
  m_open_test_lock= test_if_locked;
  m_part_field_array= m_part_info->full_part_field_array;
  /* Load .par data; clones skip engine array setup (copied from original). */
  if (get_from_handler_file(name, &table->mem_root, test(m_is_clone_of)))
    DBUG_RETURN(error);
  name_buffer_ptr= m_name_buffer_ptr;
  if (populate_partition_name_hash())
  {
    DBUG_RETURN(HA_ERR_INITIALIZATION);
  }
  m_start_key.length= 0;
  m_rec0= table->record[0];
  m_rec_length= table_share->reclength;
  if (!m_part_ids_sorted_by_num_of_records)
  {
    if (!(m_part_ids_sorted_by_num_of_records=
            (uint32*) my_malloc(m_tot_parts * sizeof(uint32), MYF(MY_WME))))
      DBUG_RETURN(error);
    uint32 i;
    /* Initialize it with all partition ids. */
    for (i= 0; i < m_tot_parts; i++)
      m_part_ids_sorted_by_num_of_records[i]= i;
  }

  if (init_partition_bitmaps())
    DBUG_RETURN(error);

  DBUG_ASSERT(m_part_info);

  if (m_is_clone_of)
  {
    uint i, alloc_len;
    DBUG_ASSERT(m_clone_mem_root);
    /* Allocate an array of handler pointers for the partitions handlers. */
    alloc_len= (m_tot_parts + 1) * sizeof(handler*);
    if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
    {
      error= HA_ERR_INITIALIZATION;
      goto err_alloc;
    }
    memset(m_file, 0, alloc_len);
    /*
      Populate them by cloning the original partitions. This also opens them.
      Note that file->ref is allocated too.
    */
    file= m_is_clone_of->m_file;
    for (i= 0; i < m_tot_parts; i++)
    {
      create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME,
                            FALSE);
      /* ::clone() will also set ha_share from the original. */
      if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
      {
        error= HA_ERR_INITIALIZATION;
        /* Point into m_file so the error path closes the opened clones. */
        file= &m_file[i];
        goto err_handler;
      }
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
    }
  }
  else
  {
    /* Open each partition handler; names are NUL-separated in the buffer. */
    file= m_file;
    do
    {
      create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME,
                            FALSE);
      if ((error= (*file)->ha_open(table, name_buff, mode,
                                   test_if_locked | HA_OPEN_NO_PSI_CALL)))
        goto err_handler;
      if (m_file == file)
        m_num_locks= (*file)->lock_count();
      /* All partitions must require the same number of locks. */
      DBUG_ASSERT(m_num_locks == (*file)->lock_count());
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
    } while (*(++file));
  }

  file= m_file;
  ref_length= (*file)->ref_length;
  check_table_flags= (((*file)->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      (PARTITION_ENABLED_TABLE_FLAGS));
  while (*(++file))
  {
    /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
    set_if_bigger(ref_length, ((*file)->ref_length));
    /*
      Verify that all partitions have the same set of table flags.
      Mask all flags that partitioning enables/disables.
    */
    if (check_table_flags != (((*file)->ha_table_flags() &
                               ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                              (PARTITION_ENABLED_TABLE_FLAGS)))
    {
      error= HA_ERR_INITIALIZATION;
      /* set file to last handler, so all of them are closed */
      file = &m_file[m_tot_parts - 1];
      goto err_handler;
    }
  }
  key_used_on_scan= m_file[0]->key_used_on_scan;
  implicit_emptied= m_file[0]->implicit_emptied;
  /*
    Add 2 bytes for partition id in position ref length.
    ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
  */
  ref_length+= PARTITION_BYTES_IN_POS;
  m_ref_length= ref_length;

  /*
    Release buffer read from .par file. It will not be reused again after
    being opened once.
  */
  clear_handler_file();

  /*
    Some handlers update statistics as part of the open call. This will in
    some cases corrupt the statistics of the partition handler and thus
    to ensure we have correct statistics we call info from open after
    calling open on all individual handlers.
  */
  m_handler_status= handler_opened;
  if (m_part_info->part_expr)
    m_part_func_monotonicity_info=
                            m_part_info->part_expr->get_monotonicity_info();
  else if (m_part_info->list_of_part_fields)
    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
  info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
  DBUG_RETURN(0);

err_handler:
  DEBUG_SYNC(ha_thd(), "partition_open_error");
  /* Close every handler opened so far, i.e. those before 'file'. */
  while (file-- != m_file)
    (*file)->ha_close();
err_alloc:
  free_partition_bitmaps();

  DBUG_RETURN(error);
}
3361 
3362 
3363 /*
3364  Disabled since it is not possible to prune yet.
3365  Without pruning, it needs to rebind/unbind every partition in every
3366  statement which uses a table from the table cache. Will also use
3367  as many PSI_tables as there are partitions.
3368 */
3369 #ifdef HAVE_M_PSI_PER_PARTITION
3370 void ha_partition::unbind_psi()
3371 {
3372  uint i;
3373 
3374  DBUG_ENTER("ha_partition::unbind_psi");
3375  handler::unbind_psi();
3376  for (i= 0; i < m_tot_parts; i++)
3377  {
3378  DBUG_ASSERT(m_file[i] != NULL);
3379  m_file[i]->unbind_psi();
3380  }
3381  DBUG_VOID_RETURN;
3382 }
3383 
3384 void ha_partition::rebind_psi()
3385 {
3386  uint i;
3387 
3388  DBUG_ENTER("ha_partition::rebind_psi");
3389  handler::rebind_psi();
3390  for (i= 0; i < m_tot_parts; i++)
3391  {
3392  DBUG_ASSERT(m_file[i] != NULL);
3393  m_file[i]->rebind_psi();
3394  }
3395  DBUG_VOID_RETURN;
3396 }
3397 #endif /* HAVE_M_PSI_PER_PARTITION */
3398 
3399 
3416 handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
3417 {
3418  ha_partition *new_handler;
3419 
3420  DBUG_ENTER("ha_partition::clone");
3421  new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
3422  this, mem_root);
3423  if (!new_handler)
3424  DBUG_RETURN(NULL);
3425 
3426  /*
3427  We will not clone each partition's handler here, it will be done in
3428  ha_partition::open() for clones. Also set_ha_share_ref is not needed
3429  here, since 1) ha_share is copied in the constructor used above
3430  2) each partition's cloned handler will set it from its original.
3431  */
3432 
3433  /*
3434  Allocate new_handler->ref here because otherwise ha_open will allocate it
3435  on this->table->mem_root and we will not be able to reclaim that memory
3436  when the clone handler object is destroyed.
3437  */
3438  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
3439  ALIGN_SIZE(m_ref_length)*2)))
3440  goto err;
3441 
3442  if (new_handler->ha_open(table, name,
3443  table->db_stat,
3444  HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
3445  goto err;
3446 
3447  DBUG_RETURN((handler*) new_handler);
3448 
3449 err:
3450  delete new_handler;
3451  DBUG_RETURN(NULL);
3452 }
3453 
3454 
3455 /*
3456  Close handler object
3457 
3458  SYNOPSIS
3459  close()
3460 
3461  RETURN VALUE
3462  >0 Error code
3463  0 Success
3464 
3465  DESCRIPTION
3466  Called from sql_base.cc, sql_select.cc, and table.cc.
3467  In sql_select.cc it is only used to close up temporary tables or during
3468  the process where a temporary table is converted over to being a
3469  myisam table.
3470  For sql_base.cc look at close_data_tables().
3471 */
3472 
3473 int ha_partition::close(void)
3474 {
3475  bool first= TRUE;
3476  handler **file;
3477  DBUG_ENTER("ha_partition::close");
3478 
3479  DBUG_ASSERT(table->s == table_share);
3480  destroy_record_priority_queue();
3481  free_partition_bitmaps();
3482  DBUG_ASSERT(m_part_info);
3483  file= m_file;
3484 
3485 repeat:
3486  do
3487  {
3488  (*file)->ha_close();
3489  } while (*(++file));
3490 
3491  if (first && m_added_file && m_added_file[0])
3492  {
3493  file= m_added_file;
3494  first= FALSE;
3495  goto repeat;
3496  }
3497 
3498  m_handler_status= handler_closed;
3499  DBUG_RETURN(0);
3500 }
3501 
3502 /****************************************************************************
3503  MODULE start/end statement
3504 ****************************************************************************/
3505 /*
3506  A number of methods to define various constants for the handler. In
3507  the case of the partition handler we need to use some max and min
3508  of the underlying handlers in most cases.
3509 */
3510 
3511 /*
3512  Set external locks on table
3513 
3514  SYNOPSIS
3515  external_lock()
3516  thd Thread object
3517  lock_type Type of external lock
3518 
3519  RETURN VALUE
3520  >0 Error code
3521  0 Success
3522 
3523  DESCRIPTION
3524  First you should go read the section "locking functions for mysql" in
3525  lock.cc to understand this.
3526  This create a lock on the table. If you are implementing a storage engine
3527  that can handle transactions look at ha_berkeley.cc to see how you will
3528  want to go about doing this. Otherwise you should consider calling
3529  flock() here.
3530  Originally this method was used to set locks on file level to enable
3531  several MySQL Servers to work on the same data. For transactional
3532  engines it has been "abused" to also mean start and end of statements
3533  to enable proper rollback of statements and transactions. When LOCK
3534  TABLES has been issued the start_stmt method takes over the role of
3535  indicating start of statement but in this case there is no end of
3536  statement indicator(?).
3537 
3538  Called from lock.cc by lock_external() and unlock_external(). Also called
3539  from sql_table.cc by copy_data_between_tables().
3540 */
3541 
int ha_partition::external_lock(THD *thd, int lock_type)
{
  uint error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock && !auto_increment_safe_stmt_log_lock);

  /*
    On unlock, visit exactly the partitions previously locked
    (m_locked_partitions); on lock, visit the partitions the current
    statement needs (m_part_info->lock_partitions).
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %d", lock_type, i));
    if ((error= m_file[i]->ha_external_lock(thd, lock_type)))
    {
      /* Unlock errors are ignored so that all partitions get unlocked. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    /* Record the successful lock so a later F_UNLCK releases exactly it. */
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    /* Everything released; forget the locked set. */
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  /*
    m_added_file holds handlers for newly added partitions; only unlock
    is expected for them here (see assert). NOTE(review): presumably set
    during partition management operations — confirm against callers.
  */
  if (m_added_file && m_added_file[0])
  {
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  DBUG_RETURN(0);

err_handler:
  uint j;
  /* Roll back: unlock every partition that was locked before the failure. */
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_lock(thd, F_UNLCK);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
3604 
3605 
3606 /*
3607  Get the lock(s) for the table and perform conversion of locks if needed
3608 
3609  SYNOPSIS
3610  store_lock()
3611  thd Thread object
3612  to Lock object array
3613  lock_type Table lock type
3614 
3615  RETURN VALUE
3616  >0 Error code
3617  0 Success
3618 
3619  DESCRIPTION
3620  The idea with handler::store_lock() is the following:
3621 
3622  The statement decided which locks we should need for the table
3623  for updates/deletes/inserts we get WRITE locks, for SELECT... we get
3624  read locks.
3625 
3626  Before adding the lock into the table lock handler (see thr_lock.c)
3627  mysqld calls store lock with the requested locks. Store lock can now
3628  modify a write lock to a read lock (or some other lock), ignore the
3629  lock (if we don't want to use MySQL table locks at all) or add locks
3630  for many tables (like we do when we are using a MERGE handler).
3631 
3632  Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
3633  (which signals that we are doing WRITES, but we are still allowing other
3634  readers and writers).
3635 
3636  When releasing locks, store_lock() is also called. In this case one
3637  usually doesn't have to do anything.
3638 
3639  store_lock is called when holding a global mutex to ensure that only
3640  one thread at a time changes the locking information of tables.
3641 
3642  In some exceptional cases MySQL may send a request for a TL_IGNORE;
3643  This means that we are requesting the same lock as last time and this
3644  should also be ignored. (This may happen when someone does a flush
3645  table when we have opened a part of the tables, in which case mysqld
3646  closes and reopens the tables and tries to get the same locks as last
3647  time). In the future we will probably try to remove this.
3648 
3649  Called from lock.cc by get_lock_data().
3650 */
3651 
3653  THR_LOCK_DATA **to,
3654  enum thr_lock_type lock_type)
3655 {
3656  uint i;
3657  DBUG_ENTER("ha_partition::store_lock");
3658  DBUG_ASSERT(thd == current_thd);
3659 
3660  /*
3661  This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
3662  even when thd != table->in_use. In that case don't use partition pruning,
3663  but use all partitions instead to avoid using another threads structures.
3664  */
3665  if (thd != table->in_use)
3666  {
3667  for (i= 0; i < m_tot_parts; i++)
3668  to= m_file[i]->store_lock(thd, to, lock_type);
3669  }
3670  else
3671  {
3672  for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
3673  i < m_tot_parts;
3674  i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
3675  {
3676  DBUG_PRINT("info", ("store lock %d iteration", i));
3677  to= m_file[i]->store_lock(thd, to, lock_type);
3678  }
3679  }
3680  DBUG_RETURN(to);
3681 }
3682 
3683 /*
3684  Start a statement when table is locked
3685 
3686  SYNOPSIS
3687  start_stmt()
3688  thd Thread object
3689  lock_type Type of external lock
3690 
3691  RETURN VALUE
3692  >0 Error code
3693  0 Success
3694 
3695  DESCRIPTION
3696  This method is called instead of external lock when the table is locked
3697  before the statement is executed.
3698 */
3699 
3700 int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
3701 {
3702  int error= 0;
3703  uint i;
3704  /* Assert that read_partitions is included in lock_partitions */
3705  DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
3706  &m_part_info->lock_partitions));
3707  /*
3708  m_locked_partitions is set in previous external_lock/LOCK TABLES.
3709  Current statement's lock requests must not include any partitions
3710  not previously locked.
3711  */
3712  DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
3713  &m_locked_partitions));
3714  DBUG_ENTER("ha_partition::start_stmt");
3715 
3716  for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
3717  i < m_tot_parts;
3718  i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
3719  {
3720  if ((error= m_file[i]->start_stmt(thd, lock_type)))
3721  break;
3722  /* Add partition to be called in reset(). */
3723  bitmap_set_bit(&m_partitions_to_reset, i);
3724  }
3725  DBUG_RETURN(error);
3726 }
3727 
3728 
3741 uint ha_partition::lock_count() const
3742 {
3743  DBUG_ENTER("ha_partition::lock_count");
3744  /*
3745  The caller want to know the upper bound, to allocate enough memory.
3746  There is no performance lost if we simply return maximum number locks
3747  needed, only some minor over allocation of memory in get_lock_data().
3748 
3749  Also notice that this may be called for another thread != table->in_use,
3750  when mysql_lock_abort_for_thread() is called. So this is more safe, then
3751  using number of partitions after pruning.
3752  */
3753  DBUG_RETURN(m_tot_parts * m_num_locks);
3754 }
3755 
3756 
3757 /*
3758  Unlock last accessed row
3759 
3760  SYNOPSIS
3761  unlock_row()
3762 
3763  RETURN VALUE
3764  NONE
3765 
3766  DESCRIPTION
3767  Record currently processed was not in the result set of the statement
3768  and is thus unlocked. Used for UPDATE and DELETE queries.
3769 */
3770 
3771 void ha_partition::unlock_row()
3772 {
3773  DBUG_ENTER("ha_partition::unlock_row");
3774  m_file[m_last_part]->unlock_row();
3775  DBUG_VOID_RETURN;
3776 }
3777 
3802 {
3803  DBUG_ENTER("ha_partition::was_semi_consistent_read");
3804  DBUG_ASSERT(m_last_part < m_tot_parts &&
3805  bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
3806  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
3807 }
3808 
3829 {
3830  uint i;
3831  DBUG_ENTER("ha_partition::try_semi_consistent_read");
3832 
3833  i= bitmap_get_first_set(&(m_part_info->read_partitions));
3834  DBUG_ASSERT(i != MY_BIT_NONE);
3835  for (;
3836  i < m_tot_parts;
3837  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
3838  {
3839  m_file[i]->try_semi_consistent_read(yes);
3840  }
3841  DBUG_VOID_RETURN;
3842 }
3843 
3844 
3845 /****************************************************************************
3846  MODULE change record
3847 ****************************************************************************/
3848 
3849 /*
3850  Insert a row to the table
3851 
3852  SYNOPSIS
3853  write_row()
3854  buf The row in MySQL Row Format
3855 
3856  RETURN VALUE
3857  >0 Error code
3858  0 Success
3859 
3860  DESCRIPTION
3861  write_row() inserts a row. buf() is a byte array of data, normally
3862  record[0].
3863 
3864  You can use the field information to extract the data from the native byte
3865  array type.
3866 
3867  Example of this would be:
3868  for (Field **field=table->field ; *field ; field++)
3869  {
3870  ...
3871  }
3872 
3873  See ha_tina.cc for a variant of extracting all of the data as strings.
3874  ha_berkeley.cc has a variant of how to store it intact by "packing" it
3875  for ha_berkeley's own native storage type.
3876 
3877  Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
3878  sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
3879 
3880 */
3881 
int ha_partition::write_row(uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  my_bitmap_map *old_map;
  THD *thd= ha_thd();
  /* Saved so changes made below can be undone at 'exit'. */
  sql_mode_t saved_sql_mode= thd->variables.sql_mode;
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_ASSERT(buf == m_rec0);

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (have_auto_increment)
  {
    if (!part_share->auto_inc_initialized &&
        !table_share->next_number_keypart)
    {
      /*
        If auto_increment in table_share is not initialized, start by
        initializing it.
      */
      info(HA_STATUS_AUTO);
    }
    error= update_auto_increment();

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if necessary.
    */
    if (error)
      goto exit;

    /*
      Don't allow generation of auto_increment value the partitions handler.
      If a partitions handler would change the value, then it might not
      match the partition any longer.
      This can occur if 'SET INSERT_ID = 0; INSERT (NULL)',
      So allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partitions handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }

  /*
    Temporarily allow all columns to be read while computing the target
    partition; the partition function may read columns outside read_set
    (this only affects the debug-build column access checks).
  */
  old_map= dbug_tmp_use_all_columns(table, table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  /* Refuse to write into a partition the statement has not locked. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %d", part_id));
  start_part_bulk_insert(thd, part_id);

  tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
  error= m_file[part_id]->ha_write_row(buf);
  if (have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);
  reenable_binlog(thd);
exit:
  /* Restore session state possibly modified by the auto_increment handling. */
  thd->variables.sql_mode= saved_sql_mode;
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  DBUG_RETURN(error);
}
3966 
3967 
3968 /*
3969  Update an existing row
3970 
3971  SYNOPSIS
3972  update_row()
3973  old_data Old record in MySQL Row Format
3974  new_data New record in MySQL Row Format
3975 
3976  RETURN VALUE
3977  >0 Error code
3978  0 Success
3979 
3980  DESCRIPTION
3981  Yes, update_row() does what you expect, it updates a row. old_data will
3982  have the previous row record in it, while new_data will have the newest
3983  data in it.
3984  Keep in mind that the server can do updates based on ordering if an
3985  ORDER BY clause was used. Consecutive ordering is not guaranteed.
3986 
3987  Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
3988  new_data is always record[0]
3989  old_data is always record[1]
3990 */
3991 
int ha_partition::update_row(const uchar *old_data, uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id;
  int error= 0;
  longlong func_value;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
  /* Compute source and target partitions from the old and new records. */
  if ((error= get_parts_for_update(old_data, new_data, table->record[0],
                                   m_part_info, &old_part_id, &new_part_id,
                                   &func_value)))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
  if (!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }

  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  if (old_part_id != m_last_part)
  {
    m_err_rec= old_data;
    DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
  }

  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  if (new_part_id == old_part_id)
  {
    /* Row stays in the same partition: a plain update there. */
    DBUG_PRINT("info", ("Update in partition %d", new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    reenable_binlog(thd);
    goto exit;
  }
  else
  {
    /* Row moves between partitions: write into new, then delete from old. */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %d to partition %d",
                        old_part_id, new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_write_row(new_data);
    reenable_binlog(thd);
    table->next_number_field= saved_next_number_field;
    if (error)
      goto exit;

    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[old_part_id]->ha_delete_row(old_data);
    reenable_binlog(thd);
    if (error)
    {
      /* The row now exists in both partitions; no rollback available yet. */
#ifdef IN_THE_FUTURE
      (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
#endif
      goto exit;
    }
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    if (!part_share->auto_inc_initialized)
      info(HA_STATUS_AUTO);
    set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}
4108 
4109 
4110 /*
4111  Remove an existing row
4112 
4113  SYNOPSIS
4114  delete_row
4115  buf Deleted row in MySQL Row Format
4116 
4117  RETURN VALUE
4118  >0 Error Code
4119  0 Success
4120 
4121  DESCRIPTION
4122  This will delete a row. buf will contain a copy of the row to be deleted.
4123  The server will call this right after the current row has been read
4124  (from either a previous rnd_xxx() or index_xxx() call).
4125  If you keep a pointer to the last row or can access a primary key it will
4126  make doing the deletion quite a bit easier.
4127  Keep in mind that the server does not guarantee consecutive deletions.
4128  ORDER BY clauses can be used.
4129 
4130  Called in sql_acl.cc and sql_udf.cc to manage internal table information.
4131  Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
4132  it is used for removing duplicates while in insert it is used for REPLACE
4133  calls.
4134 
4135  buf is either record[0] or record[1]
4136 */
4137 
int ha_partition::delete_row(const uchar *buf)
{
  uint32 part_id;
  int error;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::delete_row");
  m_err_rec= NULL;

  /* All partitioning columns must be readable to locate the row. */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
  /* Compute which partition the row belongs to from the record contents. */
  if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
  {
    DBUG_RETURN(error);
  }
  /* Should never call delete_row on a partition which is not read */
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
  /* Runtime check too, since the assert above is compiled out in release. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
    DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);

  /*
    The protocol for deleting a row is:
    1) position the handler (cursor) on the row to be deleted,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call delete_row with the full record as argument.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of forwarding the delete to the correct (m_last_part)
    partition!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.

    TODO: change the assert in InnoDB into an error instead and make this one
    an assert instead and remove the get_part_for_delete()!
  */
  if (part_id != m_last_part)
  {
    m_err_rec= buf;
    DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
  }

  m_last_part= part_id;
  tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
  error= m_file[part_id]->ha_delete_row(buf);
  reenable_binlog(thd);
  DBUG_RETURN(error);
}
4190 
4191 
4192 /*
4193  Delete all rows in a table
4194 
4195  SYNOPSIS
4196  delete_all_rows()
4197 
4198  RETURN VALUE
4199  >0 Error Code
4200  0 Success
4201 
4202  DESCRIPTION
4203  Used to delete all rows in a table. Both for cases of truncate and
4204  for cases where the optimizer realizes that all rows will be
4205  removed as a result of a SQL statement.
4206 
4207  Called from item_sum.cc by Item_func_group_concat::clear(),
4208  Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4209  Called from sql_delete.cc by mysql_delete().
4210  Called from sql_select.cc by JOIN::reset().
4211  Called from sql_union.cc by st_select_lex_unit::exec().
4212 */
4213 
4215 {
4216  int error;
4217  uint i;
4218  DBUG_ENTER("ha_partition::delete_all_rows");
4219 
4220  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4221  i < m_tot_parts;
4222  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4223  {
4224  /* Can be pruned, like DELETE FROM t PARTITION (pX) */
4225  if ((error= m_file[i]->ha_delete_all_rows()))
4226  DBUG_RETURN(error);
4227  }
4228  DBUG_RETURN(0);
4229 }
4230 
4231 
4240 {
4241  int error;
4242  handler **file;
4243  DBUG_ENTER("ha_partition::truncate");
4244 
4245  /*
4246  TRUNCATE also means resetting auto_increment. Hence, reset
4247  it so that it will be initialized again at the next use.
4248  */
4249  lock_auto_increment();
4250  part_share->next_auto_inc_val= 0;
4251  part_share->auto_inc_initialized= false;
4252  unlock_auto_increment();
4253 
4254  file= m_file;
4255  do
4256  {
4257  if ((error= (*file)->ha_truncate()))
4258  DBUG_RETURN(error);
4259  } while (*(++file));
4260  DBUG_RETURN(0);
4261 }
4262 
4263 
4272 int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
4273 {
4274  int error= 0;
4275  List_iterator<partition_element> part_it(m_part_info->partitions);
4276  uint num_parts= m_part_info->num_parts;
4277  uint num_subparts= m_part_info->num_subparts;
4278  uint i= 0;
4279  DBUG_ENTER("ha_partition::truncate_partition");
4280 
4281  /* Only binlog when it starts any call to the partitions handlers */
4282  *binlog_stmt= false;
4283 
4284  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
4285  DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
4286 
4287  /*
4288  TRUNCATE also means resetting auto_increment. Hence, reset
4289  it so that it will be initialized again at the next use.
4290  */
4291  lock_auto_increment();
4292  part_share->next_auto_inc_val= 0;
4293  part_share->auto_inc_initialized= FALSE;
4294  unlock_auto_increment();
4295 
4296  *binlog_stmt= true;
4297 
4298  do
4299  {
4300  partition_element *part_elem= part_it++;
4301  if (part_elem->part_state == PART_ADMIN)
4302  {
4303  if (m_is_sub_partitioned)
4304  {
4306  subpart_it(part_elem->subpartitions);
4307  partition_element *sub_elem;
4308  uint j= 0, part;
4309  do
4310  {
4311  sub_elem= subpart_it++;
4312  part= i * num_subparts + j;
4313  DBUG_PRINT("info", ("truncate subpartition %u (%s)",
4314  part, sub_elem->partition_name));
4315  if ((error= m_file[part]->ha_truncate()))
4316  break;
4317  sub_elem->part_state= PART_NORMAL;
4318  } while (++j < num_subparts);
4319  }
4320  else
4321  {
4322  DBUG_PRINT("info", ("truncate partition %u (%s)", i,
4323  part_elem->partition_name));
4324  error= m_file[i]->ha_truncate();
4325  }
4326  part_elem->part_state= PART_NORMAL;
4327  }
4328  } while (!error && (++i < num_parts));
4329  DBUG_RETURN(error);
4330 }
4331 
4332 
4333 /*
4334  Start a large batch of insert rows
4335 
4336  SYNOPSIS
4337  start_bulk_insert()
4338  rows Number of rows to insert
4339 
4340  RETURN VALUE
4341  NONE
4342 
4343  DESCRIPTION
4344  rows == 0 means we will probably insert many rows
4345 */
4346 void ha_partition::start_bulk_insert(ha_rows rows)
4347 {
4348  DBUG_ENTER("ha_partition::start_bulk_insert");
4349 
4350  m_bulk_inserted_rows= 0;
4351  bitmap_clear_all(&m_bulk_insert_started);
4352  /* use the last bit for marking if bulk_insert_started was called */
4353  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
4354  DBUG_VOID_RETURN;
4355 }
4356 
4357 
4358 /*
4359  Check if start_bulk_insert has been called for this partition,
4360  if not, call it and mark it called
4361 */
4362 void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
4363 {
4364  long old_buffer_size;
4365  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
4366  bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4367  {
4368  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
4369  old_buffer_size= thd->variables.read_buff_size;
4370  /* Update read_buffer_size for this partition */
4371  thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
4372  m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
4373  bitmap_set_bit(&m_bulk_insert_started, part_id);
4374  thd->variables.read_buff_size= old_buffer_size;
4375  }
4376  m_bulk_inserted_rows++;
4377 }
4378 
4379 /*
4380  Estimate the read buffer size for each partition.
4381  SYNOPSIS
4382  ha_partition::estimate_read_buffer_size()
4383  original_size read buffer size originally set for the server
4384  RETURN VALUE
4385  estimated buffer size.
4386  DESCRIPTION
4387  If the estimated number of rows to insert is less than 10 (but not 0)
4388  the new buffer size is same as original buffer size.
4389  In case of first partition of when partition function is monotonic
4390  new buffer size is same as the original buffer size.
4391  For rest of the partition total buffer of 10*original_size is divided
4392  equally if number of partition is more than 10 other wise each partition
4393  will be allowed to use original buffer size.
4394 */
4395 long ha_partition::estimate_read_buffer_size(long original_size)
4396 {
4397  /*
4398  If number of rows to insert is less than 10, but not 0,
4399  return original buffer size.
4400  */
4401  if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
4402  return (original_size);
4403  /*
4404  If first insert/partition and monotonic partition function,
4405  allow using buffer size originally set.
4406  */
4407  if (!m_bulk_inserted_rows &&
4408  m_part_func_monotonicity_info != NON_MONOTONIC &&
4409  m_tot_parts > 1)
4410  return original_size;
4411  /*
4412  Allow total buffer used in all partition to go up to 10*read_buffer_size.
4413  11*read_buffer_size in case of monotonic partition function.
4414  */
4415 
4416  if (m_tot_parts < 10)
4417  return original_size;
4418  return (original_size * 10 / m_tot_parts);
4419 }
4420 
4421 /*
4422  Try to predict the number of inserts into this partition.
4423 
4424  If less than 10 rows (including 0 which means Unknown)
4425  just give that as a guess
4426  If monotonic partitioning function was used
4427  guess that 50 % of the inserts goes to the first partition
4428  For all other cases, guess on equal distribution between the partitions
4429 */
4430 ha_rows ha_partition::guess_bulk_insert_rows()
4431 {
4432  DBUG_ENTER("guess_bulk_insert_rows");
4433 
4434  if (estimation_rows_to_insert < 10)
4435  DBUG_RETURN(estimation_rows_to_insert);
4436 
4437  /* If first insert/partition and monotonic partition function, guess 50%. */
4438  if (!m_bulk_inserted_rows &&
4439  m_part_func_monotonicity_info != NON_MONOTONIC &&
4440  m_tot_parts > 1)
4441  DBUG_RETURN(estimation_rows_to_insert / 2);
4442 
4443  /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
4444  if (m_bulk_inserted_rows < estimation_rows_to_insert)
4445  DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
4446  / m_tot_parts) + 1);
4447  /* The estimation was wrong, must say 'Unknown' */
4448  DBUG_RETURN(0);
4449 }
4450 
4451 
4460 int ha_partition::end_bulk_insert()
4461 {
4462  int error= 0;
4463  uint i;
4464  DBUG_ENTER("ha_partition::end_bulk_insert");
4465 
4466  if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4467  {
4468  DBUG_ASSERT(0);
4469  DBUG_RETURN(error);
4470  }
4471 
4472  for (i= bitmap_get_first_set(&m_bulk_insert_started);
4473  i < m_tot_parts;
4474  i= bitmap_get_next_set(&m_bulk_insert_started, i))
4475  {
4476  int tmp;
4477  if ((tmp= m_file[i]->ha_end_bulk_insert()))
4478  error= tmp;
4479  }
4480  bitmap_clear_all(&m_bulk_insert_started);
4481  DBUG_RETURN(error);
4482 }
4483 
4484 
4485 /****************************************************************************
4486  MODULE full table scan
4487 ****************************************************************************/
4488 /*
4489  Initialize engine for random reads
4490 
4491  SYNOPSIS
4492  ha_partition::rnd_init()
4493  scan 0 Initialize for random reads through rnd_pos()
4494  1 Initialize for random scan through rnd_next()
4495 
4496  RETURN VALUE
4497  >0 Error code
4498  0 Success
4499 
4500  DESCRIPTION
4501  rnd_init() is called when the server wants the storage engine to do a
4502  table scan or when the server wants to access data through rnd_pos.
4503 
4504  When scan is used we will scan one handler partition at a time.
4505  When preparing for rnd_pos we will init all handler partitions.
4506  No extra cache handling is needed when scannning is not performed.
4507 
4508  Before initialising we will call rnd_end to ensure that we clean up from
4509  any previous incarnation of a table scan.
4510  Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
4511  sql_table.cc, and sql_update.cc.
4512 */
4513 
int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
      bitmap_set_all(table->read_set);
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: 0x%lx",
                      (long) m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part %d", part_id));

  if (MY_BIT_NONE == part_id)
  {
    /* Read set is empty: succeed but leave the scan state as "no scan". */
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    /* Table scan: only the current partition is inited, one at a time. */
    late_extra_cache(part_id);
    if ((error= m_file[part_id]->ha_rnd_init(scan)))
      goto err;
  }
  else
  {
    /* rnd_pos mode: init every partition in the read set. */
    for (i= part_id;
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      if ((error= m_file[i]->ha_rnd_init(scan)))
        goto err;
    }
  }
  /* m_scan_value: 1 == table scan, 0 == rnd_pos, 2 == error/not inited. */
  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  DBUG_PRINT("info", ("m_scan_value=%d", m_scan_value));
  DBUG_RETURN(0);

err:
  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
4606 
4607 
4608 /*
4609  End of a table scan
4610 
4611  SYNOPSIS
4612  rnd_end()
4613 
4614  RETURN VALUE
4615  >0 Error code
4616  0 Success
4617 */
4618 
4619 int ha_partition::rnd_end()
4620 {
4621  DBUG_ENTER("ha_partition::rnd_end");
4622  switch (m_scan_value) {
4623  case 2: // Error
4624  break;
4625  case 1:
4626  if (NO_CURRENT_PART_ID != m_part_spec.start_part) // Table scan
4627  {
4628  late_extra_no_cache(m_part_spec.start_part);
4629  m_file[m_part_spec.start_part]->ha_rnd_end();
4630  }
4631  break;
4632  case 0:
4633  uint i;
4634  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4635  i < m_tot_parts;
4636  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4637  {
4638  m_file[i]->ha_rnd_end();
4639  }
4640  break;
4641  }
4642  m_scan_value= 2;
4643  m_part_spec.start_part= NO_CURRENT_PART_ID;
4644  DBUG_RETURN(0);
4645 }
4646 
4647 /*
4648  read next row during full table scan (scan in random row order)
4649 
4650  SYNOPSIS
4651  rnd_next()
4652  buf buffer that should be filled with data
4653 
4654  RETURN VALUE
4655  >0 Error code
4656  0 Success
4657 
4658  DESCRIPTION
4659  This is called for each row of the table scan. When you run out of records
4660  you should return HA_ERR_END_OF_FILE.
4661  The Field structure for the table is the key to getting data into buf
4662  in a manner that will allow the server to understand it.
4663 
4664  Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
4665  sql_table.cc, and sql_update.cc.
4666 */
4667 
int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");

  if (NO_CURRENT_PART_ID == part_id)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  /* Only valid during a table scan (m_scan_value == 1, set in rnd_init). */
  DBUG_ASSERT(m_scan_value == 1);
  file= m_file[part_id];

  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      /* Row found: remember which partition produced it. */
      m_last_part= part_id;
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result == HA_ERR_RECORD_DELETED)
      continue;                               // Probably MyISAM

    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;         // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
    if ((result= file->ha_rnd_end()))
      break;

    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      /* All partitions in the read set are exhausted. */
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
    if ((result= file->ha_rnd_init(1)))
      break;
    late_extra_cache(part_id);
  }

end:
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  table->status= STATUS_NOT_FOUND;
  DBUG_RETURN(result);
}
4735 
4736 
4737 /*
4738  Save position of current row
4739 
4740  SYNOPSIS
4741  position()
4742  record Current record in MySQL Row Format
4743 
4744  RETURN VALUE
4745  NONE
4746 
4747  DESCRIPTION
4748  position() is called after each call to rnd_next() if the data needs
4749  to be ordered. You can do something like the following to store
4750  the position:
4751  ha_store_ptr(ref, ref_length, current_position);
4752 
4753  The server uses ref to store data. ref_length in the above case is
4754  the size needed to store current_position. ref is just a byte array
4755  that the server will maintain. If you are using offsets to mark rows, then
4756  current_position should be the offset. If it is a primary key like in
4757  BDB, then it needs to be a primary key.
4758 
4759  Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
4760 */
4761 
4762 void ha_partition::position(const uchar *record)
4763 {
4764  handler *file= m_file[m_last_part];
4765  uint pad_length;
4766  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
4767  DBUG_ENTER("ha_partition::position");
4768 
4769  file->position(record);
4770  int2store(ref, m_last_part);
4771  memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
4772  pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
4773  if (pad_length)
4774  memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);
4775 
4776  DBUG_VOID_RETURN;
4777 }
4778 
4779 
4780 /*
4781  Read row using position
4782 
4783  SYNOPSIS
4784  rnd_pos()
4785  out:buf Row read in MySQL Row Format
4786  position Position of read row
4787 
4788  RETURN VALUE
4789  >0 Error code
4790  0 Success
4791 
4792  DESCRIPTION
4793  This is like rnd_next, but you are given a position to use
4794  to determine the row. The position will be of the type that you stored in
4795  ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
4796  or position you saved when position() was called.
4797  Called from filesort.cc records.cc sql_insert.cc sql_select.cc
4798  sql_update.cc.
4799 */
4800 
4801 int ha_partition::rnd_pos(uchar * buf, uchar *pos)
4802 {
4803  uint part_id;
4804  handler *file;
4805  DBUG_ENTER("ha_partition::rnd_pos");
4806 
4807  part_id= uint2korr((const uchar *) pos);
4808  DBUG_ASSERT(part_id < m_tot_parts);
4809  file= m_file[part_id];
4810  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
4811  m_last_part= part_id;
4812  DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
4813 }
4814 
4815 
4816 /*
4817  Read row using position using given record to find
4818 
4819  SYNOPSIS
4820  rnd_pos_by_record()
4821  record Current record in MySQL Row Format
4822 
4823  RETURN VALUE
4824  >0 Error code
4825  0 Success
4826 
4827  DESCRIPTION
4828  this works as position()+rnd_pos() functions, but does some extra work,
4829  calculating m_last_part - the partition to where the 'record'
4830  should go.
4831 
4832  called from replication (log_event.cc)
4833 */
4834 
4835 int ha_partition::rnd_pos_by_record(uchar *record)
4836 {
4837  DBUG_ENTER("ha_partition::rnd_pos_by_record");
4838 
4839  if (unlikely(get_part_for_delete(record, m_rec0, m_part_info, &m_last_part)))
4840  DBUG_RETURN(1);
4841 
4842  DBUG_RETURN(handler::rnd_pos_by_record(record));
4843 }
4844 
4845 
4846 /****************************************************************************
4847  MODULE index scan
4848 ****************************************************************************/
4849 /*
4850  Positions an index cursor to the index specified in the handle. Fetches the
4851  row if available. If the key value is null, begin at the first key of the
4852  index.
4853 
4854  There are loads of optimisations possible here for the partition handler.
4855  The same optimisations can also be checked for full table scan although
4856  only through conditions and not from index ranges.
4857  Phase one optimisations:
4858  Check if the fields of the partition function are bound. If so only use
4859  the single partition it becomes bound to.
4860  Phase two optimisations:
4861  If it can be deducted through range or list partitioning that only a
4862  subset of the partitions are used, then only use those partitions.
4863 */
4864 
4865 
bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
  */
  if (!m_ordered_rec_buffer)
  {
    uint alloc_len;
    uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
    /* Allocate record buffer for each used partition. */
    alloc_len= used_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
    /* Allocate a key for temporary use when setting up the scan. */
    alloc_len+= table_share->max_key_length;

    if (!(m_ordered_rec_buffer= (uchar*)my_malloc(alloc_len, MYF(MY_WME))))
      DBUG_RETURN(true);

    /*
      We set-up one record per partition and each record has 2 bytes in
      front where the partition id is written. This is used by ordered
      index_read.
      We also set-up a reference to the first record for temporary use in
      setting up the scan.
    */
    char *ptr= (char*) m_ordered_rec_buffer;
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("init rec-buf for part %u", i));
      int2store(ptr, i);
      ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
    }
    /* ptr now points at the spare key area allocated after the records. */
    m_start_key.key= (const uchar*)ptr;
    /* Initialize priority queue, initialized to reading forward. */
    if (init_queue(&m_queue, used_parts, (uint) PARTITION_BYTES_IN_POS,
                   0, key_rec_cmp, (void*)m_curr_key_info))
    {
      /* Queue init failed: release the record buffer allocated above. */
      my_free(m_ordered_rec_buffer);
      m_ordered_rec_buffer= NULL;
      DBUG_RETURN(true);
    }
  }
  DBUG_RETURN(false);
}
4918 
4919 
4924 void ha_partition::destroy_record_priority_queue()
4925 {
4926  DBUG_ENTER("ha_partition::destroy_record_priority_queue");
4927  if (m_ordered_rec_buffer)
4928  {
4929  delete_queue(&m_queue);
4930  my_free(m_ordered_rec_buffer);
4931  m_ordered_rec_buffer= NULL;
4932  }
4933  DBUG_VOID_RETURN;
4934 }
4935 
4936 
4937 /*
4938  Initialize handler before start of index scan
4939 
4940  SYNOPSIS
4941  index_init()
4942  inx Index number
4943  sorted Is rows to be returned in sorted order
4944 
4945  RETURN VALUE
4946  >0 Error code
4947  0 Success
4948 
4949  DESCRIPTION
4950  index_init is always called before starting index scans (except when
4951  starting through index_read_idx and using read_range variants).
4952 */
4953 
int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");

  DBUG_PRINT("info", ("inx %u sorted %u", inx, sorted));
  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_curr_key_info[0]= table->key_info+inx;
  if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
  }
  else
    m_curr_key_info[1]= NULL;

  /* Buffer + queue used for merge-ordering rows across partitions. */
  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if ((error= m_file[i]->ha_index_init(inx, sorted)))
      goto err;

    /* Debug hook: simulate a failure after partition i was inited. */
    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  if (error)
  {
    /* End the previously initialized indexes. */
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
  }
  DBUG_RETURN(error);
}
5018 
5019 
5020 /*
5021  End of index scan
5022 
5023  SYNOPSIS
5024  index_end()
5025 
5026  RETURN VALUE
5027  >0 Error code
5028  0 Success
5029 
5030  DESCRIPTION
5031  index_end is called at the end of an index scan to clean up any
5032  things needed to clean up.
5033 */
5034 
5035 int ha_partition::index_end()
5036 {
5037  int error= 0;
5038  uint i;
5039  DBUG_ENTER("ha_partition::index_end");
5040 
5041  active_index= MAX_KEY;
5042  m_part_spec.start_part= NO_CURRENT_PART_ID;
5043  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
5044  i < m_tot_parts;
5045  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
5046  {
5047  int tmp;
5048  if ((tmp= m_file[i]->ha_index_end()))
5049  error= tmp;
5050  }
5051  destroy_record_priority_queue();
5052  DBUG_RETURN(error);
5053 }
5054 
5055 
5056 /*
5057  Read one record in an index scan and start an index scan
5058 
5059  SYNOPSIS
5060  index_read_map()
5061  buf Read row in MySQL Row Format
5062  key Key parts in consecutive order
5063  keypart_map Which part of key is used
5064  find_flag What type of key condition is used
5065 
5066  RETURN VALUE
5067  >0 Error code
5068  0 Success
5069 
5070  DESCRIPTION
5071  index_read_map starts a new index scan using a start key. The MySQL Server
5072  will check the end key on its own. Thus to function properly the
5073  partitioned handler need to ensure that it delivers records in the sort
5074  order of the MySQL Server.
5075  index_read_map can be restarted without calling index_end on the previous
5076  index scan and without calling index_init. In this case the index_read_map
5077  is on the same index as the previous index_scan. This is particularly
5078  used in conjuntion with multi read ranges.
5079 */
5080 
int ha_partition::index_read_map(uchar *buf, const uchar *key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
{
  DBUG_ENTER("ha_partition::index_read_map");
  /* Record the start key; common_index_read() performs the actual scan. */
  end_range= 0;
  m_index_scan_type= partition_index_read;
  m_start_key.key= key;
  m_start_key.keypart_map= keypart_map;
  m_start_key.flag= find_flag;
  DBUG_RETURN(common_index_read(buf, TRUE));
}
5093 
5094 
int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    /* Compute the byte length of the supplied start key prefix. */
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Prune to the partition set to scan; may return end-of-file directly. */
  if ((error= partition_scan_set_up(buf, have_start_key)))
  {
    DBUG_RETURN(error);
  }

  /* Backward-reading find flags force an ordered (merge) scan. */
  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
  {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}
5180 
5181 
5182 /*
5183  Start an index scan from leftmost record and return first record
5184 
5185  SYNOPSIS
5186  index_first()
5187  buf Read row in MySQL Row Format
5188 
5189  RETURN VALUE
5190  >0 Error code
5191  0 Success
5192 
5193  DESCRIPTION
5194  index_first() asks for the first key in the index.
5195  This is similar to index_read except that there is no start key since
5196  the scan starts from the leftmost entry and proceeds forward with
5197  index_next.
5198 
5199  Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5200  and sql_select.cc.
5201 */
5202 
int ha_partition::index_first(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_first");

  /* No range end: scan the whole index starting at the leftmost entry. */
  end_range= 0;
  m_index_scan_type= partition_index_first;
  DBUG_RETURN(common_first_last(buf));
}
5211 
5212 
5213 /*
5214  Start an index scan from rightmost record and return first record
5215 
5216  SYNOPSIS
5217  index_last()
5218  buf Read row in MySQL Row Format
5219 
5220  RETURN VALUE
5221  >0 Error code
5222  0 Success
5223 
5224  DESCRIPTION
5225  index_last() asks for the last key in the index.
5226  This is similar to index_read except that there is no start key since
5227  the scan starts from the rightmost entry and proceeds forward with
5228  index_prev.
5229 
5230  Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5231  and sql_select.cc.
5232 */
5233 
int ha_partition::index_last(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_last");

  /*
    NOTE(review): unlike index_first(), end_range is not reset here —
    presumably callers of index_last() have no active range end, but
    confirm whether this asymmetry is intentional.
  */
  m_index_scan_type= partition_index_last;
  DBUG_RETURN(common_first_last(buf));
}
5241 
5242 /*
5243  Common routine for index_first/index_last
5244 
5245  SYNOPSIS
5246  ha_partition::common_first_last()
5247 
5248  see index_first for rest
5249 */
5250 
5251 int ha_partition::common_first_last(uchar *buf)
5252 {
5253  int error;
5254 
5255  if ((error= partition_scan_set_up(buf, FALSE)))
5256  return error;
5257  if (!m_ordered_scan_ongoing &&
5258  m_index_scan_type != partition_index_last)
5259  return handle_unordered_scan_next_partition(buf);
5260  return handle_ordered_index_scan(buf, FALSE);
5261 }
5262 
5263 
5264 /*
5265  Read last using key
5266 
5267  SYNOPSIS
5268  index_read_last_map()
5269  buf Read row in MySQL Row Format
5270  key Key
5271  keypart_map Which part of key is used
5272 
5273  RETURN VALUE
5274  >0 Error code
5275  0 Success
5276 
5277  DESCRIPTION
5278  This is used in join_read_last_key to optimise away an ORDER BY.
5279  Can only be used on indexes supporting HA_READ_ORDER
5280 */
5281 
int ha_partition::index_read_last_map(uchar *buf, const uchar *key,
                                      key_part_map keypart_map)
{
  DBUG_ENTER("ha_partition::index_read_last_map");

  /* Force ordered scan: the last matching row must be globally last. */
  m_ordered= TRUE;                              // Safety measure
  end_range= 0;
  m_index_scan_type= partition_index_read_last;
  m_start_key.key= key;
  m_start_key.keypart_map= keypart_map;
  m_start_key.flag= HA_READ_PREFIX_LAST;
  DBUG_RETURN(common_index_read(buf, TRUE));
}
5295 
5296 
5297 /*
5298  Optimization of the default implementation to take advantage of dynamic
5299  partition pruning.
5300 */
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    /* Prune: compute the partition range matching the exact key. */
    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /*
      We have either found exactly 1 partition
      (in which case start_part == end_part)
      or no matching partitions (start_part > end_part)
    */
    DBUG_ASSERT(m_part_spec.start_part >= m_part_spec.end_part);
    /* The start part is must be marked as used. */
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    /* Probe each candidate partition until a row (or hard error) found. */
    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      if (error != HA_ERR_KEY_NOT_FOUND &&
          error != HA_ERR_END_OF_FILE)
        break;
    }
    /* Remember the partition that produced the row (or the hard error). */
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}
5356 
5357 
5358 /*
5359  Read next record in a forward index scan
5360 
5361  SYNOPSIS
5362  index_next()
5363  buf Read row in MySQL Row Format
5364 
5365  RETURN VALUE
5366  >0 Error code
5367  0 Success
5368 
5369  DESCRIPTION
5370  Used to read forward through the index.
5371 */
5372 
5373 int ha_partition::index_next(uchar * buf)
5374 {
5375  DBUG_ENTER("ha_partition::index_next");
5376 
5377  /*
5378  TODO(low priority):
5379  If we want partition to work with the HANDLER commands, we
5380  must be able to do index_last() -> index_prev() -> index_next()
5381  and if direction changes, we must step back those partitions in
5382  the record queue so we don't return a value from the wrong direction.
5383  */
5384  DBUG_ASSERT(m_index_scan_type != partition_index_last);
5385  if (!m_ordered_scan_ongoing)
5386  {
5387  DBUG_RETURN(handle_unordered_next(buf, FALSE));
5388  }
5389  DBUG_RETURN(handle_ordered_next(buf, FALSE));
5390 }
5391 
5392 
5393 /*
5394  Read next record special
5395 
5396  SYNOPSIS
5397  index_next_same()
5398  buf Read row in MySQL Row Format
5399  key Key
5400  keylen Length of key
5401 
5402  RETURN VALUE
5403  >0 Error code
5404  0 Success
5405 
5406  DESCRIPTION
5407  This routine is used to read the next but only if the key is the same
5408  as supplied in the call.
5409 */
5410 
5411 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
5412 {
5413  DBUG_ENTER("ha_partition::index_next_same");
5414 
5415  DBUG_ASSERT(keylen == m_start_key.length);
5416  DBUG_ASSERT(m_index_scan_type != partition_index_last);
5417  if (!m_ordered_scan_ongoing)
5418  DBUG_RETURN(handle_unordered_next(buf, TRUE));
5419  DBUG_RETURN(handle_ordered_next(buf, TRUE));
5420 }
5421 
5422 
5423 /*
5424  Read next record when performing index scan backwards
5425 
5426  SYNOPSIS
5427  index_prev()
5428  buf Read row in MySQL Row Format
5429 
5430  RETURN VALUE
5431  >0 Error code
5432  0 Success
5433 
5434  DESCRIPTION
5435  Used to read backwards through the index.
5436 */
5437 
int ha_partition::index_prev(uchar * buf)
{
  DBUG_ENTER("ha_partition::index_prev");

  /* TODO: read comment in index_next */
  DBUG_ASSERT(m_index_scan_type != partition_index_first);
  /* Backward reads always go through the ordered (merge) scan path. */
  DBUG_RETURN(handle_ordered_prev(buf));
}
5446 
5447 
5448 /*
5449  Start a read of one range with start and end key
5450 
5451  SYNOPSIS
5452  read_range_first()
5453  start_key Specification of start key
5454  end_key Specification of end key
5455  eq_range_arg Is it equal range
5456  sorted Should records be returned in sorted order
5457 
5458  RETURN VALUE
5459  >0 Error code
5460  0 Success
5461 
5462  DESCRIPTION
5463  We reimplement read_range_first since we don't want the compare_key
5464  check at the end. This is already performed in the partition handler.
5465  read_range_next is very much different due to that we need to scan
5466  all underlying handlers.
5467 */
5468 
5469 int ha_partition::read_range_first(const key_range *start_key,
5470  const key_range *end_key,
5471  bool eq_range_arg, bool sorted)
5472 {
5473  int error;
5474  DBUG_ENTER("ha_partition::read_range_first");
5475 
5476  m_ordered= sorted;
5477  eq_range= eq_range_arg;
5478  set_end_range(end_key, RANGE_SCAN_ASC);
5479 
5480  range_key_part= m_curr_key_info[0]->key_part;
5481  if (start_key)
5482  m_start_key= *start_key;
5483  else
5484  m_start_key.key= NULL;
5485 
5486  m_index_scan_type= partition_read_range;
5487  error= common_index_read(m_rec0, test(start_key));
5488  DBUG_RETURN(error);
5489 }
5490 
5491 
5492 /*
5493  Read next record in read of a range with start and end key
5494 
5495  SYNOPSIS
5496  read_range_next()
5497 
5498  RETURN VALUE
5499  >0 Error code
5500  0 Success
5501 */
5502 
5504 {
5505  DBUG_ENTER("ha_partition::read_range_next");
5506 
5507  if (m_ordered_scan_ongoing)
5508  {
5509  DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
5510  }
5511  DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
5512 }
5513 
5514 
5515 /*
5516  Common routine to set up index scans
5517 
5518  SYNOPSIS
5519  ha_partition::partition_scan_set_up()
5520  buf Buffer to later return record in (this function
5521  needs it to calculcate partitioning function
5522  values)
5523 
5524  idx_read_flag TRUE <=> m_start_key has range start endpoint which
5525  probably can be used to determine the set of partitions
5526  to scan.
5527  FALSE <=> there is no start endpoint.
5528 
5529  DESCRIPTION
5530  Find out which partitions we'll need to read when scanning the specified
5531  range.
5532 
5533  If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
5534  as we will not need to do merge ordering.
5535 
5536  RETURN VALUE
5537  >0 Error code
5538  0 Success
5539 */
5540 
int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
{
  DBUG_ENTER("ha_partition::partition_scan_set_up");

  if (idx_read_flag)
    /* Use the start key to prune to the matching partition range. */
    get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec);
  else
  {
    /* No start endpoint: all partitions are candidates. */
    m_part_spec.start_part= 0;
    m_part_spec.end_part= m_tot_parts - 1;
  }
  if (m_part_spec.start_part > m_part_spec.end_part)
  {
    /*
      We discovered a partition set but the set was empty so we report
      key not found.
    */
    DBUG_PRINT("info", ("scan with no partition to scan"));
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  if (m_part_spec.start_part == m_part_spec.end_part)
  {
    /*
      We discovered a single partition to scan, this never needs to be
      performed using the ordered index scan.
    */
    DBUG_PRINT("info", ("index scan using the single partition %d",
                        m_part_spec.start_part));
    m_ordered_scan_ongoing= FALSE;
  }
  else
  {
    /*
      Set m_ordered_scan_ongoing according how the scan should be done
      Only exact partitions are discovered atm by get_partition_set.
      Verify this, also bitmap must have at least one bit set otherwise
      the result from this table is the empty set.
    */
    uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions));
    if (start_part == MY_BIT_NONE)
    {
      DBUG_PRINT("info", ("scan with no partition to scan"));
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    /* Start at the first partition that is both in range and in use. */
    if (start_part > m_part_spec.start_part)
      m_part_spec.start_part= start_part;
    DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
    m_ordered_scan_ongoing= m_ordered;
  }
  DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
              m_part_spec.end_part < m_tot_parts);
  DBUG_RETURN(0);
}
5596 
5597 
5598 /****************************************************************************
5599  Unordered Index Scan Routines
5600 ****************************************************************************/
5601 /*
5602  Common routine to handle index_next with unordered results
5603 
5604  SYNOPSIS
5605  handle_unordered_next()
5606  out:buf Read row in MySQL Row Format
5607  next_same Called from index_next_same
5608 
5609  RETURN VALUE
5610  HA_ERR_END_OF_FILE End of scan
5611  0 Success
5612  other Error code
5613 
5614  DESCRIPTION
5615  These routines are used to scan partitions without considering order.
5616  This is performed in two situations.
5617  1) In read_multi_range this is the normal case
5618  2) When performing any type of index_read, index_first, index_last where
5619  all fields in the partition function is bound. In this case the index
5620  scan is performed on only one partition and thus it isn't necessary to
5621  perform any sort.
5622 */
5623 
5624 int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
5625 {
5626  handler *file;
5627  int error;
5628  DBUG_ENTER("ha_partition::handle_unordered_next");
5629 
5630  if (m_part_spec.start_part >= m_tot_parts)
5631  {
5632  /* Should never happen! */
5633  DBUG_ASSERT(0);
5634  DBUG_RETURN(HA_ERR_END_OF_FILE);
5635  }
5636  file= m_file[m_part_spec.start_part];
5637 
5638  /*
5639  We should consider if this should be split into three functions as
5640  partition_read_range is_next_same are always local constants
5641  */
5642 
5643  if (m_index_scan_type == partition_read_range)
5644  {
5645  if (!(error= file->read_range_next()))
5646  {
5647  m_last_part= m_part_spec.start_part;
5648  DBUG_RETURN(0);
5649  }
5650  }
5651  else if (is_next_same)
5652  {
5653  if (!(error= file->ha_index_next_same(buf, m_start_key.key,
5654  m_start_key.length)))
5655  {
5656  m_last_part= m_part_spec.start_part;
5657  DBUG_RETURN(0);
5658  }
5659  }
5660  else
5661  {
5662  if (!(error= file->ha_index_next(buf)))
5663  {
5664  m_last_part= m_part_spec.start_part;
5665  DBUG_RETURN(0); // Row was in range
5666  }
5667  }
5668 
5669  if (error == HA_ERR_END_OF_FILE)
5670  {
5671  m_part_spec.start_part++; // Start using next part
5672  error= handle_unordered_scan_next_partition(buf);
5673  }
5674  DBUG_RETURN(error);
5675 }
5676 
5677 
5678 /*
5679  Handle index_next when changing to new partition
5680 
5681  SYNOPSIS
5682  handle_unordered_scan_next_partition()
5683  buf Read row in MySQL Row Format
5684 
5685  RETURN VALUE
5686  HA_ERR_END_OF_FILE End of scan
5687  0 Success
5688  other Error code
5689 
5690  DESCRIPTION
5691  This routine is used to start the index scan on the next partition.
5692  Both initial start and after completing scan on one partition.
5693 */
5694 
int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
{
  uint i= m_part_spec.start_part;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");

  /* Advance i to the first used partition >= m_part_spec.start_part. */
  if (i)
    i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
  else
    i= bitmap_get_first_set(&m_part_info->read_partitions);

  /* Try each remaining used partition until one delivers a row. */
  for (;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    int error;
    handler *file= m_file[i];
    m_part_spec.start_part= i;
    switch (m_index_scan_type) {
    case partition_read_range:
      DBUG_PRINT("info", ("read_range_first on partition %d", i));
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, FALSE);
      break;
    case partition_index_read:
      DBUG_PRINT("info", ("index_read on partition %d", i));
      error= file->ha_index_read_map(buf, m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      DBUG_PRINT("info", ("index_first on partition %d", i));
      error= file->ha_index_first(buf);
      break;
    case partition_index_first_unordered:
      /*
        We perform a scan without sorting and this means that we
        should not use the index_first since not all handlers
        support it and it is also unnecessary to restrict sort
        order.
      */
      DBUG_PRINT("info", ("read_range_first on partition %d", i));
      /*
        read_range_first() delivers into table->record[0] only, so
        temporarily point it at the caller's buffer and restore the
        original record buffer (m_rec0) afterwards.
      */
      table->record[0]= buf;
      error= file->read_range_first(0, end_range, eq_range, 0);
      table->record[0]= m_rec0;
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
    }
    if (!error)
    {
      m_last_part= i;
      DBUG_RETURN(0);
    }
    /* Any error other than "empty"/"no match" is fatal for the scan. */
    if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
      DBUG_RETURN(error);

    /*
      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
      HA_ERR_END_OF_FILE, to be able to continue search.
    */
    if (saved_error != HA_ERR_KEY_NOT_FOUND)
      saved_error= error;
    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %d", i));
  }
  /* All partitions exhausted: invalidate the scan position on plain EOF. */
  if (saved_error == HA_ERR_END_OF_FILE)
    m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(saved_error);
}
5765 
5766 
/*
  Start an ordered (merge-sorted) index scan across all used partitions.

  Each used partition gets one probe (read/first/last/range-first as
  selected by m_index_scan_type); successful probes are inserted into the
  priority queue m_queue, which is then fixed so its top holds the first
  row in sort order. That row is copied to buf via return_top_record().
  Partitions returning HA_ERR_KEY_NOT_FOUND are remembered in
  m_key_not_found_partitions for later resumption.

  Returns 0 on success, HA_ERR_END_OF_FILE/HA_ERR_KEY_NOT_FOUND when no
  partition produced a row, or another error code on failure.
*/
int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
  uint i;
  uint j= 0;                                  // Number of queue entries filled
  bool found= FALSE;
  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan");

  /* A new scan discards any "key not found" state from a previous one. */
  if (m_key_not_found)
  {
    m_key_not_found= false;
    bitmap_clear_all(&m_key_not_found_partitions);
  }
  m_top_entry= NO_CURRENT_PART_ID;
  queue_remove_all(&m_queue);
  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
              m_part_spec.start_part));

  /*
    Position part_rec_buf_ptr to point to the first used partition >=
    start_part. There may be partitions marked in read_partitions that
    lie before start_part; they have allocated record buffers but are
    dynamically pruned, so those buffers must be skipped.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    /* Each slot = 2-byte partition id followed by one record. */
    part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
  for (/* continue from above */ ;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
                        i, m_index_scan_type));
    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
    uchar *rec_buf_ptr= part_rec_buf_ptr + PARTITION_BYTES_IN_POS;
    int error;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->ha_index_read_map(rec_buf_ptr,
                                     m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      error= file->ha_index_first(rec_buf_ptr);
      reverse_order= FALSE;
      break;
    case partition_index_last:
      error= file->ha_index_last(rec_buf_ptr);
      reverse_order= TRUE;
      break;
    case partition_index_read_last:
      error= file->ha_index_read_last_map(rec_buf_ptr,
                                          m_start_key.key,
                                          m_start_key.keypart_map);
      reverse_order= TRUE;
      break;
    case partition_read_range:
    {
      /*
        This can only read record to table->record[0], as it was set when
        the table was being opened. We have to memcpy data ourselves.
      */
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE);
      memcpy(rec_buf_ptr, table->record[0], m_rec_length);
      reverse_order= FALSE;
      break;
    }
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    if (!error)
    {
      found= TRUE;
      /*
        Initialize queue without order first, simply insert
      */
      queue_element(&m_queue, j++)= part_rec_buf_ptr;
    }
    else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
    {
      DBUG_RETURN(error);
    }
    else if (error == HA_ERR_KEY_NOT_FOUND)
    {
      /* Remember this partition so the scan can resume from it later. */
      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
      bitmap_set_bit(&m_key_not_found_partitions, i);
      m_key_not_found= true;
      saved_error= error;
    }
    part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
  }
  if (found)
  {
    /*
      We found at least one partition with data, now sort all entries and
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
    queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
    m_queue.elements= j;
    queue_fix(&m_queue);
    return_top_record(buf);
    table->status= 0;
    DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
    DBUG_RETURN(0);
  }
  DBUG_RETURN(saved_error);
}
5913 
5914 
5915 /*
5916  Return the top record in sort order
5917 
5918  SYNOPSIS
5919  return_top_record()
5920  out:buf Row returned in MySQL Row Format
5921 
5922  RETURN VALUE
5923  NONE
5924 */
5925 
5926 void ha_partition::return_top_record(uchar *buf)
5927 {
5928  uint part_id;
5929  uchar *key_buffer= queue_top(&m_queue);
5930  uchar *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS;
5931 
5932  part_id= uint2korr(key_buffer);
5933  memcpy(buf, rec_buffer, m_rec_length);
5934  m_last_part= part_id;
5935  m_top_entry= part_id;
5936 }
5937 
5938 
/*
  Resume the ordered scan for partitions that returned
  HA_ERR_KEY_NOT_FOUND on the initial probe: fetch their next row via
  ha_index_next() and insert it into the merge queue.

  NOTE(review): if every resumed partition hits EOF the queue may stay
  unchanged; queue_top() below presumes the queue is non-empty at this
  point — confirm against callers (handle_ordered_next).
*/
int ha_partition::handle_ordered_index_scan_key_not_found()
{
  int error;
  uint i;
  uchar *part_buf= m_ordered_rec_buffer;
  uchar *curr_rec_buf= NULL;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
  DBUG_ASSERT(m_key_not_found);
  /*
    Loop over all used partitions to get the correct offset
    into m_ordered_rec_buffer.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (bitmap_is_set(&m_key_not_found_partitions, i))
    {
      /*
        This partition is used and did return HA_ERR_KEY_NOT_FOUND
        in index_read_map.
      */
      curr_rec_buf= part_buf + PARTITION_BYTES_IN_POS;
      error= m_file[i]->ha_index_next(curr_rec_buf);
      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
      if (!error)
        queue_insert(&m_queue, part_buf);
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
    /* Advance to the next partition's slot in the ordered buffer. */
    part_buf+= m_rec_length + PARTITION_BYTES_IN_POS;
  }
  DBUG_ASSERT(curr_rec_buf);
  bitmap_clear_all(&m_key_not_found_partitions);
  m_key_not_found= false;

  /* Update m_top_entry, which may have changed. */
  uchar *key_buffer= queue_top(&m_queue);
  m_top_entry= uint2korr(key_buffer);
  DBUG_RETURN(0);
}
5989 
5990 
5991 /*
5992  Common routine to handle index_next with ordered results
5993 
5994  SYNOPSIS
5995  handle_ordered_next()
5996  out:buf Read row in MySQL Row Format
5997  next_same Called from index_next_same
5998 
5999  RETURN VALUE
6000  HA_ERR_END_OF_FILE End of scan
6001  0 Success
6002  other Error code
6003 */
6004 
int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  uint part_id= m_top_entry;
  /* Record buffer of the partition currently at the top of the queue. */
  uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
  handler *file;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /* Only rows which match the key. */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if ((error= handle_ordered_index_scan_key_not_found()))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  /* NO_CURRENT_PART_ID (or any out-of-range id) means the scan is done. */
  if (part_id >= m_tot_parts)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  file= m_file[part_id];

  /* Fetch the next row from the top partition into its queue slot. */
  if (m_index_scan_type == partition_read_range)
  {
    error= file->read_range_next();
    memcpy(rec_buf, table->record[0], m_rec_length);
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);
  if (error)
  {
    if (error == HA_ERR_END_OF_FILE)
    {
      /* Return next buffered row */
      queue_remove(&m_queue, (uint) 0);
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                   m_top_entry));
        return_top_record(buf);
        table->status= 0;
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }
  /* Re-sort the queue with the refreshed top entry, then return its row. */
  queue_replaced(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}
6077 
6078 
6079 /*
6080  Common routine to handle index_prev with ordered results
6081 
6082  SYNOPSIS
6083  handle_ordered_prev()
6084  out:buf Read row in MySQL Row Format
6085 
6086  RETURN VALUE
6087  HA_ERR_END_OF_FILE End of scan
6088  0 Success
6089  other Error code
6090 */
6091 
6092 int ha_partition::handle_ordered_prev(uchar *buf)
6093 {
6094  int error;
6095  uint part_id= m_top_entry;
6096  uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
6097  handler *file= m_file[part_id];
6098  DBUG_ENTER("ha_partition::handle_ordered_prev");
6099 
6100  if ((error= file->ha_index_prev(rec_buf)))
6101  {
6102  if (error == HA_ERR_END_OF_FILE)
6103  {
6104  queue_remove(&m_queue, (uint) 0);
6105  if (m_queue.elements)
6106  {
6107  return_top_record(buf);
6108  DBUG_PRINT("info", ("Record returned from partition %d (2)",
6109  m_top_entry));
6110  error= 0;
6111  table->status= 0;
6112  }
6113  }
6114  DBUG_RETURN(error);
6115  }
6116  queue_replaced(&m_queue);
6117  return_top_record(buf);
6118  DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
6119  DBUG_RETURN(0);
6120 }
6121 
6122 
6123 /****************************************************************************
6124  MODULE information calls
6125 ****************************************************************************/
6126 
6127 /*
6128  These are all first approximations of the extra, info, scan_time
6129  and read_time calls
6130 */
6131 
6136 int ha_partition::compare_number_of_records(ha_partition *me,
6137  const uint32 *a,
6138  const uint32 *b)
6139 {
6140  handler **file= me->m_file;
6141  /* Note: sorting in descending order! */
6142  if (file[*a]->stats.records > file[*b]->stats.records)
6143  return -1;
6144  if (file[*a]->stats.records < file[*b]->stats.records)
6145  return 1;
6146  return 0;
6147 }
6148 
6149 
6150 /*
6151  General method to gather info from handler
6152 
6153  SYNOPSIS
6154  info()
6155  flag Specifies what info is requested
6156 
6157  RETURN VALUE
6158  NONE
6159 
6160  DESCRIPTION
6161  ::info() is used to return information to the optimizer.
6162  Currently this table handler doesn't implement most of the fields
6163  really needed. SHOW also makes use of this data
6164  Another note, if your handler doesn't provide an exact record count,
6165  you will probably want to have the following in your code:
6166  if (records < 2)
6167  records = 2;
6168  The reason is that the server will optimize for cases of only a single
6169  record. If in a table scan you don't know the number of records
6170  it will probably be better to set records to two so you can return
6171  as many records as you need.
6172 
6173  Along with records a few more variables you may wish to set are:
6174  records
6175  deleted
6176  data_file_length
6177  index_file_length
6178  delete_length
6179  check_time
6180  Take a look at the public variables in handler.h for more information.
6181 
6182  Called in:
6183  filesort.cc
6184  ha_heap.cc
6185  item_sum.cc
6186  opt_sum.cc
6187  sql_delete.cc
6188  sql_delete.cc
6189  sql_derived.cc
6190  sql_select.cc
6191  sql_select.cc
6192  sql_select.cc
6193  sql_select.cc
6194  sql_select.cc
6195  sql_show.cc
6196  sql_show.cc
6197  sql_show.cc
6198  sql_show.cc
6199  sql_table.cc
6200  sql_union.cc
6201  sql_update.cc
6202 
6203  Some flags that are not implemented
6204  HA_STATUS_POS:
6205  This parameter is never used from the MySQL Server. It is checked in a
6206  place in MyISAM so could potentially be used by MyISAM specific
6207  programs.
6208  HA_STATUS_NO_LOCK:
6209  This is declared and often used. It's only used by MyISAM.
6210  It means that MySQL doesn't need the absolute latest statistics
6211  information. This may save the handler from doing internal locks while
6212  retrieving statistics data.
6213 */
6214 
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      /* Cached value available: read it under the auto-inc mutex. */
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        /* The maximum over all partitions is the table's next value. */
        do
        {
          file= *file_array;
          file->info(HA_STATUS_AUTO | no_lock_flag);
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        if (auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                              (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
                         We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
                         deletes
                         We report sum
      data_file_length:  Length of data file, in principle bytes in table
                         We report sum
      index_file_length: Length of index file, in principle bytes in
                         indexes in the table
                         We report sum
      delete_length:     Length of free space easily used by new records
                         in table
                         We report sum
      mean_record_length: Mean record length in the table
                         We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
                         We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.check_time= 0;
    stats.delete_length= 0;
    /* Accumulate over the used (non-pruned) partitions only. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
    }
    /*
      Report at least 2 records for a non-empty table with inexact
      statistics, so the optimizer does not treat it as a 1-row table.
    */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed! It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here are records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:  Maximum data file length
                             We ignore it, is only used in
                             SHOW TABLE STATUS
      max_index_file_length: Maximum index file length
                             We ignore it since it is never used
      block_size:            Block size used
                             We set it to the value of the first handler
      ref_length:            We set this to the value calculated
                             and stored in local object
      create_time:           Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      /* Get variables if not already done */
      if (!(flag & HA_STATUS_VARIABLE) ||
          !bitmap_is_set(&(m_part_info->read_partitions),
                         (file_array - m_file)))
        file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      if (file->stats.records > max_records)
      {
        max_records= file->stats.records;
        handler_instance= i;
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records in
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    /* Use the biggest partition as the representative for constants. */
    file= m_file[handler_instance];
    file->info(HA_STATUS_CONST | no_lock_flag);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: all engines does not support HA_STATUS_ERRKEY, so set errkey.
    */
    file->errkey= errkey;
    file->info(HA_STATUS_ERRKEY | no_lock_flag);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      file->info(HA_STATUS_TIME | no_lock_flag);
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}
6453 
6454 
6455 void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
6456  uint part_id)
6457 {
6458  handler *file= m_file[part_id];
6459  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
6460  file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
6461  HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
6462 
6463  stat_info->records= file->stats.records;
6464  stat_info->mean_rec_length= file->stats.mean_rec_length;
6465  stat_info->data_file_length= file->stats.data_file_length;
6466  stat_info->max_data_file_length= file->stats.max_data_file_length;
6467  stat_info->index_file_length= file->stats.index_file_length;
6468  stat_info->delete_length= file->stats.delete_length;
6469  stat_info->create_time= file->stats.create_time;
6470  stat_info->update_time= file->stats.update_time;
6471  stat_info->check_time= file->stats.check_time;
6472  stat_info->check_sum= 0;
6473  if (file->ha_table_flags() & HA_HAS_CHECKSUM)
6474  stat_info->check_sum= file->checksum();
6475  return;
6476 }
6477 
6478 
/*
  Handle an HA_EXTRA_* hint: forward it to the underlying handlers when
  relevant for their engine category, cache-related state locally, or
  deliberately ignore hints that do not apply to partitioned tables.
  Returns 0 on success, or an error code (e.g. from loop_extra()).
*/
int ha_partition::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("ha_partition:extra");
  DBUG_PRINT("info", ("operation: %d", (int) operation));

  switch (operation) {
    /* Category 1), used by most handlers */
  case HA_EXTRA_KEYREAD:
  case HA_EXTRA_NO_KEYREAD:
  case HA_EXTRA_FLUSH:
    DBUG_RETURN(loop_extra(operation));
  case HA_EXTRA_PREPARE_FOR_RENAME:
  case HA_EXTRA_FORCE_REOPEN:
    DBUG_RETURN(loop_extra_alter(operation));
    break;

    /* Category 2), used by non-MyISAM handlers */
  case HA_EXTRA_IGNORE_DUP_KEY:
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
  {
    if (!m_myisam)
      DBUG_RETURN(loop_extra(operation));
    break;
  }

  /* Category 3), used by MyISAM handlers */
  case HA_EXTRA_PREPARE_FOR_UPDATE:
    /*
      Needs to be run on the first partition in the range now, and
      later in late_extra_cache, when switching to a new partition to scan.
    */
    m_extra_prepare_for_update= TRUE;
    if (m_part_spec.start_part != NO_CURRENT_PART_ID)
    {
      if (!m_extra_cache)
        m_extra_cache_part_id= m_part_spec.start_part;
      DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part);
      (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
    }
    break;
  case HA_EXTRA_NORMAL:
  case HA_EXTRA_QUICK:
  case HA_EXTRA_PREPARE_FOR_DROP:
  case HA_EXTRA_FLUSH_CACHE:
  {
    if (m_myisam)
      DBUG_RETURN(loop_extra(operation));
    break;
  }
  case HA_EXTRA_NO_READCHECK:
  {
    /*
      This is only done as a part of ha_open, which is also used in
      ha_partition::open, so no need to do anything.
    */
    break;
  }
  case HA_EXTRA_CACHE:
  {
    prepare_extra_cache(0);
    break;
  }
  case HA_EXTRA_NO_CACHE:
  {
    /* Turn off caching on the partition that had it enabled, then reset. */
    int ret= 0;
    if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
      ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
    m_extra_cache= FALSE;
    m_extra_cache_size= 0;
    m_extra_prepare_for_update= FALSE;
    m_extra_cache_part_id= NO_CURRENT_PART_ID;
    DBUG_RETURN(ret);
  }
  case HA_EXTRA_WRITE_CACHE:
  {
    /* Write caching replaces any read-cache state; forward to all parts. */
    m_extra_cache= FALSE;
    m_extra_cache_size= 0;
    m_extra_prepare_for_update= FALSE;
    m_extra_cache_part_id= NO_CURRENT_PART_ID;
    DBUG_RETURN(loop_extra(operation));
  }
  case HA_EXTRA_IGNORE_NO_KEY:
  case HA_EXTRA_NO_IGNORE_NO_KEY:
  {
    /*
      Ignore as these are specific to NDB for handling
      idempotency
    */
    break;
  }
  case HA_EXTRA_WRITE_CAN_REPLACE:
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
  {
    /*
      Informs handler that write_row() can replace rows which conflict
      with row being inserted by PK/unique key without reporting error
      to the SQL-layer.

      This optimization is not safe for partitioned table in general case
      since we may have to put new version of row into partition which is
      different from partition in which old version resides (for example
      when we partition by non-PK column or by some column which is not
      part of unique key which were violated).
      And since NDB which is the only engine at the moment that supports
      this optimization handles partitioning on its own we simple disable
      it here. (BTW for NDB this optimization is safe since it supports
      only KEY partitioning and won't use this optimization for tables
      which have additional unique constraints).
    */
    break;
  }
  /* Category 7), used by federated handlers */
  case HA_EXTRA_INSERT_WITH_UPDATE:
    DBUG_RETURN(loop_extra(operation));
  /* Category 8) Operations only used by NDB */
  case HA_EXTRA_DELETE_CANNOT_BATCH:
  case HA_EXTRA_UPDATE_CANNOT_BATCH:
  {
    /* Currently only NDB use the *_CANNOT_BATCH */
    break;
  }
  /* Category 9) Operations only used by MERGE */
  case HA_EXTRA_ADD_CHILDREN_LIST:
  case HA_EXTRA_ATTACH_CHILDREN:
  case HA_EXTRA_IS_ATTACHED_CHILDREN:
  case HA_EXTRA_DETACH_CHILDREN:
  {
    /* Special actions for MERGE tables. Ignore. */
    break;
  }
  /*
    http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
    says we no longer support logging to partitioned tables, so we fail
    here.
  */
  case HA_EXTRA_MARK_AS_LOG_TABLE:
    DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
  default:
  {
    /* Temporary crash to discover what is wrong */
    DBUG_ASSERT(0);
    break;
  }
  }
  DBUG_RETURN(0);
}
6938 
6939 
6953 int ha_partition::reset(void)
6954 {
6955  int result= 0;
6956  int tmp;
6957  uint i;
6958  DBUG_ENTER("ha_partition::reset");
6959 
6960  for (i= bitmap_get_first_set(&m_partitions_to_reset);
6961  i < m_tot_parts;
6962  i= bitmap_get_next_set(&m_partitions_to_reset, i))
6963  {
6964  if ((tmp= m_file[i]->ha_reset()))
6965  result= tmp;
6966  }
6967  bitmap_clear_all(&m_partitions_to_reset);
6968  DBUG_RETURN(result);
6969 }
6970 
6971 /*
6972  Special extra method for HA_EXTRA_CACHE with cachesize as extra parameter
6973 
6974  SYNOPSIS
6975  extra_opt()
6976  operation Must be HA_EXTRA_CACHE
6977  cachesize Size of cache in full table scan
6978 
6979  RETURN VALUE
6980  >0 Error code
6981  0 Success
6982 */
6983 
6984 int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
6985 {
6986  DBUG_ENTER("ha_partition::extra_opt()");
6987 
6988  DBUG_ASSERT(HA_EXTRA_CACHE == operation);
6989  prepare_extra_cache(cachesize);
6990  DBUG_RETURN(0);
6991 }
6992 
6993 
6994 /*
6995  Call extra on handler with HA_EXTRA_CACHE and cachesize
6996 
6997  SYNOPSIS
6998  prepare_extra_cache()
6999  cachesize Size of cache for full table scan
7000 
7001  RETURN VALUE
7002  NONE
7003 */
7004 
7005 void ha_partition::prepare_extra_cache(uint cachesize)
7006 {
7007  DBUG_ENTER("ha_partition::prepare_extra_cache()");
7008  DBUG_PRINT("info", ("cachesize %u", cachesize));
7009 
7010  m_extra_cache= TRUE;
7011  m_extra_cache_size= cachesize;
7012  if (m_part_spec.start_part != NO_CURRENT_PART_ID)
7013  {
7014  DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset,
7015  m_part_spec.start_part));
7016  bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
7017  late_extra_cache(m_part_spec.start_part);
7018  }
7019  DBUG_VOID_RETURN;
7020 }
7021 
7022 
7033 int ha_partition::loop_extra_alter(enum ha_extra_function operation)
7034 {
7035  int result= 0, tmp;
7036  handler **file;
7037  DBUG_ENTER("ha_partition::loop_extra_alter()");
7038  DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME ||
7039  operation == HA_EXTRA_FORCE_REOPEN);
7040 
7041  if (m_new_file != NULL)
7042  {
7043  for (file= m_new_file; *file; file++)
7044  if ((tmp= (*file)->extra(operation)))
7045  result= tmp;
7046  }
7047  if (m_reorged_file != NULL)
7048  {
7049  for (file= m_reorged_file; *file; file++)
7050  if ((tmp= (*file)->extra(operation)))
7051  result= tmp;
7052  }
7053  if ((tmp= loop_extra(operation)))
7054  result= tmp;
7055  DBUG_RETURN(result);
7056 }
7057 
7058 /*
7059  Call extra on all partitions
7060 
7061  SYNOPSIS
7062  loop_extra()
7063  operation extra operation type
7064 
7065  RETURN VALUE
7066  >0 Error code
7067  0 Success
7068 */
7069 
7070 int ha_partition::loop_extra(enum ha_extra_function operation)
7071 {
7072  int result= 0, tmp;
7073  uint i;
7074  DBUG_ENTER("ha_partition::loop_extra()");
7075 
7076  for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
7077  i < m_tot_parts;
7078  i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
7079  {
7080  if ((tmp= m_file[i]->extra(operation)))
7081  result= tmp;
7082  }
7083  /* Add all used partitions to be called in reset(). */
7084  bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
7085  DBUG_RETURN(result);
7086 }
7087 
7088 
7089 /*
7090  Call extra(HA_EXTRA_CACHE) on next partition_id
7091 
7092  SYNOPSIS
7093  late_extra_cache()
7094  partition_id Partition id to call extra on
7095 
7096  RETURN VALUE
7097  NONE
7098 */
7099 
7100 void ha_partition::late_extra_cache(uint partition_id)
7101 {
7102  handler *file;
7103  DBUG_ENTER("ha_partition::late_extra_cache");
7104  DBUG_PRINT("info", ("extra_cache %u prepare %u partid %u size %u",
7105  m_extra_cache, m_extra_prepare_for_update,
7106  partition_id, m_extra_cache_size));
7107 
7108  if (!m_extra_cache && !m_extra_prepare_for_update)
7109  DBUG_VOID_RETURN;
7110  file= m_file[partition_id];
7111  if (m_extra_cache)
7112  {
7113  if (m_extra_cache_size == 0)
7114  (void) file->extra(HA_EXTRA_CACHE);
7115  else
7116  (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
7117  }
7118  if (m_extra_prepare_for_update)
7119  {
7120  (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
7121  }
7122  m_extra_cache_part_id= partition_id;
7123  DBUG_VOID_RETURN;
7124 }
7125 
7126 
7127 /*
7128  Call extra(HA_EXTRA_NO_CACHE) on next partition_id
7129 
7130  SYNOPSIS
7131  late_extra_no_cache()
7132  partition_id Partition id to call extra on
7133 
7134  RETURN VALUE
7135  NONE
7136 */
7137 
7138 void ha_partition::late_extra_no_cache(uint partition_id)
7139 {
7140  handler *file;
7141  DBUG_ENTER("ha_partition::late_extra_no_cache");
7142 
7143  if (!m_extra_cache && !m_extra_prepare_for_update)
7144  DBUG_VOID_RETURN;
7145  file= m_file[partition_id];
7146  (void) file->extra(HA_EXTRA_NO_CACHE);
7147  DBUG_ASSERT(partition_id == m_extra_cache_part_id);
7148  m_extra_cache_part_id= NO_CURRENT_PART_ID;
7149  DBUG_VOID_RETURN;
7150 }
7151 
7152 
7153 /****************************************************************************
7154  MODULE optimiser support
7155 ****************************************************************************/
7156 
/*
  Return the set of keys usable for scanning. All partitions share the
  same index definitions, so the first partition's answer is used.
*/
const key_map *ha_partition::keys_to_use_for_scanning()
{
  DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
  DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
}
7171 
7172 
7177 ha_rows ha_partition::min_rows_for_estimate()
7178 {
7179  uint i, max_used_partitions, tot_used_partitions;
7180  DBUG_ENTER("ha_partition::min_rows_for_estimate");
7181 
7182  tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions);
7183 
7184  /*
7185  All partitions might have been left as unused during partition pruning
7186  due to, for example, an impossible WHERE condition. Nonetheless, the
7187  optimizer might still attempt to perform (e.g. range) analysis where an
7188  estimate of the the number of rows is calculated using records_in_range.
7189  Hence, to handle this and other possible cases, use zero as the minimum
7190  number of rows to base the estimate on if no partition is being used.
7191  */
7192  if (!tot_used_partitions)
7193  DBUG_RETURN(0);
7194 
7195  /*
7196  Allow O(log2(tot_partitions)) increase in number of used partitions.
7197  This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
7198  I.e when the total number of partitions doubles, allow one more
7199  partition to be checked.
7200  */
7201  i= 2;
7202  max_used_partitions= 1;
7203  while (i < m_tot_parts)
7204  {
7205  max_used_partitions++;
7206  i= i << 1;
7207  }
7208  if (max_used_partitions > tot_used_partitions)
7209  max_used_partitions= tot_used_partitions;
7210 
7211  /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
7212  DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
7213  max_used_partitions,
7214  (ulong) stats.records));
7215  DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
7216  tot_used_partitions,
7217  (ulong) stats.records * max_used_partitions
7218  / tot_used_partitions));
7219  DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
7220 }
7221 
7222 
7239 uint ha_partition::get_biggest_used_partition(uint *part_index)
7240 {
7241  uint part_id;
7242  while ((*part_index) < m_tot_parts)
7243  {
7244  part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
7245  if (bitmap_is_set(&m_part_info->read_partitions, part_id))
7246  return part_id;
7247  }
7248  return NO_CURRENT_PART_ID;
7249 }
7250 
7251 
7252 /*
7253  Return time for a scan of the table
7254 
7255  SYNOPSIS
7256  scan_time()
7257 
7258  RETURN VALUE
7259  time for scan
7260 */
7261 
7262 double ha_partition::scan_time()
7263 {
7264  double scan_time= 0;
7265  uint i;
7266  DBUG_ENTER("ha_partition::scan_time");
7267 
7268  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7269  i < m_tot_parts;
7270  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7271  scan_time+= m_file[i]->scan_time();
7272  DBUG_RETURN(scan_time);
7273 }
7274 
7275 
/*
  Estimate the number of rows in the key range [min_key, max_key] for
  index inx.

  Walks the used partitions in descending record-count order (biggest
  first, via get_biggest_used_partition) and sums per-partition
  estimates. Once enough rows have been examined
  (min_rows_for_estimate()), the partial sum is scaled up to the whole
  table. Returns HA_POS_ERROR if any partition reports it.
*/
ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
                                       key_range *max_key)
{
  ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
  uint partition_index= 0, part_id;
  DBUG_ENTER("ha_partition::records_in_range");

  min_rows_to_check= min_rows_for_estimate();

  while ((part_id= get_biggest_used_partition(&partition_index))
         != NO_CURRENT_PART_ID)
  {
    rows= m_file[part_id]->records_in_range(inx, min_key, max_key);

    DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
                        (ulong) m_file[part_id]->stats.records));

    if (rows == HA_POS_ERROR)
      DBUG_RETURN(HA_POS_ERROR);
    estimated_rows+= rows;
    checked_rows+= m_file[part_id]->stats.records;
    /*
      Returning 0 means no rows can be found, so we must continue
      this loop as long as we have estimated_rows == 0.
      Also many engines return 1 to indicate that there may exist
      a matching row, we do not normalize this by dividing by number of
      used partitions, but leave it to be returned as a sum, which will
      reflect that we will need to scan each partition's index.

      Note that this statistics may not always be correct, so we must
      continue even if the current partition has 0 rows, since we might have
      deleted rows from the current partition, or inserted to the next
      partition.
    */
    if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
      /* Enough data examined: scale the sample up to the full table. */
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
                  (ulong) (estimated_rows * stats.records / checked_rows),
                  (ulong) estimated_rows,
                  (ulong) stats.records,
                  (ulong) checked_rows));
      DBUG_RETURN(estimated_rows * stats.records / checked_rows);
    }
  }
  /* All used partitions were examined; return the unscaled sum. */
  DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                      inx,
                      (ulong) estimated_rows));
  DBUG_RETURN(estimated_rows);
}
7341 
7342 
7350 {
7351  ha_rows rows, tot_rows= 0;
7352  handler **file= m_file;
7353  DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
7354 
7355  do
7356  {
7357  if (bitmap_is_set(&(m_part_info->read_partitions), (file - m_file)))
7358  {
7359  rows= (*file)->estimate_rows_upper_bound();
7360  if (rows == HA_POS_ERROR)
7361  DBUG_RETURN(HA_POS_ERROR);
7362  tot_rows+= rows;
7363  }
7364  } while (*(++file));
7365  DBUG_RETURN(tot_rows);
7366 }
7367 
7368 
7369 /*
7370  Get time to read
7371 
7372  SYNOPSIS
7373  read_time()
7374  index Index number used
7375  ranges Number of ranges
7376  rows Number of rows
7377 
7378  RETURN VALUE
7379  time for read
7380 
7381  DESCRIPTION
7382  This will be optimised later to include whether or not the index can
7383  be used with partitioning. To achieve we need to add another parameter
7384  that specifies how many of the index fields that are bound in the ranges.
7385  Possibly added as a new call to handlers.
7386 */
7387 
double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
{
  DBUG_ENTER("ha_partition::read_time");

  /* All partitions use the same engine; delegate to the first one. */
  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
}
7394 
7395 
7402 ha_rows ha_partition::records()
7403 {
7404  ha_rows rows, tot_rows= 0;
7405  uint i;
7406  DBUG_ENTER("ha_partition::records");
7407 
7408  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7409  i < m_tot_parts;
7410  i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7411  {
7412  rows= m_file[i]->records();
7413  if (rows == HA_POS_ERROR)
7414  DBUG_RETURN(HA_POS_ERROR);
7415  tot_rows+= rows;
7416  }
7417  DBUG_RETURN(tot_rows);
7418 }
7419 
7420 
7421 /*
7422  Is it ok to switch to a new engine for this table
7423 
7424  SYNOPSIS
7425  can_switch_engine()
7426 
7427  RETURN VALUE
7428  TRUE Ok
7429  FALSE Not ok
7430 
7431  DESCRIPTION
7432  Used to ensure that tables with foreign key constraints are not moved
7433  to engines without foreign key support.
7434 */
7435 
7437 {
7438  handler **file;
7439  DBUG_ENTER("ha_partition::can_switch_engines");
7440 
7441  file= m_file;
7442  do
7443  {
7444  if (!(*file)->can_switch_engines())
7445  DBUG_RETURN(FALSE);
7446  } while (*(++file));
7447  DBUG_RETURN(TRUE);
7448 }
7449 
7450 
7451 /*
7452  Is table cache supported
7453 
7454  SYNOPSIS
7455  table_cache_type()
7456 
7457 */
7458 
7460 {
7461  DBUG_ENTER("ha_partition::table_cache_type");
7462 
7463  DBUG_RETURN(m_file[0]->table_cache_type());
7464 }
7465 
7466 
/*
  Calculate the hash value used for KEY partitioning over field_array.

  When the table was created with ALGORITHM = 1 (KEY_ALGORITHM_51) the
  per-type field hashing of MySQL 5.1 is emulated so rows map to the
  same partitions as in 5.1; otherwise each field's own collation-aware
  hash() is used (5.5+ behavior).
*/
uint32 ha_partition::calculate_key_hash_value(Field **field_array)
{
  ulong nr1= 1;
  ulong nr2= 4;
  bool use_51_hash;
  use_51_hash= test((*field_array)->table->part_info->key_algorithm ==
                    partition_info::KEY_ALGORITHM_51);

  do
  {
    Field *field= *field_array;
    if (use_51_hash)
    {
      switch (field->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
        {
          /* NULL folds a deterministic bit pattern into the hash. */
          if (field->is_null())
          {
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_bin.coll->hash_sort(&my_charset_bin, field->ptr, len,
                                         &nr1, &nr2);
          /* Done with this field, continue with next one. */
          continue;
        }
      case MYSQL_TYPE_STRING:
      case MYSQL_TYPE_VARCHAR:
      case MYSQL_TYPE_BIT:
        /* Not affected, same in 5.1 and 5.5 */
        break;
      /*
        ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
        and my_hash_sort_bin in 5.5!
      */
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          if (field->is_null())
          {
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_simple, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_latin1.coll->hash_sort(&my_charset_latin1, field->ptr,
                                            len, &nr1, &nr2);
          continue;
        }
      /* New types in mysql-5.6. */
      case MYSQL_TYPE_DATETIME2:
      case MYSQL_TYPE_TIME2:
      case MYSQL_TYPE_TIMESTAMP2:
        /* Not affected, 5.6+ only! */
        break;

      /* These types should not be allowed for partitioning! */
      case MYSQL_TYPE_NULL:
      case MYSQL_TYPE_DECIMAL:
      case MYSQL_TYPE_DATE:
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_VAR_STRING:
      case MYSQL_TYPE_GEOMETRY:
        /* fall through. */
      default:
        DBUG_ASSERT(0);                    // New type?
        /* Fall through for default hashing (5.5). */
      }
      /* fall through, use collation based hashing. */
    }
    field->hash(&nr1, &nr2);
  } while (*(++field_array));
  return (uint32) nr1;
}
7569 
7570 
7571 /****************************************************************************
7572  MODULE print messages
7573 ****************************************************************************/
7574 
7575 const char *ha_partition::index_type(uint inx)
7576 {
7577  uint first_used_partition;
7578  DBUG_ENTER("ha_partition::index_type");
7579 
7580  first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions));
7581 
7582  if (first_used_partition == MY_BIT_NONE)
7583  {
7584  DBUG_ASSERT(0); // How can this happen?
7585  DBUG_RETURN(handler::index_type(inx));
7586  }
7587 
7588  DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
7589 }
7590 
7591 
7592 enum row_type ha_partition::get_row_type() const
7593 {
7594  uint i;
7595  enum row_type type;
7596  DBUG_ENTER("ha_partition::get_row_type");
7597 
7598  i= bitmap_get_first_set(&m_part_info->read_partitions);
7599  DBUG_ASSERT(i < m_tot_parts);
7600  if (i >= m_tot_parts)
7601  DBUG_RETURN(ROW_TYPE_NOT_USED);
7602 
7603  type= m_file[i]->get_row_type();
7604  DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
7605 
7606  for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
7607  i < m_tot_parts;
7608  i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
7609  {
7610  enum row_type part_type= m_file[i]->get_row_type();
7611  DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
7612  if (part_type != type)
7613  DBUG_RETURN(ROW_TYPE_NOT_USED);
7614  }
7615 
7616  DBUG_RETURN(type);
7617 }
7618 
7619 
/*
  Append a human-readable " name:value" dump of the row under report
  (m_err_rec if set, else record[0]) to str. Used when reporting a row
  found in the wrong partition. If the table has a primary key, only PK
  columns are printed, otherwise the full partition field array.
*/
void ha_partition::append_row_to_str(String &str)
{
  const uchar *rec;
  bool is_rec0= !m_err_rec || m_err_rec == table->record[0];
  if (is_rec0)
    rec= table->record[0];
  else
    rec= m_err_rec;
  // If PK, use full PK instead of full part field array!
  if (table->s->primary_key != MAX_KEY)
  {
    KEY *key= table->key_info + table->s->primary_key;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
    /* Temporarily repoint the key fields at rec so they can be unpacked. */
    if (!is_rec0)
      set_key_field_ptr(key, rec, table->record[0]);
    for (; key_part != key_part_end; key_part++)
    {
      Field *field= key_part->field;
      str.append(" ");
      str.append(field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the field pointers to record[0]. */
    if (!is_rec0)
      set_key_field_ptr(key, table->record[0], rec);
  }
  else
  {
    Field **field_ptr;
    /* Temporarily repoint the partition fields at rec. */
    if (!is_rec0)
      set_field_ptr(m_part_info->full_part_field_array, rec,
                    table->record[0]);
    /* No primary key, use full partition field array. */
    for (field_ptr= m_part_info->full_part_field_array;
         *field_ptr;
         field_ptr++)
    {
      Field *field= *field_ptr;
      str.append(" ");
      str.append(field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the field pointers to record[0]. */
    if (!is_rec0)
      set_field_ptr(m_part_info->full_part_field_array, table->record[0],
                    rec);
  }
}
7669 
7670 
/*
  Report an error to the client, handling the partition-specific errors
  (no partition found for the row, row found in the wrong partition)
  before delegating to the last-used partition's handler.
*/
void ha_partition::print_error(int error, myf errflag)
{
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::print_error");

  /* Should probably look for my own errors first */
  DBUG_PRINT("enter", ("error: %d", error));

  if ((error == HA_ERR_NO_PARTITION_FOUND) &&
      ! (thd->lex->alter_info.flags & Alter_info::ALTER_TRUNCATE_PARTITION))
    m_part_info->print_no_partition_found(table);
  else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
  {
    /* Should only happen on DELETE or UPDATE! */
    DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
                thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
                thd_sql_command(thd) == SQLCOM_UPDATE ||
                thd_sql_command(thd) == SQLCOM_UPDATE_MULTI);
    DBUG_ASSERT(m_err_rec);
    if (m_err_rec)
    {
      uint max_length;
      char buf[MAX_KEY_LENGTH];
      String str(buf,sizeof(buf),system_charset_info);
      uint32 part_id;
      /* Build "(current_part != correct_part) col:val ..." for the message. */
      str.length(0);
      str.append("(");
      str.append_ulonglong(m_last_part);
      str.append(" != ");
      if (get_part_for_delete(m_err_rec, m_rec0, m_part_info, &part_id))
        str.append("?");
      else
        str.append_ulonglong(part_id);
      str.append(")");
      append_row_to_str(str);

      /* Log this error, so the DBA can notice it and fix it! */
      sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s\n"
                      "Please REPAIR the table!",
                      table->s->table_name.str,
                      str.c_ptr_safe());

      /* Truncate the row dump so the client message fits the buffer. */
      max_length= (MYSQL_ERRMSG_SIZE - (uint) strlen(ER(ER_ROW_IN_WRONG_PARTITION)));
      if (str.length() >= max_length)
      {
        str.length(max_length-4);
        str.append(STRING_WITH_LEN("..."));
      }
      my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
      m_err_rec= NULL;
      DBUG_VOID_RETURN;
    }
    /* fall through to generic error handling. */
  }

  /* In case m_file has not been initialized, like in bug#42438 */
  if (m_file)
  {
    if (m_last_part >= m_tot_parts)
    {
      DBUG_ASSERT(0);
      m_last_part= 0;
    }
    m_file[m_last_part]->print_error(error, errflag);
  }
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}
7740 
7741 
7742 bool ha_partition::get_error_message(int error, String *buf)
7743 {
7744  DBUG_ENTER("ha_partition::get_error_message");
7745 
7746  /* Should probably look for my own errors first */
7747 
7748  /* In case m_file has not been initialized, like in bug#42438 */
7749  if (m_file)
7750  DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
7751  DBUG_RETURN(handler::get_error_message(error, buf));
7752 
7753 }
7754 
7755 
7756 /****************************************************************************
7757  MODULE in-place ALTER
7758 ****************************************************************************/
7763 handler::Table_flags ha_partition::table_flags() const
7764 {
7765  uint first_used_partition= 0;
7766  DBUG_ENTER("ha_partition::table_flags");
7767  if (m_handler_status < handler_initialized ||
7768  m_handler_status >= handler_closed)
7769  DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
7770 
7771  if (get_lock_type() != F_UNLCK)
7772  {
7773  /*
7774  The flags are cached after external_lock, and may depend on isolation
7775  level. So we should use a locked partition to get the correct flags.
7776  */
7777  first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
7778  if (first_used_partition == MY_BIT_NONE)
7779  first_used_partition= 0;
7780  }
7781  DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
7782  ~(PARTITION_DISABLED_TABLE_FLAGS)) |
7783  (PARTITION_ENABLED_TABLE_FLAGS));
7784 }
7785 
7786 
7792 {
7793  uint flags_to_return;
7794  DBUG_ENTER("ha_partition::alter_table_flags");
7795 
7796  flags_to_return= ht->alter_table_flags(flags);
7797  flags_to_return|= m_file[0]->alter_table_flags(flags);
7798 
7799  DBUG_RETURN(flags_to_return);
7800 }
7801 
7802 
7807  uint table_changes)
7808 {
7809  handler **file;
7810  bool ret= COMPATIBLE_DATA_YES;
7811 
7812  /*
7813  The check for any partitioning related changes have already been done
7814  in mysql_alter_table (by fix_partition_func), so it is only up to
7815  the underlying handlers.
7816  */
7817  for (file= m_file; *file; file++)
7818  if ((ret= (*file)->check_if_incompatible_data(create_info,
7819  table_changes)) !=
7820  COMPATIBLE_DATA_YES)
7821  break;
7822  return ret;
7823 }
7824 
7825 
7834 class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
7835 {
7836 public:
7837  inplace_alter_handler_ctx **handler_ctx_array;
7838 private:
7839  uint m_tot_parts;
7840 
7841 public:
7842  ha_partition_inplace_ctx(THD *thd, uint tot_parts)
7844  handler_ctx_array(NULL),
7845  m_tot_parts(tot_parts)
7846  {}
7847 
7848  ~ha_partition_inplace_ctx()
7849  {
7850  if (handler_ctx_array)
7851  {
7852  for (uint index= 0; index < m_tot_parts; index++)
7853  delete handler_ctx_array[index];
7854  }
7855  }
7856 };
7857 
7858 
7859 enum_alter_inplace_result
7861  Alter_inplace_info *ha_alter_info)
7862 {
7863  uint index= 0;
7864  enum_alter_inplace_result result= HA_ALTER_INPLACE_NO_LOCK;
7865  ha_partition_inplace_ctx *part_inplace_ctx;
7866  bool first_is_set= false;
7867  THD *thd= ha_thd();
7868 
7869  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
7870  /*
7871  Support inplace change of KEY () -> KEY ALGORITHM = N ().
7872  Any other change would set partition_changed in
7873  prep_alter_part_table() in mysql_alter_table().
7874  */
7875  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
7876  DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
7877 
7878  part_inplace_ctx=
7879  new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
7880  if (!part_inplace_ctx)
7881  DBUG_RETURN(HA_ALTER_ERROR);
7882 
7883  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
7884  thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
7885  if (!part_inplace_ctx->handler_ctx_array)
7886  DBUG_RETURN(HA_ALTER_ERROR);
7887 
7888  /* Set all to NULL, including the terminating one. */
7889  for (index= 0; index <= m_tot_parts; index++)
7890  part_inplace_ctx->handler_ctx_array[index]= NULL;
7891 
7892  for (index= 0; index < m_tot_parts; index++)
7893  {
7894  enum_alter_inplace_result p_result=
7895  m_file[index]->check_if_supported_inplace_alter(altered_table,
7896  ha_alter_info);
7897  part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
7898 
7899  if (index == 0)
7900  {
7901  first_is_set= (ha_alter_info->handler_ctx != NULL);
7902  }
7903  else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
7904  {
7905  /* Either none or all partitions must set handler_ctx! */
7906  DBUG_ASSERT(0);
7907  DBUG_RETURN(HA_ALTER_ERROR);
7908  }
7909  if (p_result < result)
7910  result= p_result;
7911  if (result == HA_ALTER_ERROR)
7912  break;
7913  }
7914 
7915  ha_alter_info->handler_ctx= part_inplace_ctx;
7916  /*
7917  To indicate for future inplace calls that there are several
7918  partitions/handlers that need to be committed together,
7919  we set group_commit_ctx to the NULL terminated array of
7920  the partitions handlers.
7921  */
7922  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;
7923 
7924  DBUG_RETURN(result);
7925 }
7926 
7927 
7929  Alter_inplace_info *ha_alter_info)
7930 {
7931  uint index= 0;
7932  bool error= false;
7933  ha_partition_inplace_ctx *part_inplace_ctx;
7934 
7935  DBUG_ENTER("ha_partition::prepare_inplace_alter_table");
7936 
7937  /*
7938  Changing to similar partitioning, only update metadata.
7939  Non allowed changes would be catched in prep_alter_part_table().
7940  */
7941  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
7942  DBUG_RETURN(false);
7943 
7944  part_inplace_ctx=
7945  static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
7946 
7947  for (index= 0; index < m_tot_parts && !error; index++)
7948  {
7949  ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
7950  if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
7951  ha_alter_info))
7952  error= true;
7953  part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
7954  }
7955  ha_alter_info->handler_ctx= part_inplace_ctx;
7956 
7957  DBUG_RETURN(error);
7958 }
7959 
7960 
7961 bool ha_partition::inplace_alter_table(TABLE *altered_table,
7962  Alter_inplace_info *ha_alter_info)
7963 {
7964  uint index= 0;
7965  bool error= false;
7966  ha_partition_inplace_ctx *part_inplace_ctx;
7967 
7968  DBUG_ENTER("ha_partition::inplace_alter_table");
7969 
7970  /*
7971  Changing to similar partitioning, only update metadata.
7972  Non allowed changes would be catched in prep_alter_part_table().
7973  */
7974  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
7975  DBUG_RETURN(false);
7976 
7977  part_inplace_ctx=
7978  static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
7979 
7980  for (index= 0; index < m_tot_parts && !error; index++)
7981  {
7982  ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
7983  if (m_file[index]->ha_inplace_alter_table(altered_table,
7984  ha_alter_info))
7985  error= true;
7986  part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
7987  }
7988  ha_alter_info->handler_ctx= part_inplace_ctx;
7989 
7990  DBUG_RETURN(error);
7991 }
7992 
7993 
7994 /*
7995  Note that this function will try rollback failed ADD INDEX by
7996  executing DROP INDEX for the indexes that were committed (if any)
7997  before the error occured. This means that the underlying storage
7998  engine must be able to drop index in-place with X-lock held.
7999  (As X-lock will be held here if new indexes are to be committed)
8000 */
8002  Alter_inplace_info *ha_alter_info,
8003  bool commit)
8004 {
8005  ha_partition_inplace_ctx *part_inplace_ctx;
8006  bool error= false;
8007 
8008  DBUG_ENTER("ha_partition::commit_inplace_alter_table");
8009 
8010  /*
8011  Changing to similar partitioning, only update metadata.
8012  Non allowed changes would be catched in prep_alter_part_table().
8013  */
8014  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
8015  DBUG_RETURN(false);
8016 
8017  part_inplace_ctx=
8018  static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
8019 
8020  if (commit)
8021  {
8022  DBUG_ASSERT(ha_alter_info->group_commit_ctx ==
8023  part_inplace_ctx->handler_ctx_array);
8024  ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
8025  error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
8026  ha_alter_info, commit);
8027  if (error)
8028  goto end;
8029  if (ha_alter_info->group_commit_ctx)
8030  {
8031  /*
8032  If ha_alter_info->group_commit_ctx is not set to NULL,
8033  then the engine did only commit the first partition!
8034  The engine is probably new, since both innodb and the default
8035  implementation of handler::commit_inplace_alter_table sets it to NULL
8036  and simply return false, since it allows metadata changes only.
8037  Loop over all other partitions as to follow the protocol!
8038  */
8039  uint i;
8040  DBUG_ASSERT(0);
8041  for (i= 1; i < m_tot_parts; i++)
8042  {
8043  ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
8044  error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
8045  ha_alter_info,
8046  true);
8047  }
8048  }
8049  }
8050  else
8051  {
8052  uint i;
8053  for (i= 0; i < m_tot_parts; i++)
8054  {
8055  /* Rollback, commit == false, is done for each partition! */
8056  ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
8057  if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
8058  ha_alter_info, false))
8059  error= true;
8060  }
8061  }
8062 end:
8063  ha_alter_info->handler_ctx= part_inplace_ctx;
8064 
8065  DBUG_RETURN(error);
8066 }
8067 
8068 
8070 {
8071  handler **file;
8072 
8073  DBUG_ENTER("ha_partition::notify_table_changed");
8074 
8075  for (file= m_file; *file; file++)
8076  (*file)->ha_notify_table_changed();
8077 
8078  DBUG_VOID_RETURN;
8079 }
8080 
8081 
8082 /*
8083  If frm_error() is called then we will use this to to find out what file
8084  extensions exist for the storage engine. This is also used by the default
8085  rename_table and delete_table method in handler.cc.
8086 */
8087 
/* NullS-terminated list of file extensions owned by this handler (.par). */
static const char *ha_partition_ext[]=
{
  ha_par_ext, NullS
};
8092 
8093 const char **ha_partition::bas_ext() const
8094 { return ha_partition_ext; }
8095 
8096 
8097 uint ha_partition::min_of_the_max_uint(
8098  uint (handler::*operator_func)(void) const) const
8099 {
8100  handler **file;
8101  uint min_of_the_max= ((*m_file)->*operator_func)();
8102 
8103  for (file= m_file+1; *file; file++)
8104  {
8105  uint tmp= ((*file)->*operator_func)();
8106  set_if_smaller(min_of_the_max, tmp);
8107  }
8108  return min_of_the_max;
8109 }
8110 
8111 
/* Smallest max_supported_key_parts() among all partition handlers. */
uint ha_partition::max_supported_key_parts() const
{
  return min_of_the_max_uint(&handler::max_supported_key_parts);
}
8116 
8117 
/* Smallest max_supported_key_length() among all partition handlers. */
uint ha_partition::max_supported_key_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_length);
}
8122 
8123 
/* Smallest max_supported_key_part_length() among all partition handlers. */
uint ha_partition::max_supported_key_part_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_part_length);
}
8128 
8129 
/* Smallest max_supported_record_length() among all partition handlers. */
uint ha_partition::max_supported_record_length() const
{
  return min_of_the_max_uint(&handler::max_supported_record_length);
}
8134 
8135 
/* Smallest max_supported_keys() among all partition handlers. */
uint ha_partition::max_supported_keys() const
{
  return min_of_the_max_uint(&handler::max_supported_keys);
}
8140 
8141 
8142 uint ha_partition::extra_rec_buf_length() const
8143 {
8144  handler **file;
8145  uint max= (*m_file)->extra_rec_buf_length();
8146 
8147  for (file= m_file, file++; *file; file++)
8148  if (max < (*file)->extra_rec_buf_length())
8149  max= (*file)->extra_rec_buf_length();
8150  return max;
8151 }
8152 
8153 
8154 uint ha_partition::min_record_length(uint options) const
8155 {
8156  handler **file;
8157  uint max= (*m_file)->min_record_length(options);
8158 
8159  for (file= m_file, file++; *file; file++)
8160  if (max < (*file)->min_record_length(options))
8161  max= (*file)->min_record_length(options);
8162  return max;
8163 }
8164 
8165 
8166 /****************************************************************************
8167  MODULE compare records
8168 ****************************************************************************/
8169 /*
8170  Compare two positions
8171 
8172  SYNOPSIS
8173  cmp_ref()
8174  ref1 First position
8175  ref2 Second position
8176 
8177  RETURN VALUE
8178  <0 ref1 < ref2
8179  0 Equal
8180  >0 ref1 > ref2
8181 
8182  DESCRIPTION
8183  We get two references and need to check if those records are the same.
8184  If they belong to different partitions we decide that they are not
8185  the same record. Otherwise we use the particular handler to decide if
8186  they are the same. Sort in partition id order if not equal.
8187 */
8188 
8189 int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
8190 {
8191  uint part_id;
8192  my_ptrdiff_t diff1, diff2;
8193  handler *file;
8194  DBUG_ENTER("ha_partition::cmp_ref");
8195 
8196  if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
8197  {
8198  part_id= uint2korr(ref1);
8199  file= m_file[part_id];
8200  DBUG_ASSERT(part_id < m_tot_parts);
8201  DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
8202  (ref2 + PARTITION_BYTES_IN_POS)));
8203  }
8204  diff1= ref2[1] - ref1[1];
8205  diff2= ref2[0] - ref1[0];
8206  if (diff1 > 0)
8207  {
8208  DBUG_RETURN(-1);
8209  }
8210  if (diff1 < 0)
8211  {
8212  DBUG_RETURN(+1);
8213  }
8214  if (diff2 > 0)
8215  {
8216  DBUG_RETURN(-1);
8217  }
8218  DBUG_RETURN(+1);
8219 }
8220 
8221 
8222 /****************************************************************************
8223  MODULE auto increment
8224 ****************************************************************************/
8225 
8226 
8227 int ha_partition::reset_auto_increment(ulonglong value)
8228 {
8229  handler **file= m_file;
8230  int res;
8231  DBUG_ENTER("ha_partition::reset_auto_increment");
8232  lock_auto_increment();
8233  part_share->auto_inc_initialized= false;
8234  part_share->next_auto_inc_val= 0;
8235  do
8236  {
8237  if ((res= (*file)->ha_reset_auto_increment(value)) != 0)
8238  break;
8239  } while (*(++file));
8240  unlock_auto_increment();
8241  DBUG_RETURN(res);
8242 }
8243 
8244 
/**
  Reserve an interval of auto_increment values.

  @param      offset              Offset of first value to use; the actual
                                  adjustment for offset/increment happens
                                  later in update_auto_increment().
  @param      increment           Distance between reserved values.
  @param      nb_desired_values   Number of values the caller wants.
  @param[out] first_value         First reserved value, or ULONGLONG_MAX if
                                  a partition failed to reserve a value.
  @param[out] nb_reserved_values  Number of values actually reserved.
*/
void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
                                      ulonglong nb_desired_values,
                                      ulonglong *first_value,
                                      ulonglong *nb_reserved_values)
{
  DBUG_ENTER("ha_partition::get_auto_increment");
  DBUG_PRINT("info", ("offset: %lu inc: %lu desired_values: %lu "
                      "first_value: %lu", (ulong) offset, (ulong) increment,
                      (ulong) nb_desired_values, (ulong) *first_value));
  DBUG_ASSERT(increment && nb_desired_values);
  *first_value= 0;
  if (table->s->next_number_keypart)
  {
    /*
      next_number_keypart is != 0 if the auto_increment column is a secondary
      column in the index (it is allowed in MyISAM).
    */
    DBUG_PRINT("info", ("next_number_keypart != 0"));
    ulonglong nb_reserved_values_part;
    ulonglong first_value_part, max_first_value;
    handler **file= m_file;
    first_value_part= max_first_value= *first_value;
    /* Must lock and find highest value among all partitions. */
    lock_auto_increment();
    do
    {
      /* Only nb_desired_values = 1 makes sense */
      (*file)->get_auto_increment(offset, increment, 1,
                                  &first_value_part, &nb_reserved_values_part);
      if (first_value_part == ULONGLONG_MAX) // error in one partition
      {
        *first_value= first_value_part;
        /* log that the error was between table/partition handler */
        sql_print_error("Partition failed to reserve auto_increment value");
        unlock_auto_increment();
        DBUG_VOID_RETURN;
      }
      DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
      set_if_bigger(max_first_value, first_value_part);
    } while (*(++file));
    *first_value= max_first_value;
    *nb_reserved_values= 1;
    unlock_auto_increment();
  }
  else
  {
    THD *thd= ha_thd();
    /*
      This is initialized in the beginning of the first write_row call.
    */
    DBUG_ASSERT(part_share->auto_inc_initialized);
    /*
      Get a lock for handling the auto_increment in part_share
      for avoiding two concurrent statements getting the same number.
    */

    lock_auto_increment();

    /*
      In a multi-row insert statement like INSERT SELECT and LOAD DATA
      where the number of candidate rows to insert is not known in advance
      we must hold a lock/mutex for the whole statement if we have statement
      based replication. Because the statement-based binary log contains
      only the first generated value used by the statement, and slaves assume
      all other generated values used by this statement were consecutive to
      this first one, we must exclusively lock the generator until the
      statement is done.
    */
    if (!auto_increment_safe_stmt_log_lock &&
        thd->lex->sql_command != SQLCOM_INSERT &&
        mysql_bin_log.is_open() &&
        !thd->is_current_stmt_binlog_format_row() &&
        (thd->variables.option_bits & OPTION_BIN_LOG))
    {
      DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
      auto_increment_safe_stmt_log_lock= TRUE;
    }

    /* this gets corrected (for offset/increment) in update_auto_increment */
    *first_value= part_share->next_auto_inc_val;
    part_share->next_auto_inc_val+= nb_desired_values * increment;

    unlock_auto_increment();
    DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
    *nb_reserved_values= nb_desired_values;
  }
  DBUG_VOID_RETURN;
}
8342 
/**
  Release reserved but unused auto_increment values.

  If the auto_increment column is a secondary index column, the call is
  simply forwarded to all locked partitions.  Otherwise the shared
  next_auto_inc_val is lowered if this thread reserved values it did not
  use, and the statement-level lock taken in get_auto_increment() is
  released.
*/
void ha_partition::release_auto_increment()
{
  DBUG_ENTER("ha_partition::release_auto_increment");

  if (table->s->next_number_keypart)
  {
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
    {
      m_file[i]->ha_release_auto_increment();
    }
  }
  else if (next_insert_id)
  {
    ulonglong next_auto_inc_val;
    lock_auto_increment();
    next_auto_inc_val= part_share->next_auto_inc_val;
    /*
      If the current auto_increment value is lower than the reserved
      value, and the reserved value was reserved by this thread,
      we can lower the reserved value.
    */
    if (next_insert_id < next_auto_inc_val &&
        auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val)
    {
      THD *thd= ha_thd();
      /*
        Check that we do not lower the value because of a failed insert
        with SET INSERT_ID, i.e. forced/non generated values.
      */
      if (thd->auto_inc_intervals_forced.maximum() < next_insert_id)
        part_share->next_auto_inc_val= next_insert_id;
    }
    DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                        (ulong) part_share->next_auto_inc_val));

    /* Unlock the multi row statement lock taken in get_auto_increment */
    if (auto_increment_safe_stmt_log_lock)
    {
      auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
8392 
8393 /****************************************************************************
8394  MODULE initialize handler for HANDLER call
8395 ****************************************************************************/
8396 
8397 void ha_partition::init_table_handle_for_HANDLER()
8398 {
8399  return;
8400 }
8401 
8402 
8407 uint ha_partition::checksum() const
8408 {
8409  ha_checksum sum= 0;
8410 
8411  DBUG_ENTER("ha_partition::checksum");
8412  if ((table_flags() & HA_HAS_CHECKSUM))
8413  {
8414  handler **file= m_file;
8415  do
8416  {
8417  sum+= (*file)->checksum();
8418  } while (*(++file));
8419  }
8420  DBUG_RETURN(sum);
8421 }
8422 
8423 
8424 /****************************************************************************
8425  MODULE enable/disable indexes
8426 ****************************************************************************/
8427 
8428 /*
8429  Disable indexes for a while
8430  SYNOPSIS
8431  disable_indexes()
8432  mode Mode
8433  RETURN VALUES
8434  0 Success
8435  != 0 Error
8436 */
8437 
8438 int ha_partition::disable_indexes(uint mode)
8439 {
8440  handler **file;
8441  int error= 0;
8442 
8443  DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8444  for (file= m_file; *file; file++)
8445  {
8446  if ((error= (*file)->ha_disable_indexes(mode)))
8447  break;
8448  }
8449  return error;
8450 }
8451 
8452 
8453 /*
8454  Enable indexes again
8455  SYNOPSIS
8456  enable_indexes()
8457  mode Mode
8458  RETURN VALUES
8459  0 Success
8460  != 0 Error
8461 */
8462 
8463 int ha_partition::enable_indexes(uint mode)
8464 {
8465  handler **file;
8466  int error= 0;
8467 
8468  DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8469  for (file= m_file; *file; file++)
8470  {
8471  if ((error= (*file)->ha_enable_indexes(mode)))
8472  break;
8473  }
8474  return error;
8475 }
8476 
8477 
8478 /*
8479  Check if indexes are disabled
8480  SYNOPSIS
8481  indexes_are_disabled()
8482 
8483  RETURN VALUES
8484  0 Indexes are enabled
8485  != 0 Indexes are disabled
8486 */
8487 
8488 int ha_partition::indexes_are_disabled(void)
8489 {
8490  handler **file;
8491  int error= 0;
8492 
8493  DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8494  for (file= m_file; *file; file++)
8495  {
8496  if ((error= (*file)->indexes_are_disabled()))
8497  break;
8498  }
8499  return error;
8500 }
8501 
8502 
/**
  Scan one partition for rows that belong in another partition.

  Used by CHECK/REPAIR TABLE.  In check mode it stops and reports on the
  first misplaced row; in repair mode it moves each misplaced row to its
  correct partition (insert into correct, delete from wrong — the whole
  repair runs as one large transaction, no intermediate commits).

  @param read_part_id  Partition to scan.
  @param repair        true: move misplaced rows; false: only report.

  @return Operation status.
    @retval 0     Success (all rows in place, or all moved).
    @retval != 0  Error / admin status (e.g. HA_ADMIN_NEEDS_UPGRADE,
                  HA_ADMIN_CORRUPT).
*/
int ha_partition::check_misplaced_rows(uint read_part_id, bool repair)
{
  int result= 0;
  uint32 correct_part_id;
  longlong func_value;
  longlong num_misplaced_rows= 0;

  DBUG_ENTER("ha_partition::check_misplaced_rows");

  DBUG_ASSERT(m_file);

  if (repair)
  {
    /* We must read the full row, if we need to move it! */
    bitmap_set_all(table->read_set);
    bitmap_set_all(table->write_set);
  }
  else
  {
    /* Only need to read the partitioning fields. */
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }

  if ((result= m_file[read_part_id]->ha_rnd_init(1)))
    DBUG_RETURN(result);

  while (true)
  {
    if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0)))
    {
      if (result == HA_ERR_RECORD_DELETED)
        continue;
      if (result != HA_ERR_END_OF_FILE)
        break;

      if (num_misplaced_rows > 0)
      {
        print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "warning",
                        table_share->db.str, table->alias,
                        opt_op_name[REPAIR_PARTS],
                        "Moved %lld misplaced rows",
                        num_misplaced_rows);
      }
      /* End-of-file reached, all rows are now OK, reset result and break. */
      result= 0;
      break;
    }

    /* Compute which partition the just-read row should belong to. */
    result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
                                          &func_value);
    if (result)
      break;

    if (correct_part_id != read_part_id)
    {
      num_misplaced_rows++;
      if (!repair)
      {
        /* Check. */
        print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "error",
                        table_share->db.str, table->alias,
                        opt_op_name[CHECK_PARTS],
                        "Found a misplaced row");
        /* Break on first misplaced row! */
        result= HA_ADMIN_NEEDS_UPGRADE;
        break;
      }
      else
      {
        DBUG_PRINT("info", ("Moving row from partition %d to %d",
                            read_part_id, correct_part_id));

        /*
          Insert row into correct partition. Notice that there are no commit
          for every N rows, so the repair will be one large transaction!
        */
        if ((result= m_file[correct_part_id]->ha_write_row(m_rec0)))
        {
          /*
            We have failed to insert a row, it might have been a duplicate!
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          if (result == HA_ERR_FOUND_DUPP_KEY)
          {
            str.append("Duplicate key found, "
                       "please update or delete the record:\n");
            result= HA_ADMIN_CORRUPT;
          }
          m_err_rec= NULL;
          append_row_to_str(str);

          /*
            If the engine supports transactions, the failure will be
            rolled back.
          */
          if (!m_file[correct_part_id]->has_transactions())
          {
            /* Log this error, so the DBA can notice it and fix it! */
            sql_print_error("Table '%-192s' failed to move/insert a row"
                            " from part %d into part %d:\n%s",
                            table->s->table_name.str,
                            read_part_id,
                            correct_part_id,
                            str.c_ptr_safe());
          }
          print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "error",
                          table_share->db.str, table->alias,
                          opt_op_name[REPAIR_PARTS],
                          "Failed to move/insert a row"
                          " from part %d into part %d:\n%s",
                          read_part_id,
                          correct_part_id,
                          str.c_ptr_safe());
          break;
        }

        /* Delete row from wrong partition. */
        if ((result= m_file[read_part_id]->ha_delete_row(m_rec0)))
        {
          if (m_file[correct_part_id]->has_transactions())
            break;
          /*
            We have introduced a duplicate, since we failed to remove it
            from the wrong partition.
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          m_err_rec= NULL;
          append_row_to_str(str);

          /* Log this error, so the DBA can notice it and fix it! */
          sql_print_error("Table '%-192s': Delete from part %d failed with"
                          " error %d. But it was already inserted into"
                          " part %d, when moving the misplaced row!"
                          "\nPlease manually fix the duplicate row:\n%s",
                          table->s->table_name.str,
                          read_part_id,
                          result,
                          correct_part_id,
                          str.c_ptr_safe());
          break;
        }
      }
    }
  }

  int tmp_result= m_file[read_part_id]->ha_rnd_end();
  DBUG_RETURN(result ? result : tmp_result);
}
8666 
8667 
/*
  Admin message template used when KEY () partitioning hashing changed
  between server versions (see bug#14521864 reference below).
*/
#define KEY_PARTITIONING_CHANGED_STR \
  "KEY () partitioning changed, please run:\n" \
  "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"
8671 
8673 {
8674  int error= HA_ADMIN_NEEDS_CHECK;
8675  DBUG_ENTER("ha_partition::check_for_upgrade");
8676 
8677  /*
8678  This is called even without FOR UPGRADE,
8679  if the .frm version is lower than the current version.
8680  In that case return that it needs checking!
8681  */
8682  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
8683  DBUG_RETURN(error);
8684 
8685  /*
8686  Partitions will be checked for during their ha_check!
8687 
8688  Check if KEY (sub)partitioning was used and any field's hash calculation
8689  differs from 5.1, see bug#14521864.
8690  */
8691  if (table->s->mysql_version < 50503 && // 5.1 table (<5.5.3)
8692  ((m_part_info->part_type == HASH_PARTITION && // KEY partitioned
8693  m_part_info->list_of_part_fields) ||
8694  (m_is_sub_partitioned && // KEY subpartitioned
8695  m_part_info->list_of_subpart_fields)))
8696  {
8697  Field **field;
8698  if (m_is_sub_partitioned)
8699  {
8700  field= m_part_info->subpart_field_array;
8701  }
8702  else
8703  {
8704  field= m_part_info->part_field_array;
8705  }
8706  for (; *field; field++)
8707  {
8708  switch ((*field)->real_type()) {
8709  case MYSQL_TYPE_TINY:
8710  case MYSQL_TYPE_SHORT:
8711  case MYSQL_TYPE_LONG:
8712  case MYSQL_TYPE_FLOAT:
8713  case MYSQL_TYPE_DOUBLE:
8714  case MYSQL_TYPE_NEWDECIMAL:
8715  case MYSQL_TYPE_TIMESTAMP:
8716  case MYSQL_TYPE_LONGLONG:
8717  case MYSQL_TYPE_INT24:
8718  case MYSQL_TYPE_TIME:
8719  case MYSQL_TYPE_DATETIME:
8720  case MYSQL_TYPE_YEAR:
8721  case MYSQL_TYPE_NEWDATE:
8722  case MYSQL_TYPE_ENUM:
8723  case MYSQL_TYPE_SET:
8724  {
8725  THD *thd= ha_thd();
8726  char *part_buf;
8727  String db_name, table_name;
8728  uint part_buf_len;
8729  bool skip_generation= false;
8730  partition_info::enum_key_algorithm old_algorithm;
8731  old_algorithm= m_part_info->key_algorithm;
8732  error= HA_ADMIN_FAILED;
8733  append_identifier(ha_thd(), &db_name, table_share->db.str,
8734  table_share->db.length);
8735  append_identifier(ha_thd(), &table_name, table_share->table_name.str,
8736  table_share->table_name.length);
8737  if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
8738  {
8739  /*
8740  Only possible when someone tampered with .frm files,
8741  like during tests :)
8742  */
8743  skip_generation= true;
8744  }
8745  m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
8746  if (skip_generation ||
8747  !(part_buf= generate_partition_syntax(m_part_info,
8748  &part_buf_len,
8749  true,
8750  true,
8751  NULL,
8752  NULL,
8753  NULL)) ||
8754  print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
8755  table_share->db.str,
8756  table->alias,
8757  opt_op_name[CHECK_PARTS],
8758  KEY_PARTITIONING_CHANGED_STR,
8759  db_name.c_ptr_safe(),
8760  table_name.c_ptr_safe(),
8761  part_buf))
8762  {
8763  /* Error creating admin message (too long string?). */
8764  print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
8765  table_share->db.str, table->alias,
8766  opt_op_name[CHECK_PARTS],
8767  KEY_PARTITIONING_CHANGED_STR,
8768  db_name.c_ptr_safe(), table_name.c_ptr_safe(),
8769  "<old partition clause>, but add ALGORITHM = 1"
8770  " between 'KEY' and '(' to change the metadata"
8771  " without the need of a full table rebuild.");
8772  }
8773  m_part_info->key_algorithm= old_algorithm;
8774  DBUG_RETURN(error);
8775  }
8776  default:
8777  /* Not affected! */
8778  ;
8779  }
8780  }
8781  }
8782 
8783  DBUG_RETURN(error);
8784 }
8785 
8786 
/* Storage engine descriptor exposed through the plugin declaration below. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
8789 
/* Plugin registration for the partition storage engine helper. */
mysql_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &partition_storage_engine,
  "partition",
  "Mikael Ronstrom, MySQL AB",
  "Partition Storage Engine Helper",
  PLUGIN_LICENSE_GPL,
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL, /* status variables */
  NULL, /* system variables */
  NULL, /* config options */
  0, /* flags */
}
mysql_declare_plugin_end;
8807 
8808 #endif