MySQL 5.6.14 Source Code Document
sql_join_buffer.cc
1 /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
26 #include "sql_priv.h"
27 #include "sql_select.h"
28 #include "key.h"
29 #include "sql_optimizer.h" // JOIN
30 #include "sql_join_buffer.h"
31 #include "sql_tmp_table.h" // instantiate_tmp_table()
32 
33 #include <algorithm>
34 using std::max;
35 using std::min;
36 
37 
38 /*****************************************************************************
39  * Join cache module
40 ******************************************************************************/
41 
42 /*
43  Fill in the descriptor of a flag field associated with a join cache
44 
45  SYNOPSIS
46  add_flag_field_to_join_cache()
47  str position in a record buffer to copy the field from/to
48  length length of the field
49  field IN/OUT pointer to the field descriptor to fill in
50 
51  DESCRIPTION
52  The function fills in the descriptor of a cache flag field to which
53  the parameter 'field' points. The function uses the first two
54  parameters to set the position in the record buffer from/to which
55  the field value is to be copied and the length of the copied fragment.
56  Before returning the result the function increments the value of
57  *field by 1.
58  The function ignores the fields 'blob_length' and 'offset' of the
59  descriptor.
60 
61  RETURN
62  the length of the field
63 */
64 
65 static
66 uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
67 {
68  CACHE_FIELD *copy= *field;
69  copy->str= str;
70  copy->length= length;
71  copy->type= 0;
72  copy->field= 0;
73  copy->referenced_field_no= 0;
74  copy->next_copy_rowid= NULL;
75  (*field)++;
76  return length;
77 }
78 
79 
80 /*
81  Fill in the descriptors of table data fields associated with a join cache
82 
83  SYNOPSIS
84  add_table_data_fields_to_join_cache()
85  tab descriptors of fields from this table are to be filled
86  field_set descriptors for only these fields are to be created
87  field_cnt IN/OUT counter of data fields
88  descr IN/OUT pointer to the first descriptor to be filled
89  field_ptr_cnt IN/OUT counter of pointers to the data fields
90  descr_ptr IN/OUT pointer to the first pointer to blob descriptors
91 
92  DESCRIPTION
93  The function fills in the descriptors of cache data fields from the table
94  'tab'. The descriptors are filled only for the fields marked in the
95  bitmap 'field_set'.
96  The function fills the descriptors starting from the position pointed
97  by 'descr'. If an added field is of a BLOB type then a pointer to
98  its descriptor is added to the array descr_ptr.
99  On return 'descr' points to the position after the last added
100  descriptor while 'descr_ptr' points to the position right after the
101  last added pointer.
102 
103  RETURN
104  the total length of the added fields
105 */
106 
107 static
108 uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
109  MY_BITMAP *field_set,
110  uint *field_cnt,
111  CACHE_FIELD **descr,
112  uint *field_ptr_cnt,
113  CACHE_FIELD ***descr_ptr)
114 {
115  Field **fld_ptr;
116  uint len= 0;
117  CACHE_FIELD *copy= *descr;
118  CACHE_FIELD **copy_ptr= *descr_ptr;
119  uint used_fields= bitmap_bits_set(field_set);
120  for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
121  {
122  if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
123  {
124  len+= (*fld_ptr)->fill_cache_field(copy);
125  if (copy->type == CACHE_BLOB)
126  {
127  *copy_ptr= copy;
128  copy_ptr++;
129  (*field_ptr_cnt)++;
130  }
131  copy->field= *fld_ptr;
132  copy->referenced_field_no= 0;
133  copy->next_copy_rowid= NULL;
134  copy++;
135  (*field_cnt)++;
136  used_fields--;
137  }
138  }
139  *descr= copy;
140  *descr_ptr= copy_ptr;
141  return len;
142 }
143 
144 
145 /*
146  Determine various counters of fields associated with a record in the cache
147 
148  SYNOPSIS
149  calc_record_fields()
150 
151  DESCRIPTION
152  The function counts the total number of fields stored in a record
153  of the cache and saves this number in the 'fields' member. It also
154  determines the number of flag fields and the number of blobs.
155  The function sets 'with_match_flag' on if 'join_tab' needs a match flag
156  i.e. if it is the first inner table of an outer join, or of a semi-join
157  with FirstMatch strategy.
158 
159  RETURN
160  none
161 */
162 
163 void JOIN_CACHE::calc_record_fields()
164 {
165  /*
166  If there is a previous cache, start with the corresponding table, otherwise:
167  - if in a regular execution, start with the first non-const table.
168  - if in a materialized subquery, start with the first table of the subquery.
169  */
170  JOIN_TAB *tab = prev_cache ?
171  prev_cache->join_tab :
172  sj_is_materialize_strategy(join_tab->get_sj_strategy()) ?
173  join_tab->first_sj_inner_tab :
174  join->join_tab+join->const_tables;
175  tables= join_tab-tab;
176 
177  fields= 0;
178  blobs= 0;
179  flag_fields= 0;
180  data_field_count= 0;
181  data_field_ptr_count= 0;
182  referenced_fields= 0;
183 
184  for ( ; tab < join_tab ; tab++)
185  {
186  calc_used_field_length(join->thd, tab);
187  flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields);
188  flag_fields+= test(tab->table->maybe_null);
189  fields+= tab->used_fields;
190  blobs+= tab->used_blobs;
191 
192  fields+= tab->check_rowid_field();
193  }
194  if ((with_match_flag= (join_tab->is_first_inner_for_outer_join() ||
195  (join_tab->first_sj_inner_tab == join_tab &&
196  join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH))))
197  flag_fields++;
198  fields+= flag_fields;
199 }
200 
201 /*
202  Allocate memory for descriptors and pointers to them associated with the cache
203 
204  SYNOPSIS
205  alloc_fields()
206 
207  DESCRIPTION
208  The function allocates memory for the array of fields descriptors
209  and the array of pointers to the field descriptors used to copy
210  join record data from record buffers into the join buffer and
211  backward. Some pointers refer to the field descriptor associated
212  with previous caches. They are placed at the beginning of the
213  array of pointers and their total number is specified by the parameter
214  'external_fields'.
215  A pointer to the first array is assigned to field_descr and the
216  number of elements is precalculated by the function calc_record_fields.
217  The allocated arrays are adjacent.
218 
219  NOTES
220  The memory is allocated in join->thd->memroot
221 
222  RETURN
223  0 if the arrays have been successfully allocated, 1 otherwise
224 */
225 
226 int JOIN_CACHE::alloc_fields(uint external_fields)
227 {
228  uint ptr_cnt= external_fields+blobs+1;
229  uint fields_size= sizeof(CACHE_FIELD)*fields;
230  field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
231  sizeof(CACHE_FIELD*)*ptr_cnt);
232  blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
233  return (field_descr == NULL);
234 }
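
/*
  Editor's illustration (not part of the original file): alloc_fields()
  above carves a single allocation into two adjacent arrays, 'fields'
  field descriptors followed by 'ptr_cnt' pointers to blob descriptors.
  A minimal stand-alone sketch, using a simplified stand-in for
  CACHE_FIELD and plain malloc instead of sql_alloc:
*/
#include <cstdio>
#include <cstdlib>

struct cache_field_sketch { unsigned char *str; unsigned length; };

int main()
{
  unsigned fields= 8, blobs= 2, external_fields= 1;
  unsigned ptr_cnt= external_fields + blobs + 1;
  size_t fields_size= sizeof(cache_field_sketch) * fields;

  /* One block: descriptor array first, pointer array right after it */
  void *block= malloc(fields_size + sizeof(cache_field_sketch *) * ptr_cnt);
  if (block == NULL)
    return 1;
  cache_field_sketch *field_descr= (cache_field_sketch *) block;
  cache_field_sketch **blob_ptr=
    (cache_field_sketch **) ((unsigned char *) field_descr + fields_size);

  printf("%u descriptors at %p, %u pointers at %p\n",
         fields, (void *) field_descr, ptr_cnt, (void *) blob_ptr);
  free(block);
  return 0;
}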
235 
236 /*
237  Create descriptors of the record flag fields stored in the join buffer
238 
239  SYNOPSIS
240  create_flag_fields()
241 
242  DESCRIPTION
243  The function creates descriptors of the record flag fields stored
244  in the join buffer. These are descriptors for:
245  - an optional match flag field,
246  - table null bitmap fields,
247  - table null row fields.
248  The match flag field is created when 'join_tab' is the first inner
249  table of an outer join or a semi-join. A null bitmap field is
250  created for any table whose fields are to be stored in the join
251  buffer if at least one of these fields is nullable or is a BIT field
252  whose bits are partially stored with null bits. A null row flag
253  is created for any table assigned to the cache if it is an inner
254  table of an outer join.
255  The descriptors for flag fields are placed one after another at the
256  beginning of the array of field descriptors 'field_descr' that
257  contains 'fields' elements. If there is a match flag field the
258  descriptor for it is always first in the sequence of flag fields.
259  The descriptors for other flag fields can follow in an arbitrary
260  order.
261  The flag field values follow in a record stored in the join buffer
262  in the same order as field descriptors, with the match flag always
263  following first.
264  The function sets the value of 'flag_fields' to the total number
265  of the descriptors created for the flag fields.
266  The function sets the value of 'length' to the total length of the
267  flag fields.
268 
269  RETURN
270  none
271 */
272 
273 void JOIN_CACHE::create_flag_fields()
274 {
275  CACHE_FIELD *copy;
276  JOIN_TAB *tab;
277 
278  copy= field_descr;
279 
280  length=0;
281 
282  /* If there is a match flag the first field is always used for this flag */
283  if (with_match_flag)
284  length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
285  sizeof(join_tab->found),
286  &copy);
287 
288  /* Create fields for all null bitmaps and null row flags that are needed */
289  for (tab= join_tab-tables; tab < join_tab; tab++)
290  {
291  TABLE *table= tab->table;
292 
293  /* Create a field for the null bitmap from table if needed */
294  if (tab->used_null_fields || tab->used_uneven_bit_fields)
295  length+= add_flag_field_to_join_cache(table->null_flags,
296  table->s->null_bytes,
297  &copy);
298 
299  /* Create a field for the null row flag if needed */
300  if (table->maybe_null)
301  length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
302  sizeof(table->null_row),
303  &copy);
304  }
305 
306  /* Theoretically the new value of flag_fields can be less than the old one */
307  flag_fields= copy-field_descr;
308 }
309 
310 
311 /*
312  Create descriptors of all remaining data fields stored in the join buffer
313 
314  SYNOPSIS
315  create_remaining_fields()
316  all_read_fields indicates that descriptors for all read data fields
317  are to be created
318 
319  DESCRIPTION
320  The function creates descriptors for all remaining data fields of a
321  record from the join buffer. If the parameter 'all_read_fields' is
322  true the function creates fields for all read record fields that
323  comprise the partial join record joined with join_tab. Otherwise,
324  for each table tab, the set of the read fields for which the descriptors
325  have to be added is determined as the difference between all read fields
326  and those for which the descriptors have already been created.
327  The latter are supposed to be marked in the bitmap tab->table->tmp_set.
328  The function increases the value of 'length' to the the total length of
329  the added fields.
330 
331  NOTES
332  If 'all_read_fields' is false the function modifies the value of
333  tab->table->tmp_set for each table whose fields are stored in the cache.
334  The function calls the method Field::fill_cache_field to figure out
335  the type of the cache field and the maximal length of its representation
336  in the join buffer. If this is a blob field then additionally a pointer
337  to this field is added as an element of the array blob_ptr. For a blob
338  field only the size of the length of the blob data is taken into account.
339  It is assumed that 'data_field_count' contains the number of descriptors
340  for data fields that have been already created and 'data_field_ptr_count'
341  contains the number of the pointers to such descriptors having been
342  stored up to the moment.
343 
344  RETURN
345  none
346 */
347 
348 void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
349 {
350  JOIN_TAB *tab;
351  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
352  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
353 
354  for (tab= join_tab-tables; tab < join_tab; tab++)
355  {
356  MY_BITMAP *rem_field_set;
357  TABLE *table= tab->table;
358 
359  if (all_read_fields)
360  rem_field_set= table->read_set;
361  else
362  {
363  bitmap_invert(&table->tmp_set);
364  bitmap_intersect(&table->tmp_set, table->read_set);
365  rem_field_set= &table->tmp_set;
366  }
367 
368  length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
369  &data_field_count, &copy,
370  &data_field_ptr_count,
371  &copy_ptr);
372 
373  /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
374  if (tab->keep_current_rowid)
375  {
376  copy->str= table->file->ref;
377  copy->length= table->file->ref_length;
378  copy->type= 0;
379  copy->field= 0;
380  copy->referenced_field_no= 0;
381  copy->next_copy_rowid= NULL;
382  // Chain rowid copy objects belonging to same join_tab
383  if (tab->copy_current_rowid != NULL)
384  copy->next_copy_rowid= tab->copy_current_rowid;
385  tab->copy_current_rowid= copy;
386  length+= copy->length;
387  data_field_count++;
388  copy++;
389  }
390  }
391 }
392 
393 
394 /*
395  Calculate and set all cache constants
396 
397  SYNOPSIS
398  set_constants()
399 
400  DESCRIPTION
401  The function calculates and sets all precomputed constants that are used
402  when writing records into the join buffer and reading them from it.
403  It calculates the size of offsets of a record within the join buffer
404  and of a field within a record. It also calculates the number of bytes
405  used to store record lengths.
406  The function also calculates the maximal length of the representation
407  of record in the cache excluding blob_data. This value is used when
408  making a decision whether more records should be added into the join
409  buffer or not.
410 
411  RETURN
412  none
413 */
414 
415 void JOIN_CACHE::set_constants()
416 {
417  /*
418  Any record from a BKA cache is prepended with the record length.
419  We use the record length when reading the buffer and building key values
420  for each record. The length allows us not to read the fields that are
421  not needed for keys.
422  If a record has match flag it also may be skipped when the match flag
423  is on. It happens if the cache is used for a semi-join operation or
424  for outer join when the 'not exist' optimization can be applied.
425  If some of the fields are referenced from other caches then
426  the record length allows us to easily reach the saved offsets for
427  these fields since the offsets are stored at the very end of the record.
428  However at this moment we don't know whether we have referenced fields for
429  the cache or not. Later when a referenced field is registered for the cache
430  we adjust the value of the flag 'with_length'.
431  */
432  with_length= is_key_access() || with_match_flag;
433  /*
434  At this moment we don't know yet the value of 'referenced_fields',
435  but in any case it can't be greater than the value of 'fields'.
436  */
437  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
438  (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
439  sizeof(ulong) + aux_buffer_min_size();
440  buff_size= max<size_t>(join->thd->variables.join_buff_size, 2*len);
441  size_of_rec_ofs= offset_size(buff_size);
442  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
443  size_of_fld_ofs= size_of_rec_len;
444  /*
445  The size of the offsets for referenced fields will be added later.
446  The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
447  every time when the first reference to the referenced field is registered.
448  */
449  pack_length= (with_length ? size_of_rec_len : 0) +
450  (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
451  length;
452  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
453 
455 }
456 
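
/*
  Editor's illustration (an assumption-laden sketch, not the original
  code): offset_size() used by set_constants() is defined elsewhere
  (sql_join_buffer.h); a plausible equivalent returns the number of bytes
  needed to address any position within the given size. This is how
  size_of_rec_ofs ends up depending on buff_size, and size_of_rec_len on
  the maximal record length.
*/
#include <cstdio>

static unsigned offset_size_sketch(unsigned long size)
{
  /* 1 byte covers offsets < 256, 2 bytes < 65536, otherwise 4 bytes */
  return size < 256UL ? 1 : size < 65536UL ? 2 : 4;
}

int main()
{
  unsigned long sizes[]= { 200UL, 60000UL, 262144UL };
  for (unsigned i= 0; i < 3; i++)
    printf("buffer of %8lu bytes -> %u-byte offsets\n",
           sizes[i], offset_size_sketch(sizes[i]));
  return 0;
}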
457 
466 int JOIN_CACHE::alloc_buffer()
467 {
468  buff= (uchar*) my_malloc(buff_size, MYF(0));
469  return buff == NULL;
470 }
471 
472 
473 /*
474  Initialize a BNL cache
475 
476  SYNOPSIS
477  init()
478 
479  DESCRIPTION
480  The function initializes the cache structure. It is supposed to be called
481  right after a constructor for the JOIN_CACHE_BNL.
482  The function allocates memory for the join buffer and for descriptors of
483  the record fields stored in the buffer.
484 
485  NOTES
486  The code of this function should have been included into the constructor
487  code itself. However, the new operator for the class JOIN_CACHE_BNL never
488  fails, while memory allocation for the join buffer may quite possibly
489  fail. That's why this memory allocation has to be placed in a
490  separate function that is called in tandem with the cache constructor.
491  It is quite natural to put almost all other constructor actions into
492  this function.
493 
494  RETURN
495  0 initialization with buffer allocation has succeeded
496  1 otherwise
497 */
498 
499 int JOIN_CACHE_BNL::init()
500 {
501  DBUG_ENTER("JOIN_CACHE::init");
502 
503  calc_record_fields();
504 
505  if (alloc_fields(0))
506  DBUG_RETURN(1);
507 
508  create_flag_fields();
509 
510  create_remaining_fields(TRUE);
511 
512  set_constants();
513 
514  if (alloc_buffer())
515  DBUG_RETURN(1);
516 
517  reset_cache(true);
518 
519  DBUG_RETURN(0);
520 }
521 
522 
523 /*
524  Initialize a BKA cache
525 
526  SYNOPSIS
527  init()
528 
529  DESCRIPTION
530  The function initializes the cache structure. It is supposed to be called
531  right after a constructor for the JOIN_CACHE_BKA.
532  The function allocates memory for the join buffer and for descriptors of
533  the record fields stored in the buffer.
534 
535  NOTES
536  The code of this function should have been included into the constructor
537  code itself. However, the new operator for the class JOIN_CACHE_BKA never
538  fails, while memory allocation for the join buffer may quite possibly
539  fail. That's why this memory allocation has to be placed in a
540  separate function that is called in tandem with the cache constructor.
541  It is quite natural to put almost all other constructor actions into
542  this function.
543 
544  RETURN
545  0 initialization with buffer allocation has succeeded
546  1 otherwise
547 */
548 
549 int JOIN_CACHE_BKA::init()
550 {
551  JOIN_TAB *tab;
552  JOIN_CACHE *cache;
553  local_key_arg_fields= 0;
554  external_key_arg_fields= 0;
555  DBUG_ENTER("JOIN_CACHE_BKA::init");
556 
557  calc_record_fields();
558 
559  /* Mark all fields that can be used as arguments for this key access */
560  TABLE_REF *ref= &join_tab->ref;
561  cache= this;
562  do
563  {
564  /*
565  Traverse the ref expressions and find the occurrences of fields in them for
566  each table 'tab' whose fields are to be stored in the 'cache' join buffer.
567  Mark these fields in the bitmap tab->table->tmp_set.
568  For these fields count the number of them stored in this cache and the
569  total number of them stored in the previous caches. Save the result
570  of the counting 'in local_key_arg_fields' and 'external_key_arg_fields'
571  of the counting in 'local_key_arg_fields' and 'external_key_arg_fields'
572  */
573  for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
574  {
575  uint key_args;
576  bitmap_clear_all(&tab->table->tmp_set);
577  for (uint i= 0; i < ref->key_parts; i++)
578  {
579  Item *ref_item= ref->items[i];
580  if (!(tab->table->map & ref_item->used_tables()))
581  continue;
582  ref_item->walk(&Item::add_field_to_set_processor, 1,
583  (uchar *) tab->table);
584  }
585  if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
586  {
587  if (cache == this)
588  local_key_arg_fields+= key_args;
589  else
590  external_key_arg_fields+= key_args;
591  }
592  }
593  cache= cache->prev_cache;
594  }
595  while (cache);
596 
597  if (alloc_fields(external_key_arg_fields))
598  DBUG_RETURN(1);
599 
600  create_flag_fields();
601 
602  /*
603  Save pointers to the cache fields in previous caches
604  that are used to build keys for this key access.
605  */
606  cache= this;
607  uint ext_key_arg_cnt= external_key_arg_fields;
608  CACHE_FIELD *copy;
609  CACHE_FIELD **copy_ptr= blob_ptr;
610  while (ext_key_arg_cnt)
611  {
612  cache= cache->prev_cache;
613  for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
614  {
615  CACHE_FIELD *copy_end;
616  MY_BITMAP *key_read_set= &tab->table->tmp_set;
617  /* key_read_set contains the bitmap of tab's fields referenced by ref */
618  if (bitmap_is_clear_all(key_read_set))
619  continue;
620  copy_end= cache->field_descr+cache->fields;
621  for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
622  {
623  /*
624  (1) - when we store rowids for DuplicateWeedout, they have
625  copy->field==NULL
626  */
627  if (copy->field && // (1)
628  copy->field->table == tab->table &&
629  bitmap_is_set(key_read_set, copy->field->field_index))
630  {
631  *copy_ptr++= copy;
632  ext_key_arg_cnt--;
633  if (!copy->referenced_field_no)
634  {
635  /*
636  Register the referenced field 'copy':
637  - set the offset number in copy->referenced_field_no,
638  - adjust the value of the flag 'with_length',
639  - adjust the values of 'pack_length' and
640  of 'pack_length_with_blob_ptrs'.
641  */
642  copy->referenced_field_no= ++cache->referenced_fields;
643  cache->with_length= TRUE;
644  cache->pack_length+= cache->get_size_of_fld_offset();
645  cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
646  }
647  }
648  }
649  }
650  }
651  /* After this 'blob_ptr' shall not be changed */
652  blob_ptr= copy_ptr;
653 
654  /* Now create local fields that are used to build ref for this key access */
655  copy= field_descr+flag_fields;
656  for (tab= join_tab-tables; tab < join_tab ; tab++)
657  {
658  length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
659  &data_field_count, &copy,
660  &data_field_ptr_count,
661  &copy_ptr);
662  }
663 
664  use_emb_key= check_emb_key_usage();
665 
666  create_remaining_fields(FALSE);
667 
668  set_constants();
669 
670  if (alloc_buffer())
671  DBUG_RETURN(1);
672 
673  reset_cache(true);
674 
675  DBUG_RETURN(0);
676 }
677 
678 
679 /*
680  Check the possibility to read the access keys directly from the join buffer
681 
682  SYNOPSIS
683  check_emb_key_usage()
684 
685  DESCRIPTION
686  The function checks some conditions at which the key values can be read
687  directly from the join buffer. This is possible when the key values can be
688  composed by concatenation of the record fields stored in the join buffer.
689  Sometimes when the access key is multi-component the function has to re-order
690  the fields written into the join buffer to make keys embedded. If key
691  values for the key access are detected as embedded then 'use_emb_key'
692  is set to TRUE.
693 
694  EXAMPLE
695  Let table t2 have an index defined on the columns a,b. Let's assume also
696  that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
697  of the integer type. Then if the query
698  SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
699  is executed with a join cache in such a way that t1 is the driving
700  table then the key values to access table t2 can be read directly
701  from the join buffer.
702 
703  NOTES
704  In some cases key values could be read directly from the join buffer but
705  we still do not consider them embedded. In the future we'll expand
706  the class of keys which we identify as embedded.
707 
708  RETURN
709  TRUE - key values will be considered as embedded,
710  FALSE - otherwise.
711 */
712 
713 bool JOIN_CACHE_BKA::check_emb_key_usage()
714 {
715  uint i;
716  Item *item;
717  KEY_PART_INFO *key_part;
718  CACHE_FIELD *copy;
719  CACHE_FIELD *copy_end;
720  uint len= 0;
721  TABLE *table= join_tab->table;
722  TABLE_REF *ref= &join_tab->ref;
723  KEY *keyinfo= table->key_info+ref->key;
724 
725  /*
726  If some of the key arguments are not from the local cache the key
727  is not considered as embedded.
728  TODO:
729  Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
730  */
731  if (external_key_arg_fields != 0)
732  return FALSE;
733  /*
734  If the number of the local key arguments is not equal to the number
735  of key parts the key value cannot be read directly from the join buffer.
736  */
737  if (local_key_arg_fields != ref->key_parts)
738  return FALSE;
739 
740  /*
741  A key is not considered embedded if one of the following is true:
742  - one of its key parts is not equal to a field
743  - it is a partial key
744  - definition of the argument field does not coincide with the
745  definition of the corresponding key component
746  - some of the key components are nullable
747  */
748  for (i=0; i < ref->key_parts; i++)
749  {
750  item= ref->items[i]->real_item();
751  if (item->type() != Item::FIELD_ITEM)
752  return FALSE;
753  key_part= keyinfo->key_part+i;
754  if (key_part->key_part_flag & HA_PART_KEY_SEG)
755  return FALSE;
756  if (!key_part->field->eq_def(((Item_field *) item)->field))
757  return FALSE;
758  if (key_part->field->maybe_null())
759  {
760  return FALSE;
761  /*
762  If this is changed so that embedded keys may contain nullable
763  components, get_next_key() and put_record() will have to test
764  ref->null_rejecting in the "embedded keys" case too.
765  */
766  }
767  }
768 
769  copy= field_descr+flag_fields;
770  copy_end= copy+local_key_arg_fields;
771  for ( ; copy < copy_end; copy++)
772  {
773  /*
774  If some of the key arguments are of variable length the key
775  is not considered as embedded.
776  */
777  if (copy->type != 0)
778  return FALSE;
779  /*
780  If some of the key arguments are bit fields whose bits are partially
781  stored with null bits the key is not considered as embedded.
782  */
783  if (copy->field->type() == MYSQL_TYPE_BIT &&
784  ((Field_bit*) (copy->field))->bit_len)
785  return FALSE;
786  len+= copy->length;
787  }
788 
789  emb_key_length= len;
790 
791  /*
792  Make sure that key fields follow the order of the corresponding
793  key components these fields are equal to. For this the descriptors
794  of the fields that comprise the key might be re-ordered.
795  */
796  for (i= 0; i < ref->key_parts; i++)
797  {
798  uint j;
799  Item *item= ref->items[i]->real_item();
800  Field *fld= ((Item_field *) item)->field;
801  CACHE_FIELD *init_copy= field_descr+flag_fields+i;
802  for (j= i, copy= init_copy; i < local_key_arg_fields; i++, copy++)
803  {
804  if (fld->eq(copy->field))
805  {
806  if (j != i)
807  {
808  CACHE_FIELD key_part_copy= *copy;
809  *copy= *init_copy;
810  *init_copy= key_part_copy;
811  }
812  break;
813  }
814  }
815  }
816 
817  return TRUE;
818 }
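
/*
  Editor's illustration (a toy model, not the original code): for the
  query in the EXAMPLE above, an "embedded" key means the lookup key for
  the index on t2(a,b) is simply the concatenation of the cached images
  of t1.a and t1.b, so it can be read from the join buffer in place
  instead of being re-assembled field by field.
*/
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
  /* Two 4-byte key argument images as they would sit in the join buffer */
  uint32_t t1_a= 7, t1_b= 42;
  unsigned char join_buffer[8];
  memcpy(join_buffer, &t1_a, 4);
  memcpy(join_buffer + 4, &t1_b, 4);

  /* The embedded key is the buffer fragment itself: no copying needed */
  const unsigned char *emb_key= join_buffer;
  unsigned emb_key_length= 8;
  printf("embedded key: %u bytes read directly at %p\n",
         emb_key_length, (const void *) emb_key);
  return 0;
}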
819 
820 
821 /*
822  Calculate the increment of the MRR buffer for a record write
823 
824  SYNOPSIS
825  aux_buffer_incr()
826 
827  DESCRIPTION
828  This implementation of the virtual function aux_buffer_incr determines
829  for how much the size of the MRR buffer should be increased when another
830  record is added to the cache.
831 
832  RETURN
833  the increment of the size of the MRR buffer for the next record
834 */
835 
836 uint JOIN_CACHE_BKA::aux_buffer_incr()
837 {
838  uint incr= 0;
839  TABLE_REF *ref= &join_tab->ref;
840  TABLE *tab= join_tab->table;
841  uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
842  set_if_bigger(rec_per_key, 1);
843  if (records == 1)
844  incr= ref->key_length + tab->file->ref_length;
845  /*
846  When adding a new record to the join buffer this can match
847  multiple keys in this table. We use rec_per_key as estimate for
848  the number of records that will match and reserve space in the
849  DS-MRR sort buffer for this many record references.
850  */
851  incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
852  return incr;
853 }
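
/*
  Editor's illustration: the MRR buffer increment computed above, as a
  stand-alone function with made-up statistics (key_length, ref_length,
  mrr_length_per_rec and rec_per_key are stand-ins for the values the
  optimizer and storage engine would supply).
*/
#include <cstdio>

static unsigned aux_buffer_incr_sketch(unsigned long records,
                                       unsigned key_length,
                                       unsigned ref_length,
                                       unsigned mrr_length_per_rec,
                                       unsigned rec_per_key)
{
  unsigned incr= 0;
  if (rec_per_key < 1)
    rec_per_key= 1;                      /* set_if_bigger(rec_per_key, 1) */
  if (records == 1)                      /* first record pays a fixed cost */
    incr= key_length + ref_length;
  incr+= mrr_length_per_rec * rec_per_key;
  return incr;
}

int main()
{
  /* First cached record: fixed cost plus per-match reservation */
  printf("first record:  %u bytes\n",
         aux_buffer_incr_sketch(1, 8, 6, 10, 3));  /* 8+6 + 10*3 = 44 */
  /* Subsequent records: per-match reservation only */
  printf("later records: %u bytes\n",
         aux_buffer_incr_sketch(2, 8, 6, 10, 3));  /* 10*3 = 30 */
  return 0;
}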
854 
855 
862 uint JOIN_CACHE_BKA::aux_buffer_min_size() const
863 {
864  /*
865  For DS-MRR to work, the sort buffer must have space to store the
866  reference (or primary key) for at least one record.
867  */
868  return join_tab->table->file->stats.mrr_length_per_rec;
869 }
870 
871 
872 /*
873  Check if the record combination matches the index condition
874 
875  SYNOPSIS
876  JOIN_CACHE_BKA::skip_index_tuple()
877  rseq Value returned by bka_range_seq_init()
878  range_info MRR range association data
879 
880  DESCRIPTION
881  This function is invoked from MRR implementation to check if an index
882  tuple matches the index condition. It is used in the case where the index
883  condition actually depends on both columns of the used index and columns
884  from previous tables.
885 
886  Accessing columns of the previous tables requires special handling with
887  BKA. The idea of BKA is to collect record combinations in a buffer and
888  then do a batch of ref access lookups, i.e. by the time we're doing a
889  lookup its previous-records-combination is not in prev_table->record[0]
890  but somewhere in the join buffer.
891 
892  We need to get it from there back into prev_table(s)->record[0] before we
893  can evaluate the index condition, and that's why we need this function
894  instead of regular IndexConditionPushdown.
895 
896  NOTE
897  Possible optimization:
898  Before we unpack the record from a previous table
899  check if this table is used in the condition.
900  If so then unpack the record otherwise skip the unpacking.
901  This should be done by a special virtual method
902  get_partial_record_by_pos().
903 
904  RETURN
905  0 The record combination satisfies the index condition
906  1 Otherwise
907 */
908 
909 bool JOIN_CACHE_BKA::skip_index_tuple(range_seq_t rseq, char *range_info)
910 {
911  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
912  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
913  cache->get_record_by_pos((uchar*)range_info);
914  DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
915 }
916 
917 
918 /*
919  Check if the record combination matches the index condition
920 
921  SYNOPSIS
922  bka_skip_index_tuple()
923  rseq Value returned by bka_range_seq_init()
924  range_info MRR range association data
925 
926  DESCRIPTION
927  This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
928  see comments there.
929 
930  NOTE
931  This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
932 
933  RETURN
934  0 The record combination satisfies the index condition
935  1 Otherwise
936 */
937 
938 static
939 bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
940 {
941  DBUG_ENTER("bka_skip_index_tuple");
942  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
943  DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
944 }
945 
946 
947 /*
948  Write record fields and their required offsets into the join cache buffer
949 
950  SYNOPSIS
951  write_record_data()
952  link a reference to the associated info in the previous cache
953  is_full OUT true if it has been decided that no more records will be
954  added to the join buffer
955 
956  DESCRIPTION
957  This function puts into the cache buffer the following info that it reads
958  from the join record buffers or computes somehow:
959  (1) the length of all fields written for the record (optional)
960  (2) an offset to the associated info in the previous cache (if there is any)
961  determined by the link parameter
962  (3) all flag fields of the tables whose data field are put into the cache:
963  - match flag (optional),
964  - null bitmaps for all tables,
965  - null row flags for all tables
966  (4) values of all data fields including
967  - full images of those fixed length data fields that cannot have
968  trailing spaces
969  - significant part of fixed length fields that can have trailing spaces
970  with the prepended length
971  - data of non-blob variable length fields with the prepended data length
972  - blob data from blob fields with the prepended data length
973  (5) record offset values for the data fields that are referred to from
974  other caches
975 
976  The record is written at the current position stored in the field 'pos'.
977  At the end of the function 'pos' points at the position right after the
978  written record data.
979  The function increments the number of records in the cache that is stored
980  in the 'records' field by 1. The function also modifies the values of
981  'curr_rec_pos' and 'last_rec_pos' to point to the written record.
982  The 'end_pos' cursor is modified accordingly.
983  The flag 'last_rec_blob_data_is_in_rec_buff' is set if the blob data
984  remains in the record buffers and is not copied to the join buffer. It may
985  happen only to the blob data from the last record added into the cache.
986 
987 
988  RETURN
989  length of the written record data
990 */
991 
992 uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
993 {
994  uint len;
995  bool last_record;
996  CACHE_FIELD *copy;
997  CACHE_FIELD *copy_end;
998  uchar *cp= pos;
999  uchar *init_pos= cp;
1000  uchar *rec_len_ptr= 0;
1001 
1002  records++; /* Increment the counter of records in the cache */
1003 
1004  len= pack_length;
1005 
1006  /* Make an adjustment for the size of the auxiliary buffer if there is any */
1007  uint incr= aux_buffer_incr();
1008  ulong rem= rem_space();
1009  aux_buff_size+= len+incr < rem ? incr : rem;
1010 
1011  /*
1012  For each blob to be put into cache save its length and a pointer
1013  to the value in the corresponding element of the blob_ptr array.
1014  Blobs with null values are skipped.
1015  Increment 'len' by the total length of all these blobs.
1016  */
1017  if (blobs)
1018  {
1019  CACHE_FIELD **copy_ptr= blob_ptr;
1020  CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
1021  for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
1022  {
1023  Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
1024  if (!blob_field->is_null())
1025  {
1026  uint blob_len= blob_field->get_length();
1027  (*copy_ptr)->blob_length= blob_len;
1028  len+= blob_len;
1029  blob_field->get_ptr(&(*copy_ptr)->str);
1030  }
1031  }
1032  }
1033 
1034  /*
1035  Check whether we won't be able to add any new record into the cache after
1036  this one because the cache will be full. Set last_record to TRUE if so.
1037  We assume that the cache will be full after the record has been written
1038  into it if either the remaining space of the cache is not big enough for the
1039  record's blob values or if there is a chance that not all non-blob fields
1040  of the next record can be placed there.
1041  This function is called only in the case when there is enough space left in
1042  the cache to store at least non-blob parts of the current record.
1043  */
1044  last_record= (len+pack_length_with_blob_ptrs) > rem_space();
1045 
1046  /*
1047  Save the position for the length of the record in the cache if it's needed.
1048  The length of the record will be inserted here when all fields of the record
1049  are put into the cache.
1050  */
1051  if (with_length)
1052  {
1053  rec_len_ptr= cp;
1054  cp+= size_of_rec_len;
1055  }
1056 
1057  /*
1058  Put a reference to the fields of the record that are stored in the previous
1059  cache if there is any. This reference is passed by the 'link' parameter.
1060  */
1061  if (prev_cache)
1062  {
1063  cp+= prev_cache->get_size_of_rec_offset();
1064  prev_cache->store_rec_ref(cp, link);
1065  }
1066 
1067  curr_rec_pos= cp;
1068 
1069  /* If there is a match flag, set its value to 0 */
1070  copy= field_descr;
1071  if (with_match_flag)
1072  *copy[0].str= 0;
1073 
1074  /* First put into the cache the values of all flag fields */
1075  copy_end= field_descr+flag_fields;
1076  for ( ; copy < copy_end; copy++)
1077  {
1078  memcpy(cp, copy->str, copy->length);
1079  cp+= copy->length;
1080  }
1081 
1082  /* Now put the values of the remaining fields, provided they are not null */
1083  copy_end= field_descr+fields;
1084  for ( ; copy < copy_end; copy++)
1085  {
1086  Field *field= copy->field;
1087  if (field && field->maybe_null() && field->is_null())
1088  {
1089  /* Do not copy a field if its value is null */
1090  if (copy->referenced_field_no)
1091  copy->offset= 0;
1092  continue;
1093  }
1094  /* Save the offset of the field to put it later at the end of the record */
1095  if (copy->referenced_field_no)
1096  copy->offset= cp-curr_rec_pos;
1097 
1098  if (copy->type == CACHE_BLOB)
1099  {
1100  Field_blob *blob_field= (Field_blob *) copy->field;
1101  if (last_record)
1102  {
1103  last_rec_blob_data_is_in_rec_buff= 1;
1104  /* Put down the length of the blob and the pointer to the data */
1105  blob_field->get_image(cp, copy->length+sizeof(char*),
1106  blob_field->charset());
1107  cp+= copy->length+sizeof(char*);
1108  }
1109  else
1110  {
1111  /* First put down the length of the blob and then copy the data */
1112  blob_field->get_image(cp, copy->length,
1113  blob_field->charset());
1114  memcpy(cp+copy->length, copy->str, copy->blob_length);
1115  cp+= copy->length+copy->blob_length;
1116  }
1117  }
1118  else
1119  {
1120  switch (copy->type) {
1121  case CACHE_VARSTR1:
1122  /* Copy the significant part of the short varstring field */
1123  len= (uint) copy->str[0] + 1;
1124  memcpy(cp, copy->str, len);
1125  cp+= len;
1126  break;
1127  case CACHE_VARSTR2:
1128  /* Copy the significant part of the long varstring field */
1129  len= uint2korr(copy->str) + 2;
1130  memcpy(cp, copy->str, len);
1131  cp+= len;
1132  break;
1133  case CACHE_STRIPPED:
1134  {
1135  /*
1136  Put down the field value stripping all trailing spaces off.
1137  After this insert the length of the written sequence of bytes.
1138  */
1139  uchar *str, *end;
1140  for (str= copy->str, end= str+copy->length;
1141  end > str && end[-1] == ' ';
1142  end--) ;
1143  len=(uint) (end-str);
1144  int2store(cp, len);
1145  memcpy(cp+2, str, len);
1146  cp+= len+2;
1147  break;
1148  }
1149  default:
1150  /* Copy the entire image of the field from the record buffer */
1151  memcpy(cp, copy->str, copy->length);
1152  cp+= copy->length;
1153  }
1154  }
1155  }
1156 
1157  /* Add the offsets of the fields that are referenced from other caches */
1158  if (referenced_fields)
1159  {
1160  uint cnt= 0;
1161  for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
1162  {
1163  if (copy->referenced_field_no)
1164  {
1165  store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
1166  copy->offset);
1167  cnt++;
1168  }
1169  }
1170  cp+= size_of_fld_ofs*cnt;
1171  }
1172 
1173  if (rec_len_ptr)
1174  store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
1175  last_rec_pos= curr_rec_pos;
1176  end_pos= pos= cp;
1177  *is_full= last_record;
1178  return (uint) (cp-init_pos);
1179 }
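
/*
  Editor's illustration: how the CACHE_STRIPPED case above packs a CHAR
  value into the join buffer, trailing spaces removed, behind a 2-byte
  length prefix. int2store() is assumed to be a little-endian 2-byte
  store, as in the server's portability macros.
*/
#include <cstdio>
#include <cstring>

static unsigned pack_stripped(unsigned char *to, const unsigned char *str,
                              unsigned field_length)
{
  const unsigned char *end= str + field_length;
  while (end > str && end[-1] == ' ')    /* strip trailing spaces */
    end--;
  unsigned len= (unsigned) (end - str);
  to[0]= (unsigned char) (len & 0xFF);   /* int2store equivalent */
  to[1]= (unsigned char) (len >> 8);
  memcpy(to + 2, str, len);
  return len + 2;                        /* bytes written to the buffer */
}

int main()
{
  unsigned char buf[32];
  unsigned n= pack_stripped(buf, (const unsigned char *) "abc       ", 10);
  printf("stored %u bytes for a CHAR(10) value 'abc'\n", n); /* 5, not 12 */
  return 0;
}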
1180 
1181 
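/*
  Reset the join buffer for reading/writing: the default implementation

  SYNOPSIS
    reset_cache()
      for_writing if TRUE the buffer is reset for writing as well

  DESCRIPTION
    This default implementation of the virtual function reset_cache()
    sets the cursor 'pos' back to the beginning of the join buffer.
    If the buffer is reset for writing, the record counter, 'last_rec_pos',
    'end_pos', the auxiliary buffer size and the flag
    'last_rec_blob_data_is_in_rec_buff' are reset as well.

  RETURN
    none
*/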
1201 void JOIN_CACHE::reset_cache(bool for_writing)
1202 {
1203  pos= buff;
1204  curr_rec_link= 0;
1205  if (for_writing)
1206  {
1207  records= 0;
1208  last_rec_pos= buff;
1209  aux_buff_size= 0;
1210  end_pos= pos;
1211  last_rec_blob_data_is_in_rec_buff= 0;
1212  }
1213 }
1214 
1215 /*
1216  Add a record into the join buffer: the default implementation
1217 
1218  SYNOPSIS
1219  put_record_in_cache()
1220 
1221  DESCRIPTION
1222  This default implementation of the virtual function put_record_in_cache
1223  writes the next matching record into the join buffer.
1224  It also links the record having been written into the join buffer with
1225  the matched record in the previous cache if there is any.
1226  The implementation assumes that the function get_curr_rec_link()
1227  will return exactly the pointer to this matched record.
1228 
1229  RETURN
1230  TRUE if it has been decided that it should be the last record
1231  in the join buffer,
1232  FALSE otherwise
1233 */
1234 
1235 bool JOIN_CACHE::put_record_in_cache()
1236 {
1237  bool is_full;
1238  uchar *link= 0;
1239  if (prev_cache)
1240  link= prev_cache->get_curr_rec_link();
1241  write_record_data(link, &is_full);
1242  return (is_full);
1243 }
1244 
1245 
1246 /*
1247  Read the next record from the join buffer: the default implementation
1248 
1249  SYNOPSIS
1250  get_record()
1251 
1252  DESCRIPTION
1253  This default implementation of the virtual function get_record
1254  reads fields of the next record from the join buffer of this cache.
1255  The function also reads all other fields associated with this record
1256  from the join buffers of the previous caches. The fields are read
1257  into the corresponding record buffers.
1258  It is supposed that 'pos' points to the position in the buffer
1259  right after the previous record when the function is called.
1260  When the function returns, the 'pos' value is updated to point
1261  to the position after the read record.
1262  The value of 'curr_rec_pos' is also updated by the function to
1263  point to the beginning of the first field of the record in the
1264  join buffer.
1265 
1266  RETURN
1267  TRUE - there are no more records to read from the join buffer
1268  FALSE - otherwise
1269 */
1270 
1271 bool JOIN_CACHE::get_record()
1272 {
1273  bool res;
1274  uchar *prev_rec_ptr= 0;
1275  if (with_length)
1276  pos+= size_of_rec_len;
1277  if (prev_cache)
1278  {
1279  pos+= prev_cache->get_size_of_rec_offset();
1280  prev_rec_ptr= prev_cache->get_rec_ref(pos);
1281  }
1282  curr_rec_pos= pos;
1283  res= (read_some_record_fields() == -1);
1284  if (!res)
1285  { // There are more records to read
1286  pos+= referenced_fields*size_of_fld_ofs;
1287  if (prev_cache)
1288  {
1289  /*
1290  read_some_record_fields() didn't read fields stored in previous
1291  buffers, read them now:
1292  */
1293  prev_cache->get_record_by_pos(prev_rec_ptr);
1294  }
1295  }
1296  return res;
1297 }
1298 
1299 
1300 /*
1301  Read a positioned record from the join buffer: the default implementation
1302 
1303  SYNOPSIS
1304  get_record_by_pos()
1305  rec_ptr position of the first field of the record in the join buffer
1306 
1307  DESCRIPTION
1308  This default implementation of the virtual function get_record_by_pos
1309  reads the fields of the record positioned at 'rec_ptr' from the join buffer.
1310  The function also reads all other fields associated with this record
1311  from the join buffers of the previous caches. The fields are read
1312  into the corresponding record buffers.
1313 
1314  RETURN
1315  none
1316 */
1317 
1318 void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
1319 {
1320  uchar *save_pos= pos;
1321  pos= rec_ptr;
1322  read_some_record_fields();
1323  pos= save_pos;
1324  if (prev_cache)
1325  {
1326  uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1327  prev_cache->get_record_by_pos(prev_rec_ptr);
1328  }
1329 }
1330 
1331 
1332 /*
1333  Test the match flag from the referenced record: the default implementation
1334 
1335  SYNOPSIS
1336  get_match_flag_by_pos()
1337  rec_ptr position of the first field of the record in the join buffer
1338 
1339  DESCRIPTION
1340  This default implementation of the virtual function get_match_flag_by_pos
1341  tests the match flag for the record pointed to by the reference at the
1342  position rec_ptr. If the match flag is placed in one of the previous
1343  buffers the function first reaches the linked record fields in that buffer.
1344 
1345  RETURN
1346  TRUE if the match flag is set on
1347  FALSE otherwise
1348 */
1349 
1350 bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
1351 {
1352  if (with_match_flag)
1353  return test(*rec_ptr);
1354  if (prev_cache)
1355  {
1356  uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1357  return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
1358  }
1359  DBUG_ASSERT(0); /* Should never reach this point */
1360  return FALSE;
1361 }
1362 
1363 
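/*
  Read some flag and data fields of a record from the join buffer

  SYNOPSIS
    read_some_record_fields()

  DESCRIPTION
    The function reads the flag fields and then the data fields of the
    record the cursor 'pos' currently points to, copying them from the
    join buffer into the corresponding record buffers. Fields stored in
    the join buffers of previous caches are not read by this function.

  RETURN
    -1 if there are no more records in the join buffer,
    the length of the read record fields otherwise
*/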
1385 int JOIN_CACHE::read_some_record_fields()
1386 {
1387  uchar *init_pos= pos;
1388 
1389  if (pos > last_rec_pos || !records)
1390  return -1;
1391 
1392  // First match flag, read null bitmaps and null_row flag
1393  read_some_flag_fields();
1394 
1395  /* Now read the remaining table fields if needed */
1396  CACHE_FIELD *copy= field_descr+flag_fields;
1397  CACHE_FIELD *copy_end= field_descr+fields;
1398  bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
1399  for ( ; copy < copy_end; copy++)
1400  read_record_field(copy, blob_in_rec_buff);
1401 
1402  return (uint) (pos-init_pos);
1403 }
1404 
1405 
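/*
  Read some flag fields of a record from the join buffer

  SYNOPSIS
    read_some_flag_fields()

  DESCRIPTION
    The function reads all flag fields of the record the cursor 'pos'
    points to, copying them from the join buffer back into the
    corresponding record buffers and advancing 'pos' past them.

  RETURN
    none
*/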
1420 void JOIN_CACHE::read_some_flag_fields()
1421 {
1422  CACHE_FIELD *copy= field_descr;
1423  CACHE_FIELD *copy_end= copy+flag_fields;
1424  for ( ; copy < copy_end; copy++)
1425  {
1426  memcpy(copy->str, pos, copy->length);
1427  pos+= copy->length;
1428  }
1429 }
1430 
1431 
1432 /*
1433  Read a data record field from the join buffer
1434 
1435  SYNOPSIS
1436  read_record_field()
1437  copy the descriptor of the data field to be read
1438  blob_in_rec_buff indicates whether this is the field from the record
1439  whose blob data are in record buffers
1440 
1441  DESCRIPTION
1442  The function reads the data field specified by the parameter copy
1443  from the join buffer into the corresponding record buffer.
1444  The field is read starting from the position 'pos'.
1445  The data of blob values is not copied from the join buffer.
1446  The function increments the value of 'pos' by the length of the
1447  read data.
1448 
1449  RETURN
1450  length of the data read from the join buffer
1451 */
1452 
1453 uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
1454 {
1455  uint len;
1456  /* Do not copy the field if its value is null */
1457  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
1458  return 0;
1459  if (copy->type == CACHE_BLOB)
1460  {
1461  Field_blob *blob_field= (Field_blob *) copy->field;
1462  /*
1463  Copy the length and the pointer to data but not the blob data
1464  itself to the record buffer
1465  */
1466  if (blob_in_rec_buff)
1467  {
1468  blob_field->set_image(pos, copy->length+sizeof(char*),
1469  blob_field->charset());
1470  len= copy->length+sizeof(char*);
1471  }
1472  else
1473  {
1474  blob_field->set_ptr(pos, pos+copy->length);
1475  len= copy->length+blob_field->get_length();
1476  }
1477  }
1478  else
1479  {
1480  switch (copy->type) {
1481  case CACHE_VARSTR1:
1482  /* Copy the significant part of the short varstring field */
1483  len= (uint) pos[0] + 1;
1484  memcpy(copy->str, pos, len);
1485  break;
1486  case CACHE_VARSTR2:
1487  /* Copy the significant part of the long varstring field */
1488  len= uint2korr(pos) + 2;
1489  memcpy(copy->str, pos, len);
1490  break;
1491  case CACHE_STRIPPED:
1492  /* Pad the value with the spaces that have been stripped off */
1493  len= uint2korr(pos);
1494  memcpy(copy->str, pos+2, len);
1495  memset(copy->str+len, ' ', copy->length-len);
1496  len+= 2;
1497  break;
1498  default:
1499  /* Copy the entire image of the field from the record buffer */
1500  len= copy->length;
1501  memcpy(copy->str, pos, len);
1502  }
1503  }
1504  pos+= len;
1505  return len;
1506 }
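
/*
  Editor's illustration: the inverse of the CACHE_STRIPPED packing shown
  for write_record_data() -- read the 2-byte length, copy the significant
  bytes, and pad the CHAR field back to full width with spaces.
  uint2korr() is assumed to be a little-endian 2-byte read.
*/
#include <cstdio>
#include <cstring>

static unsigned unpack_stripped(unsigned char *to, unsigned field_length,
                                const unsigned char *from)
{
  unsigned len= from[0] | (from[1] << 8);      /* uint2korr equivalent */
  memcpy(to, from + 2, len);
  memset(to + len, ' ', field_length - len);   /* restore stripped spaces */
  return len + 2;                              /* bytes consumed */
}

int main()
{
  unsigned char packed[]= { 3, 0, 'a', 'b', 'c' };  /* "abc", stripped */
  unsigned char field[10];
  unsigned consumed= unpack_stripped(field, sizeof(field), packed);
  printf("consumed %u bytes -> '%.10s'\n", consumed, (const char *) field);
  return 0;
}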
1507 
1508 
1509 /*
1510  Read a referenced field from the join buffer
1511 
1512  SYNOPSIS
1513  read_referenced_field()
1514  copy pointer to the descriptor of the referenced field
1515  rec_ptr pointer to the record that may contain this field
1516  len IN/OUT total length of the record fields
1517 
1518  DESCRIPTION
1519  The function checks whether copy points to a data field descriptor
1520  for this cache object. If it does not then the function returns
1521  FALSE. Otherwise the function reads the field of the record in
1522  the join buffer pointed to by 'rec_ptr' into the corresponding record
1523  buffer and returns TRUE.
1524  If the value of *len is 0 then the function sets it to the total
1525  length of the record fields including possible trailing offset
1526  values. Otherwise *len is supposed to provide this value that
1527  has been obtained earlier.
1528 
1529  RETURN
1530  TRUE 'copy' points to a data descriptor of this join cache
1531  FALSE otherwise
1532 */
1533 
1534 bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
1535  uchar *rec_ptr,
1536  uint *len)
1537 {
1538  uchar *ptr;
1539  uint offset;
1540  if (copy < field_descr || copy >= field_descr+fields)
1541  return FALSE;
1542  if (!*len)
1543  {
1544  /* Get the total length of the record fields */
1545  uchar *len_ptr= rec_ptr;
1546  if (prev_cache)
1547  len_ptr-= prev_cache->get_size_of_rec_offset();
1548  *len= get_rec_length(len_ptr-size_of_rec_len);
1549  }
1550 
1551  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
1552  offset= get_fld_offset(ptr+ *len -
1553  size_of_fld_ofs*
1554  (referenced_fields+1-copy->referenced_field_no));
1555  bool is_null= FALSE;
1556  if (offset == 0 && flag_fields)
1557  is_null= TRUE;
1558  if (is_null)
1559  copy->field->set_null();
1560  else
1561  {
1562  uchar *save_pos= pos;
1563  copy->field->set_notnull();
1564  pos= rec_ptr+offset;
1565  read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
1566  pos= save_pos;
1567  }
1568  return TRUE;
1569 }
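
/*
  Editor's illustration (a toy model with made-up sizes): the offset
  arithmetic used by read_referenced_field() above. Offsets of referenced
  fields sit at the very end of the record; field number 'no' is found at
  len - size_of_fld_ofs * (referenced_fields + 1 - no) from the record
  start. Here the offsets are 1 byte each for simplicity.
*/
#include <cstdio>

int main()
{
  /* record: payload 'a','b','c', then the offsets of fields #1 and #2 */
  unsigned char rec[]= { 'a', 'b', 'c', /* offsets: */ 0, 2 };
  unsigned len= sizeof(rec);            /* total length incl. offsets */
  unsigned referenced_fields= 2, size_of_fld_ofs= 1;

  for (unsigned no= 1; no <= referenced_fields; no++)
  {
    unsigned offset=
      rec[len - size_of_fld_ofs * (referenced_fields + 1 - no)];
    printf("field #%u starts at offset %u -> '%c'\n", no, offset, rec[offset]);
  }
  return 0;
}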
1570 
1571 
1572 /*
1573  Skip record from join buffer if its match flag is on: default implementation
1574 
1575  SYNOPSIS
1576  skip_record_if_match()
1577 
1578  DESCRIPTION
1579  This default implementation of the virtual function skip_record_if_match
1580  skips the next record from the join buffer if its match flag is set on.
1581  If the record is skipped the value of 'pos' is set to point to the position
1582  right after the record.
1583 
1584  RETURN
1585  TRUE - the match flag is on and the record has been skipped
1586  FALSE - the match flag is off
1587 */
1588 
1589 bool JOIN_CACHE::skip_record_if_match()
1590 {
1591  DBUG_ASSERT(with_match_flag && with_length);
1592  uint offset= size_of_rec_len;
1593  if (prev_cache)
1594  offset+= prev_cache->get_size_of_rec_offset();
1595  /* Check whether the match flag is on */
1596  if (test(*(pos+offset)))
1597  {
1598  pos+= size_of_rec_len + get_rec_length(pos);
1599  return TRUE;
1600  }
1601  return FALSE;
1602 }
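
/*
  Editor's illustration (a toy buffer, not the real layout code):
  skip_record_if_match() relies on the record prefix layout
  [record length][optional prev-cache ref][match flag][...]. With 1-byte
  lengths, no previous cache, and the stored length covering the flag
  plus payload:
*/
#include <cstdio>

int main()
{
  unsigned char buff[]= { 3, 1, 'a', 'b',     /* matched: flag == 1 */
                          3, 0, 'c', 'd' };   /* not matched yet */
  unsigned char *pos= buff;
  unsigned size_of_rec_len= 1, offset= size_of_rec_len;

  if (pos[offset])                    /* the match flag is on */
    pos+= size_of_rec_len + pos[0];   /* skip length prefix + record */
  printf("pos now at record with flag %u\n", (unsigned) pos[1]); /* 0 */
  return 0;
}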
1603 
1604 
1605 /*
1606  Restore the fields of the last record from the join buffer
1607 
1608  SYNOPSIS
1609  restore_last_record()
1610 
1611  DESCRIPTION
1612  This function restores in the record buffers the values of the fields of
1613  the last record put into the join buffer. The values most probably have been
1614  overwritten by the field values from other records when they were read
1615  from the join buffer into the record buffer in order to check pushdown
1616  predicates.
1617 
1618  RETURN
1619  none
1620 */
1621 
1622 void JOIN_CACHE::restore_last_record()
1623 {
1624  if (records)
1625  get_record_by_pos(last_rec_pos);
1626 }
1627 
1628 
1629 /*
1630  Join records from the join buffer with records from the next join table
1631 
1632  SYNOPSIS
1633  join_records()
1634  skip_last do not find matches for the last record from the buffer
1635 
1636  DESCRIPTION
1637  The function extends all records from the join buffer by the matched
1638  records from join_tab. In the case of outer join operation it also
1639  adds null complementing extensions for the records from the join buffer
1640  that have no match.
1641  No extensions are generated for the last record from the buffer if
1642  skip_last is true.
1643 
1644  NOTES
1645  The function must make sure that if linked join buffers are used then
1646  a join buffer cannot be refilled again until all extensions in the
1647  buffers chained to this one are generated.
1648  Currently an outer join operation with several inner tables always uses
1649  at least two linked buffers with the match join flags placed in the
1650  first buffer. Any record composed of rows of the inner tables that
1651  matches a record in this buffer must refer to the position of the
1652  corresponding match flag.
1653 
1654  IMPLEMENTATION
1655  When generating extensions for outer tables of an outer join operation
1656  first we generate all extensions for those records from the join buffer
1657  that have matches, after which null complementing extension for all
1658  unmatched records from the join buffer are generated.
1659 
1660  RETURN
1661  return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
1662 */
1663 
1664 enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
1665 {
1666  enum_nested_loop_state rc= NESTED_LOOP_OK;
1667  DBUG_ENTER("JOIN_CACHE::join_records");
1668 
1669  table_map saved_status_bits[3]= {0, 0, 0};
1670  for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
1671  {
1672  /*
1673  We may have hit EOF on previous tables; this has set
1674  STATUS_NOT_FOUND in their status. However, now we are going to load
1675  table->record[0] from the join buffer so have to declare that there is a
1676  record. @See convert_constant_item().
1677  We first need to save bits of table->status; STATUS_DELETED and
1678  STATUS_UPDATED cannot be on as multi-table DELETE/UPDATE never use join
1679  buffering. So we only have three bits to save.
1680  */
1681  TABLE * const table= join_tab[- cnt].table;
1682  const uint8 status= table->status;
1683  const table_map map= table->map;
1684  DBUG_ASSERT((status & (STATUS_DELETED | STATUS_UPDATED)) == 0);
1685  if (status & STATUS_GARBAGE)
1686  saved_status_bits[0]|= map;
1687  if (status & STATUS_NOT_FOUND)
1688  saved_status_bits[1]|= map;
1689  if (status & STATUS_NULL_ROW)
1690  saved_status_bits[2]|= map;
1691  table->status= 0; // Record exists.
1692  }
1693 
1694  const bool outer_join_first_inner=
1695  join_tab->is_first_inner_for_outer_join();
1696  if (outer_join_first_inner && !join_tab->first_unmatched)
1697  join_tab->not_null_compl= TRUE;
1698 
1699  if (!join_tab->first_unmatched)
1700  {
1701  /* Find all records from join_tab that match records from join buffer */
1702  rc= join_matching_records(skip_last);
1703  if (rc != NESTED_LOOP_OK)
1704  goto finish;
1705  if (outer_join_first_inner)
1706  {
1707  if (next_cache)
1708  {
1709  /*
1710  Ensure that all matches for outer records from join buffer are to be
1711  found. Now we ensure that all full records are found for records from
1712  join buffer. Generally this is an overkill.
1713  TODO: Ensure that only matches of the inner table records have to be
1714  found for the records from join buffer.
1715  */
1716  rc= next_cache->join_records(skip_last);
1717  if (rc != NESTED_LOOP_OK)
1718  goto finish;
1719  }
1720  join_tab->not_null_compl= FALSE;
1721  /* Prepare for generation of null complementing extensions */
1722  for (JOIN_TAB *tab= join_tab->first_inner;
1723  tab <= join_tab->last_inner; tab++)
1724  tab->first_unmatched= join_tab->first_inner;
1725  }
1726  }
1727  if (join_tab->first_unmatched)
1728  {
1729  if (is_key_access())
1730  restore_last_record();
1731 
1732  /*
1733  Generate all null complementing extensions for the records from
1734  join buffer that don't have any matching rows from the inner tables.
1735  */
1736  reset_cache(false);
1737  rc= join_null_complements(skip_last);
1738  if (rc != NESTED_LOOP_OK)
1739  goto finish;
1740  }
1741  if(next_cache)
1742  {
1743  /*
1744  When using linked caches we must ensure the records in the next caches
1745  that refer to the records in the join buffer are fully extended.
1746  Otherwise we could have references to the records that have been
1747  already erased from the join buffer and replaced by new records.
1748  */
1749  rc= next_cache->join_records(skip_last);
1750  if (rc != NESTED_LOOP_OK)
1751  goto finish;
1752  }
1753 
1754  if (skip_last)
1755  {
1756  DBUG_ASSERT(!is_key_access());
1757  /*
1758  Restore the last record from the join buffer to generate
1759  all extensions for it.
1760  */
1761  get_record();
1762  }
1763 
1764 finish:
1765  if (outer_join_first_inner)
1766  {
1767  /*
1768  All null complemented rows have already been generated for all
1769  outer records from join buffer. Restore the state of the
1770  first_unmatched values to 0 to avoid another null complementing.
1771  */
1772  for (JOIN_TAB *tab= join_tab->first_inner;
1773  tab <= join_tab->last_inner; tab++)
1774  tab->first_unmatched= NULL;
1775  }
1776  for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
1777  {
1778  /*
1779  We must restore the status of outer tables as it was before entering
1780  this function.
1781  */
1782  TABLE * const table= join_tab[- cnt].table;
1783  const table_map map= table->map;
1784  uint8 status= 0;
1785  if (saved_status_bits[0] & map)
1786  status|= STATUS_GARBAGE;
1787  if (saved_status_bits[1] & map)
1788  status|= STATUS_NOT_FOUND;
1789  if (saved_status_bits[2] & map)
1790  status|= STATUS_NULL_ROW;
1791  table->status= status;
1792  }
1793  restore_last_record();
1794  reset_cache(true);
1795  DBUG_RETURN(rc);
1796 }
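
/*
  Editor's illustration (a minimal model, with a stand-in status value):
  join_records() above saves one status bit per table into a table_map
  word, indexed by each table's 'map' bit, clears the status while the
  join buffer is being processed, and restores it on exit.
*/
#include <cstdio>

typedef unsigned long long table_map_sketch;

int main()
{
  const unsigned STATUS_NOT_FOUND_SKETCH= 1U << 1;
  unsigned status[2]= { STATUS_NOT_FOUND_SKETCH, 0 };
  const table_map_sketch map[2]= { 1ULL << 0, 1ULL << 1 };

  /* Save the bit for every table, then clear the status */
  table_map_sketch saved_not_found= 0;
  for (int t= 0; t < 2; t++)
  {
    if (status[t] & STATUS_NOT_FOUND_SKETCH)
      saved_not_found|= map[t];
    status[t]= 0;                      /* "record exists" while joining */
  }

  /* Restore the status exactly as it was before */
  for (int t= 0; t < 2; t++)
    status[t]= (saved_not_found & map[t]) ? STATUS_NOT_FOUND_SKETCH : 0;

  printf("table 0: %u, table 1: %u\n", status[0], status[1]);
  return 0;
}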
1797 
1798 
1799 /*
1800  Using BNL find matches from the next table for records from the join buffer
1801 
1802  SYNOPSIS
1803  join_matching_records()
1804  skip_last do not look for matches for the last partial join record
1805 
1806  DESCRIPTION
 1807  The function retrieves all rows of the join_tab table and checks whether
1808  they match partial join records from the join buffer. If a match is found
1809  the function will call the sub_select function trying to look for matches
1810  for the remaining join operations.
1811  This function currently is called only from the function join_records.
1812  If the value of skip_last is true the function writes the partial join
1813  record from the record buffer into the join buffer to save its value for
1814  the future processing in the caller function.
1815 
1816  NOTES
1817  The function produces all matching extensions for the records in the
1818  join buffer following the path of the Blocked Nested Loops algorithm.
1819  When an outer join operation is performed all unmatched records from
1820  the join buffer must be extended by null values. The function
1821  'join_null_complements' serves this purpose.
1822 
1823  RETURN
1824  return one of enum_nested_loop_state.
1825 */
1826 
1827 enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
1828 {
1829  uint cnt;
1830  int error;
1831  READ_RECORD *info;
1832  enum_nested_loop_state rc= NESTED_LOOP_OK;
1833  SQL_SELECT *select= join_tab->cache_select;
1834 
1835  join_tab->table->null_row= 0;
1836 
1837  /* Return at once if there are no records in the join buffer */
1838  if (!records)
1839  return NESTED_LOOP_OK;
1840 
1841  /*
1842  When joining we read records from the join buffer back into record buffers.
1843  If matches for the last partial join record are found through a call to
1844  the sub_select function then this partial join record must be saved in the
1845  join buffer in order to be restored just before the sub_select call.
1846  */
1847  if (skip_last)
1848  put_record_in_cache();
1849 
1850  if (join_tab->use_quick == QS_DYNAMIC_RANGE && join_tab->select->quick)
1851  /* A dynamic range access was used last. Clean up after it */
1852  join_tab->select->set_quick(NULL);
1853 
1854  /* Start retrieving all records of the joined table */
1855  if ((error= (*join_tab->read_first_record)(join_tab)))
1856  return error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR;
1857 
1858  info= &join_tab->read_record;
1859  do
1860  {
1861  if (join_tab->keep_current_rowid)
1862  join_tab->table->file->position(join_tab->table->record[0]);
1863 
1864  if (join->thd->killed)
1865  {
1866  /* The user has aborted the execution of the query */
1867  join->thd->send_kill_message();
1868  return NESTED_LOOP_KILLED;
1869  }
1870 
1871  /*
1872  Do not look for matches if the last read record of the joined table
1873  does not meet the conditions that have been pushed to this table
1874  */
1875  if (rc == NESTED_LOOP_OK)
1876  {
1877  bool skip_record;
1878  bool consider_record= (!select ||
1879  (!select->skip_record(join->thd, &skip_record) &&
1880  !skip_record));
1881  if (select && join->thd->is_error())
1882  return NESTED_LOOP_ERROR;
1883  if (consider_record)
1884  {
1885  /* Prepare to read records from the join buffer */
1886  reset_cache(false);
1887 
1888  /* Read each record from the join buffer and look for matches */
1889  for (cnt= records - test(skip_last) ; cnt; cnt--)
1890  {
1891  /*
 1892  If only the first match is needed and it has already been found for
1893  the next record read from the join buffer then the record is skipped.
1894  */
1895  if (!check_only_first_match || !skip_record_if_match())
1896  {
1897  get_record();
1898  rc= generate_full_extensions(get_curr_rec());
1899  if (rc != NESTED_LOOP_OK)
1900  return rc;
1901  }
1902  }
1903  }
1904  }
1905  } while (!(error= info->read_record(info)));
1906 
1907  if (error > 0) // Fatal error
1908  rc= NESTED_LOOP_ERROR;
1909  return rc;
1910 }
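
The control flow above is the classic blocked nested loops pattern. The following self-contained sketch is illustrative only: the container, predicate, and row types are simplified stand-ins for the JOIN_CACHE machinery, but it shows the core idea that one scan of the inner table serves a whole buffered block of outer rows.

#include <vector>
#include <functional>

// Minimal sketch of Blocked Nested Loops: for every row of the inner
// table, scan the whole buffered block of outer rows and emit the pairs
// that satisfy the join condition.
template <typename OuterRow, typename InnerRow>
void blocked_nested_loops(
    const std::vector<OuterRow> &join_buffer,        // buffered outer rows
    const std::vector<InnerRow> &inner_table,        // rows of join_tab
    std::function<bool(const OuterRow&, const InnerRow&)> match,
    std::function<void(const OuterRow&, const InnerRow&)> emit)
{
  for (const InnerRow &inner : inner_table)          // one scan of join_tab
    for (const OuterRow &outer : join_buffer)        // all buffered records
      if (match(outer, inner))
        emit(outer, inner);                          // a "full extension"
}
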
1911 
1912 
1913 /*
1914  Set match flag for a record in join buffer if it has not been set yet
1915 
1916  SYNOPSIS
1917  set_match_flag_if_none()
 1918  first_inner the join table to which this flag is attached
1919  rec_ptr pointer to the record in the join buffer
1920 
1921  DESCRIPTION
1922  If the records of the table are accumulated in a join buffer the function
1923  sets the match flag for the record in the buffer that is referred to by
1924  the record from this cache positioned at 'rec_ptr'.
 1925  The function also sets the match flag 'found' of the first inner
 1926  table if it has not been set before.
1927 
1928  NOTES
1929  The function assumes that the match flag for any record in any cache
1930  is placed in the first byte occupied by the record fields.
1931 
1932  RETURN
1933  TRUE the match flag is set by this call for the first time
1934  FALSE the match flag has been set before this call
1935 */
1936 
1937 bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
1938  uchar *rec_ptr)
1939 {
1940  if (!first_inner->op)
1941  {
1942  /*
 1943  Records of the first inner table to which the flag is attached
 1944  are not accumulated in a join buffer.
1945  */
1946  if (first_inner->found)
1947  return FALSE;
1948  else
1949  {
1950  first_inner->found= 1;
1951  return TRUE;
1952  }
1953  }
1954  JOIN_CACHE *cache= this;
1955  while (cache->join_tab != first_inner)
1956  {
1957  cache= cache->prev_cache;
1958  DBUG_ASSERT(cache);
1959  rec_ptr= cache->get_rec_ref(rec_ptr);
1960  }
1961  if (rec_ptr[0] == 0)
1962  {
1963  rec_ptr[0]= 1;
1964  first_inner->found= 1;
1965  return TRUE;
1966  }
1967  return FALSE;
1968 }
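
A compact model of the walk above, with plain pointers standing in for the packed in-buffer references. The types and names below are hypothetical; the real code translates rec_ptr through get_rec_ref() while descending the prev_cache chain.

// Sketch only: the match flag is assumed to live in the first byte of a
// record's fields, exactly as the NOTES above state.
struct CacheModel
{
  CacheModel *prev_cache;                        // chain of linked caches
  // In the real cache this decodes a packed offset stored with the record.
  unsigned char *(*get_rec_ref)(unsigned char *rec_ptr);
};

bool set_flag_if_none(CacheModel *cache, CacheModel *first_inner_cache,
                      unsigned char *rec_ptr)
{
  while (cache != first_inner_cache)             // descend to the right cache
  {
    cache= cache->prev_cache;
    rec_ptr= cache->get_rec_ref(rec_ptr);        // translate the reference
  }
  if (rec_ptr[0] == 0)                           // flag in the first byte
  {
    rec_ptr[0]= 1;
    return true;                                 // set for the first time
  }
  return false;                                  // was already set
}
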
1969 
1970 
1971 /*
1972  Generate all full extensions for a partial join record in the buffer
1973 
1974  SYNOPSIS
1975  generate_full_extensions()
1976  rec_ptr pointer to the record from join buffer to generate extensions
1977 
1978  DESCRIPTION
1979  The function first checks whether the current record of 'join_tab' matches
1980  the partial join record from join buffer located at 'rec_ptr'. If it is the
1981  case the function calls the join_tab->next_select method to generate
 1982  all full extensions for this partial join match.
1983 
1984  RETURN
1985  return one of enum_nested_loop_state.
1986 */
1987 
1988 enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
1989 {
1990  enum_nested_loop_state rc= NESTED_LOOP_OK;
1991 
1992  /*
1993  Check whether the extended partial join record meets
1994  the pushdown conditions.
1995  */
1996  if (check_match(rec_ptr))
1997  {
1998  int res= 0;
1999  if (!join_tab->check_weed_out_table ||
2000  !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table)))
2001  {
2002  set_curr_rec_link(rec_ptr);
2003  rc= (join_tab->next_select)(join, join_tab+1, 0);
2004  if (rc != NESTED_LOOP_OK)
2005  {
2006  reset_cache(true);
2007  return rc;
2008  }
2009  }
2010  if (res == -1)
2011  {
2012  rc= NESTED_LOOP_ERROR;
2013  return rc;
2014  }
2015  }
2016  return rc;
2017 }
2018 
2019 
2020 /*
2021  Check matching to a partial join record from the join buffer
2022 
2023  SYNOPSIS
2024  check_match()
2025  rec_ptr pointer to the record from join buffer to check matching to
2026 
2027  DESCRIPTION
2028  The function checks whether the current record of 'join_tab' matches
2029  the partial join record from join buffer located at 'rec_ptr'. If this is
2030  the case and 'join_tab' is the last inner table of a semi-join or an outer
2031  join the function turns on the match flag for the 'rec_ptr' record unless
2032  it has been already set.
2033 
2034  NOTES
2035  Setting the match flag on can trigger re-evaluation of pushdown conditions
2036  for the record when join_tab is the last inner table of an outer join.
2037 
2038  RETURN
2039  TRUE there is a match
2040  FALSE there is no match
2041 */
2042 
2043 bool JOIN_CACHE::check_match(uchar *rec_ptr)
2044 {
2045  bool skip_record;
2046  /* Check whether pushdown conditions are satisfied */
2047  if (join_tab->select &&
2048  (join_tab->select->skip_record(join->thd, &skip_record) || skip_record))
2049  return FALSE;
2050 
2051  if (!((join_tab->first_inner &&
2052  join_tab->first_inner->last_inner == join_tab) ||
2053  (join_tab->last_sj_inner_tab == join_tab &&
2054  join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)))
2055  return TRUE; // not the last inner table
2056 
2057  /*
2058  This is the last inner table of an outer join,
2059  and maybe of other embedding outer joins, or
2060  this is the last inner table of a semi-join.
2061  */
2062  JOIN_TAB *first_inner= join_tab->first_inner ?
2063  join_tab->first_inner :
2064  ((join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH) ?
2065  join_tab->first_sj_inner_tab : NULL);
2066 
2067  do
2068  {
2069  set_match_flag_if_none(first_inner, rec_ptr);
2070  if (calc_check_only_first_match(first_inner) &&
2071  !join_tab->first_inner)
2072  return TRUE;
2073  /*
2074  This is the first match for the outer table row.
2075  The function set_match_flag_if_none has turned the flag
2076  first_inner->found on. The pushdown predicates for
2077  inner tables must be re-evaluated with this flag on.
2078  Note that, if first_inner is the first inner table
2079  of a semi-join, but is not an inner table of an outer join
2080  such that 'not exists' optimization can be applied to it,
2081  the re-evaluation of the pushdown predicates is not needed.
2082  */
2083  for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
2084  {
2085  if (tab->select &&
2086  (tab->select->skip_record(join->thd, &skip_record) || skip_record))
2087  return FALSE;
2088  }
2089  }
2090  while ((first_inner= first_inner->first_upper) &&
2091  first_inner->last_inner == join_tab);
2092 
2093  return TRUE;
2094 }
2095 
2096 
2097 /*
2098  Add null complements for unmatched outer records from join buffer
2099 
2100  SYNOPSIS
2101  join_null_complements()
2102  skip_last do not add null complements for the last record
2103 
2104  DESCRIPTION
2105  This function is called only for inner tables of outer joins.
2106  The function retrieves all rows from the join buffer and adds null
2107  complements for those of them that do not have matches for outer
2108  table records.
2109  If the 'join_tab' is the last inner table of the embedding outer
2110  join and the null complemented record satisfies the outer join
 2111  condition then the corresponding match flag is turned on
2112  unless it has been set earlier. This setting may trigger
2113  re-evaluation of pushdown conditions for the record.
2114 
2115  NOTES
2116  The same implementation of the virtual method join_null_complements
2117  is used for JOIN_CACHE_BNL and JOIN_CACHE_BKA.
2118 
2119  RETURN
2120  return one of enum_nested_loop_state.
2121 */
2122 
2123 enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
2124 {
2125  uint cnt;
2126  enum_nested_loop_state rc= NESTED_LOOP_OK;
2127  bool is_first_inner= join_tab == join_tab->first_unmatched;
2128  DBUG_ENTER("JOIN_CACHE::join_null_complements");
2129 
2130  /* Return at once if there are no records in the join buffer */
2131  if (!records)
2132  DBUG_RETURN(NESTED_LOOP_OK);
2133 
2134  cnt= records - (is_key_access() ? 0 : test(skip_last));
2135 
2136  /* This function may be called only for inner tables of outer joins */
2137  DBUG_ASSERT(join_tab->first_inner);
2138 
2139  // Make sure that the rowid buffer is bound, duplicates weedout needs it
2140  if (join_tab->copy_current_rowid &&
2141  !join_tab->copy_current_rowid->buffer_is_bound())
2142  join_tab->copy_current_rowid->bind_buffer(join_tab->table->file->ref);
2143 
2144  for ( ; cnt; cnt--)
2145  {
2146  if (join->thd->killed)
2147  {
2148  /* The user has aborted the execution of the query */
2149  join->thd->send_kill_message();
2150  rc= NESTED_LOOP_KILLED;
2151  goto finish;
2152  }
 2153  /* Just skip the whole record if a match for it has already been found */
2154  if (!is_first_inner || !skip_record_if_match())
2155  {
2156  get_record();
2157  /* The outer row is complemented by nulls for each inner table */
2158  restore_record(join_tab->table, s->default_values);
2159  mark_as_null_row(join_tab->table);
2160  rc= generate_full_extensions(get_curr_rec());
2161  if (rc != NESTED_LOOP_OK)
2162  goto finish;
2163  }
2164  }
2165 
2166 finish:
2167  DBUG_RETURN(rc);
2168 }
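
For illustration, here is the null-complementing idea in miniature. This is a hedged sketch with simplified types; std::optional stands in for the NULL-row marking done by restore_record()/mark_as_null_row().

#include <optional>
#include <vector>
#include <cstddef>

// Each buffered outer row whose match flag stayed off is emitted once,
// extended with an all-NULL image of the inner row.
template <typename OuterRow, typename InnerRow>
void null_complements(const std::vector<OuterRow> &join_buffer,
                      const std::vector<bool> &match_flags,
                      void (*emit)(const OuterRow &,
                                   const std::optional<InnerRow> &))
{
  for (std::size_t i= 0; i < join_buffer.size(); i++)
    if (!match_flags[i])                               // no match was found
      emit(join_buffer[i], std::optional<InnerRow>()); // NULL extension
}
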
2169 
2170 
2171 /*
2172  Initialize retrieval of range sequence for BKA algorithm
2173 
2174  SYNOPSIS
2175  bka_range_seq_init()
 2176  init_param pointer to the BKA join cache object
2177  n_ranges the number of ranges obtained
2178  flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
2179 
2180  DESCRIPTION
2181  The function interprets init_param as a pointer to a JOIN_CACHE_BKA
2182  object. The function prepares for an iteration over the join keys
2183  built for all records from the cache join buffer.
2184 
2185  NOTE
 2186  This function is used only as a callback function.
2187 
2188  RETURN
2189  init_param value that is to be used as a parameter of bka_range_seq_next()
2190 */
2191 
2192 static
2193 range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
2194 {
2195  DBUG_ENTER("bka_range_seq_init");
2196  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
2197  cache->reset_cache(false);
2198  DBUG_RETURN((range_seq_t) init_param);
2199 }
2200 
2201 
2202 /*
2203  Get the key over the next record from the join buffer used by BKA
2204 
2205  SYNOPSIS
2206  bka_range_seq_next()
2207  seq the value returned by bka_range_seq_init
2208  range OUT reference to the next range
2209 
2210  DESCRIPTION
2211  The function interprets seq as a pointer to a JOIN_CACHE_BKA
2212  object. The function returns a pointer to the range descriptor
2213  for the key built over the next record from the join buffer.
2214 
2215  NOTE
 2216  This function is used only as a callback function.
2217 
2218  RETURN
2219  0 ok, the range structure filled with info about the next key
2220  1 no more ranges
2221 */
2222 
2223 static
2224 uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
2225 {
2226  DBUG_ENTER("bka_range_seq_next");
2227  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
2228  TABLE_REF *ref= &cache->join_tab->ref;
2229  key_range *start_key= &range->start_key;
2230  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
2231  {
2232  start_key->keypart_map= (1 << ref->key_parts) - 1;
2233  start_key->flag= HA_READ_KEY_EXACT;
2234  range->end_key= *start_key;
2235  range->end_key.flag= HA_READ_AFTER_KEY;
2236  range->ptr= (char *) cache->get_curr_rec();
2237  range->range_flag= EQ_RANGE;
2238  DBUG_RETURN(0);
2239  }
2240  DBUG_RETURN(1);
2241 }
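
To see how these callbacks are consumed, here is a hedged sketch of the loop an MRR implementation runs over a range sequence. The types below are simplified stand-ins for the real RANGE_SEQ_IF/KEY_MULTI_RANGE structures.

struct RangeStub { /* start/end key plus a pointer back to the record */ };

// Simplified range-sequence interface: init() hands back a cursor and
// next() fills one range per call until it returns non-zero.
struct RangeSeqFuncs
{
  void *(*init)(void *init_param, unsigned n_ranges, unsigned flags);
  unsigned (*next)(void *seq, RangeStub *range);
};

void drive_range_sequence(RangeSeqFuncs *funcs, void *init_param)
{
  void *seq= funcs->init(init_param, 0, 0);   // like bka_range_seq_init()
  RangeStub range;
  while (funcs->next(seq, &range) == 0)       // like bka_range_seq_next()
  {
    // ...one index lookup per range; each fetched row comes back with
    // the range's ptr identifying the buffered record that built the key...
  }
}
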
2242 
2243 
2244 /*
2245  Check whether range_info orders to skip the next record from BKA buffer
2246 
2247  SYNOPSIS
2248  bka_range_seq_skip_record()
2249  seq value returned by bka_range_seq_init()
2250  range_info information about the next range
2251  rowid [NOT USED] rowid of the record to be checked
2252 
2253 
2254  DESCRIPTION
2255  The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
 2256  The function returns TRUE if the record with this range_info
2258  is to be filtered out from the stream of records returned by
2259  multi_range_read_next().
2260 
2261  NOTE
 2262  This function is used only as a callback function.
2263 
2264  RETURN
2265  1 record with this range_info is to be filtered out from the stream
2266  of records returned by multi_range_read_next()
2267  0 the record is to be left in the stream
2268 */
2269 
2270 static
2271 bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
2272 {
2273  DBUG_ENTER("bka_range_seq_skip_record");
2274  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
2275  bool res= cache->get_match_flag_by_pos((uchar *) range_info);
2276  DBUG_RETURN(res);
2277 }
2278 
2279 /*
2280  Using BKA find matches from the next table for records from the join buffer
2281 
2282  SYNOPSIS
2283  join_matching_records()
2284  skip_last do not look for matches for the last partial join record
2285 
2286  DESCRIPTION
2287  This function can be used only when the table join_tab can be accessed
2288  by keys built over the fields of previous join tables.
2289  The function retrieves all partial join records from the join buffer and
2290  for each of them builds the key value to access join_tab, performs index
 2291  look-up with this key and selects matching records yielded by this look-up.
2292  If a match is found the function will call the sub_select function trying
2293  to look for matches for the remaining join operations.
2294  This function currently is called only from the function join_records.
2295  It's assumed that this function is always called with the skip_last
2296  parameter equal to false.
2297 
2298  NOTES
2299  The function produces all matching extensions for the records in the
2300  join buffer following the path of the Batched Key Access algorithm.
2301  When an outer join operation is performed all unmatched records from
2302  the join buffer must be extended by null values. The function
2303  join_null_complements serves this purpose.
2304  The Batched Key Access algorithm assumes that key accesses are batched.
2305  In other words it assumes that, first, either keys themselves or the
2306  corresponding rowids (primary keys) are accumulated in a buffer, then
2307  data rows from join_tab are fetched for all of them. When a row is
2308  fetched it is always returned with a reference to the key by which it
2309  has been accessed.
2310  When key values are batched we can save on the number of the server
2311  requests for index lookups. For the remote engines, like NDB cluster, it
2312  essentially reduces the number of round trips between the server and
2313  the engine when performing a join operation.
 2314  When the rowids for the keys are batched we can optimize the order
 2315  in which we fetch the data for these rowids. The performance benefits of
 2316  this optimization can be significant for engines such as MyISAM and InnoDB.
 2317  What exactly is batched is hidden behind implementations of the
 2318  MRR handler interface that is supposed to be appropriately chosen
 2319  for each engine. If for an engine no specific implementation of the MRR
 2320  interface is supplied then the default implementation is used. This
 2321  implementation actually follows the path of the Nested Loops Join algorithm.
 2322  In this case a BKA join will surely demonstrate worse performance than
 2323  an NL join.
2324 
2325  RETURN
2326  return one of enum_nested_loop_state
2327 */
2328 
2329 enum_nested_loop_state JOIN_CACHE_BKA::join_matching_records(bool skip_last)
2330 {
2331  /* The value of skip_last must be always FALSE when this function is called */
2332  DBUG_ASSERT(!skip_last);
2333 
2334  /* Return at once if there are no records in the join buffer */
2335  if (!records)
2336  return NESTED_LOOP_OK;
2337 
2338  /* Set functions to iterate over keys in the join buffer */
2339  RANGE_SEQ_IF seq_funcs= { bka_range_seq_init,
2340  bka_range_seq_next,
 2341  check_only_first_match ?
 2342  bka_range_seq_skip_record : 0,
2343  join_tab->cache_idx_cond ?
2344  bka_skip_index_tuple : 0 };
2345 
2346  if (init_join_matching_records(&seq_funcs, records))
2347  return NESTED_LOOP_ERROR;
2348 
2349  int error;
2350  handler *file= join_tab->table->file;
2351  enum_nested_loop_state rc= NESTED_LOOP_OK;
2352  uchar *rec_ptr= NULL;
2353 
2354  while (!(error= file->multi_range_read_next((char **) &rec_ptr)))
2355  {
2356  if (join->thd->killed)
2357  {
2358  /* The user has aborted the execution of the query */
2359  join->thd->send_kill_message();
2360  return NESTED_LOOP_KILLED;
2361  }
2362  if (join_tab->keep_current_rowid)
2363  join_tab->table->file->position(join_tab->table->record[0]);
2364  /*
 2365  If only the first match is needed and it has already been found
2366  for the associated partial join record then the returned candidate
2367  is discarded.
2368  */
2369  if (rc == NESTED_LOOP_OK &&
2370  (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
2371  {
2372  get_record_by_pos(rec_ptr);
2373  rc= generate_full_extensions(rec_ptr);
2374  if (rc != NESTED_LOOP_OK)
2375  return rc;
2376  }
2377  }
2378 
2379  if (error > 0 && error != HA_ERR_END_OF_FILE)
2380  return NESTED_LOOP_ERROR;
2381  return rc;
2382 }
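
The round-trip saving claimed in the NOTES above can be made concrete with a bit of illustrative arithmetic; the numbers below are assumptions, not measurements.

#include <cstdio>

int main()
{
  // Assumed workload: 10000 buffered records, and an engine (e.g. a
  // remote one) that can take 1000 keys per batched request.
  const unsigned records= 10000;
  const unsigned batch_size= 1000;

  const unsigned per_row_requests= records;   // one lookup per record
  const unsigned batched_requests=            // ceil(records / batch_size)
      (records + batch_size - 1) / batch_size;

  std::printf("per-row lookups: %u requests, batched: %u requests\n",
              per_row_requests, batched_requests);
  return 0;
}
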
2383 
2384 
2385 
2386 /*
2387  Prepare to search for records that match records from the join buffer
2388 
2389  SYNOPSIS
2390  init_join_matching_records()
2391  seq_funcs structure of range sequence interface
2392  ranges number of keys/ranges in the sequence
2393 
2394  DESCRIPTION
2395  This function calls the multi_range_read_init function to set up
2396  the BKA process of generating the keys from the records in the join
2397  buffer and looking for matching records from the table to be joined.
2398  The function passes as a parameter a structure of functions that
2399  implement the range sequence interface. This interface is used to
2400  enumerate all generated keys and optionally to filter the matching
2401  records returned by the multi_range_read_next calls from the
2402  intended invocation of the join_matching_records method. The
2403  multi_range_read_init function also receives the parameters for
2404  MRR buffer to be used and flags specifying the mode in which
2405  this buffer will be functioning.
2406  The number of keys in the sequence expected by multi_range_read_init
2407  is passed through the parameter ranges.
2408 
2409  RETURN
2410  False if ok, True otherwise.
2411 */
2412 
2413 bool
2414 JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
2415 {
2416  handler *file= join_tab->table->file;
2417 
2418  join_tab->table->null_row= 0;
2419 
2420  /* Dynamic range access is never used with BKA */
2421  DBUG_ASSERT(join_tab->use_quick != QS_DYNAMIC_RANGE);
2422 
2423  init_mrr_buff();
2424 
2425  /*
2426  Prepare to iterate over keys from the join buffer and to get
 2427  matching candidates obtained with MRR handler functions.
2428  */
2429  if (!file->inited)
2430  {
2431  const int error= file->ha_index_init(join_tab->ref.key, 1);
2432  if (error)
2433  {
2434  file->print_error(error, MYF(0));
2435  return error;
2436  }
2437  }
2438  return
2439  file->multi_range_read_init(seq_funcs, (void*) this, ranges,
2440  mrr_mode, &mrr_buff);
2441 }
2442 
2443 
 2444 /*
 2445  Read all flag fields of the record positioned at rec_ptr in the join
 2446  buffer, including the flag fields of this record that are stored in the
 2447  buffers of previous caches. The value of 'pos' is preserved.
 2448 */
 2449 
 2450 
 2451 void JOIN_CACHE::read_all_flag_fields_by_pos(uchar *rec_ptr)
 2452 {
2453  uchar * const save_pos= pos;
2454  pos= rec_ptr;
2455  read_some_flag_fields(); // moves 'pos'...
2456  pos= save_pos; // ... so we restore it.
2457  if (prev_cache)
2458  {
2459  // position of this record in previous join buffer:
2460  rec_ptr= prev_cache->get_rec_ref(rec_ptr);
2461  // recurse into previous buffer to read missing flag fields
2462  prev_cache->read_all_flag_fields_by_pos(rec_ptr);
2463  }
2464 }
2465 
2466 
2467 /*
2468  Get the key built over the next record from BKA join buffer
2469 
2470  SYNOPSIS
2471  get_next_key()
2472  key pointer to the buffer where the key value is to be placed
2473 
2474  DESCRIPTION
2475  The function reads key fields from the current record in the join buffer.
2476  and builds the key value out of these fields that will be used to access
2477  the 'join_tab' table. Some of key fields may belong to previous caches.
2478  They are accessed via record references to the record parts stored in the
2479  previous join buffers. The other key fields always are placed right after
2480  the flag fields of the record.
2481  If the key is embedded, which means that its value can be read directly
2482  from the join buffer, then *key is set to the beginning of the key in
2483  this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
 2484  The function returns the length of the key if it succeeds in reading it.
 2485  It is assumed that the function starts reading at the position of
 2486  the record length, which is provided for each record in a BKA cache.
 2487  After the key is built the 'pos' value points to the first position after
 2488  the current record.
 2489  The function returns 0 if the initial position is after the beginning
 2490  of the record fields for the last record from the join buffer.
2491 
2492  RETURN
2493  length of the key value - if the starting value of 'pos' points to
2494  the position before the fields for the last record,
2495  0 - otherwise.
2496 */
2497 
2498 uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
2499 {
2500  uint len;
2501  uint32 rec_len;
2502  uchar *init_pos;
2503  JOIN_CACHE *cache;
2504 
2505  if (records == 0)
2506  return 0;
2507 
2508  /* Any record in a BKA cache is prepended with its length, which we need */
2509  DBUG_ASSERT(with_length);
2510 
2511  /*
 2512  Read keys until we find a non-ignorable one or reach EOF.
 2513  Unlike in JOIN_CACHE::read_some_record_fields(), pos >= last_rec_pos means
2514  EOF, because we are not at fields' start, and previous record's fields
2515  might be empty.
2516  */
2517  for(len= 0 ; (len == 0) && pos < last_rec_pos ; pos= init_pos + rec_len)
2518  {
2519  /* Read the length of the record */
2520  rec_len= get_rec_length(pos);
2521  pos+= size_of_rec_len;
2522  init_pos= pos;
2523 
2524  /* Read a reference to the previous cache if any */
2525  uchar *prev_rec_ptr= NULL;
2526  if (prev_cache)
2527  {
2528  pos+= prev_cache->get_size_of_rec_offset();
2529  // position of this record in previous buffer:
2530  prev_rec_ptr= prev_cache->get_rec_ref(pos);
2531  }
2532 
2533  curr_rec_pos= pos;
2534 
2535  // Read all flag fields of the record, in two steps:
2536  read_some_flag_fields(); // 1) flag fields stored in this buffer
2537  if (prev_cache) // 2) flag fields stored in previous buffers
2538  prev_cache->read_all_flag_fields_by_pos(prev_rec_ptr);
2539 
2540  if (use_emb_key)
2541  {
2542  /* An embedded key is taken directly from the join buffer */
2543  *key= pos;
2544  len= emb_key_length;
2545  DBUG_ASSERT(len != 0);
2546  }
2547  else
2548  {
2549  /*
2550  Read key arguments from previous caches if there are any such
2551  fields
2552  */
2553  if (external_key_arg_fields)
2554  {
2555  uchar *rec_ptr= curr_rec_pos;
2556  uint key_arg_count= external_key_arg_fields;
2557  CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
2558  for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
2559  {
2560  uint len2= 0;
2561  DBUG_ASSERT(cache);
2562  rec_ptr= cache->get_rec_ref(rec_ptr);
2563  while (!cache->referenced_fields)
2564  {
2565  cache= cache->prev_cache;
2566  DBUG_ASSERT(cache);
2567  rec_ptr= cache->get_rec_ref(rec_ptr);
2568  }
2569  while (key_arg_count &&
2570  cache->read_referenced_field(*copy_ptr, rec_ptr, &len2))
2571  {
2572  copy_ptr++;
2573  --key_arg_count;
2574  }
2575  }
2576  }
2577 
2578  /*
2579  Read the other key arguments from the current record. The fields for
2580  these arguments are always first in the sequence of the record's
2581  fields.
2582  */
2583  CACHE_FIELD *copy= field_descr+flag_fields;
2584  CACHE_FIELD *copy_end= copy+local_key_arg_fields;
2585  bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
2586  for ( ; copy < copy_end; copy++)
2587  read_record_field(copy, blob_in_rec_buff);
2588 
2589  TABLE_REF *ref= &join_tab->ref;
2590  if (ref->impossible_null_ref())
2591  {
2592  DBUG_PRINT("info", ("JOIN_CACHE_BKA::get_next_key null_rejected"));
2593  /* this key cannot give a match, don't collect it, go read next key */
2594  len= 0;
2595  }
2596  else
2597  {
2598  /* Build the key over the fields read into the record buffers */
2599  cp_buffer_from_ref(join->thd, join_tab->table, ref);
2600  *key= ref->key_buff;
2601  len= ref->key_length;
2602  DBUG_ASSERT(len != 0);
2603  }
2604  }
2605  }
2606  return len;
2607 }
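
The layout that get_next_key() steps through can be pictured as below; the cursor sketch underneath mirrors the 'pos= init_pos + rec_len' advance. Sizes and helper names are illustrative, not the real cache layout constants.

#include <cstdint>
#include <cstring>

//   +---------+----------------+-------------+--------------------------+
//   | rec_len | prev-cache ref | flag fields | data fields (key         |
//   |         | (optional)     |             | arguments first, ...)    |
//   +---------+----------------+-------------+--------------------------+
//
// Minimal cursor over length-prefixed records in a flat byte buffer.
const unsigned char *next_record(const unsigned char *pos,
                                 const unsigned char *end,
                                 const unsigned char **fields,
                                 uint32_t *rec_len)
{
  if (pos >= end)
    return 0;                                  // no more records
  std::memcpy(rec_len, pos, sizeof(*rec_len)); // stored record length
  *fields= pos + sizeof(*rec_len);             // fields follow the length
  return *fields + *rec_len;                   // start of the next record
}
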
2608 
2609 
2610 /*
2611  Initialize a BKA_UNIQUE cache
2612 
2613  SYNOPSIS
2614  init()
2615 
2616  DESCRIPTION
 2617  The function initializes the cache structure. It is supposed to be called
2618  right after a constructor for the JOIN_CACHE_BKA_UNIQUE.
2619  The function allocates memory for the join buffer and for descriptors of
2620  the record fields stored in the buffer.
2621  The function also estimates the number of hash table entries in the hash
2622  table to be used and initializes this hash table.
2623 
2624  NOTES
 2625  The code of this function could have been included in the constructor
 2626  itself. However, the new operator for the class JOIN_CACHE_BKA_UNIQUE
 2627  would never fail, while memory allocation for the join buffer may
 2628  well fail. That's why this memory allocation has to be
 2629  placed in a separate function that is called in tandem with the cache
 2630  constructor.
2631  It is quite natural to put almost all other constructor actions into
2632  this function.
2633 
2634  RETURN
 2635  0 initialization with buffer allocations has succeeded
2636  1 otherwise
2637 */
2638 
 2639 int JOIN_CACHE_BKA_UNIQUE::init()
 2640 {
2641  int rc= 0;
2642  TABLE_REF *ref= &join_tab->ref;
2643 
2644  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::init");
2645 
2646  hash_table= 0;
2647  key_entries= 0;
2648 
2649  if ((rc= JOIN_CACHE_BKA::init()))
2650  DBUG_RETURN (rc);
2651 
2652  key_length= ref->key_length;
2653 
2654  /* Take into account a reference to the next record in the key chain */
2655  pack_length+= get_size_of_rec_offset();
2656 
2657  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
2658  uint max_size_of_key_ofs= max(2U, get_size_of_rec_offset());
2659  for (size_of_key_ofs= 2;
2660  size_of_key_ofs <= max_size_of_key_ofs;
2661  size_of_key_ofs+= 2)
2662  {
2663  key_entry_length= get_size_of_rec_offset() + // key chain header
2664  size_of_key_ofs + // reference to the next key
2665  (use_emb_key ? get_size_of_rec_offset() : key_length);
2666 
2667  uint n= buff_size / (pack_length+key_entry_length+size_of_key_ofs);
2668 
2669  /*
2670  TODO: Make a better estimate for this upper bound of
 2671  the number of records in the join buffer.
2672  */
2673  uint max_n= buff_size / (pack_length-length+
2674  key_entry_length+size_of_key_ofs);
2675 
2676  hash_entries= (uint) (n / 0.7);
2677 
2678  if (offset_size(max_n*key_entry_length) <=
2679  size_of_key_ofs)
2680  break;
2681  }
2682 
2683  /* Initialize the hash table */
2684  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
2685  cleanup_hash_table();
2686  curr_key_entry= hash_table;
2687 
2688  pack_length+= key_entry_length;
2689  pack_length_with_blob_ptrs+= get_size_of_rec_offset() + key_entry_length;
2690 
2691  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
2692  (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
2693 
2694  data_fields_offset= 0;
2695  if (use_emb_key)
2696  {
2697  CACHE_FIELD *copy= field_descr;
2698  CACHE_FIELD *copy_end= copy+flag_fields;
2699  for ( ; copy < copy_end; copy++)
2700  data_fields_offset+= copy->length;
2701  }
2702 
2703  DBUG_RETURN(rc);
2704 }
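
A worked example of the sizing arithmetic above, with assumed (not real) sizes; the point is that the hash table gets about n / 0.7 entries so its expected load factor stays below 0.7.

#include <cstdio>

int main()
{
  const unsigned buff_size= 1024 * 1024;  // join buffer size (assumed)
  const unsigned pack_length= 84;         // packed record size (assumed)
  const unsigned key_entry_length= 20;    // key entry size (assumed)
  const unsigned size_of_key_ofs= 2;      // offset width being tried

  // Upper bound on how many records (plus their key entries and hash
  // references) fit into the buffer, as in the loop of init():
  const unsigned n=
      buff_size / (pack_length + key_entry_length + size_of_key_ofs);
  const unsigned hash_entries= (unsigned) (n / 0.7);

  std::printf("~%u records, %u hash entries\n", n, hash_entries);
  return 0;
}
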
2705 
2706 
2707 /*
2708  Reset the JOIN_CACHE_BKA_UNIQUE buffer for reading/writing
2709 
2710  SYNOPSIS
2711  reset_cache()
 2712  for_writing if it's TRUE the function resets the buffer for writing
2713 
2714  DESCRIPTION
2715  This implementation of the virtual function reset_cache() resets the join
2716  buffer of the JOIN_CACHE_BKA_UNIQUE class for reading or writing.
 2717  In addition to what the default implementation does, this function
 2718  cleans up the hash table allocated within the buffer.
2719 
2720  RETURN
2721  none
2722 */
2723 
 2724 void JOIN_CACHE_BKA_UNIQUE::reset_cache(bool for_writing)
 2725 {
2726  this->JOIN_CACHE::reset_cache(for_writing);
2727  if (for_writing && hash_table)
2728  cleanup_hash_table();
2729  curr_key_entry= hash_table;
2730 }
2731 
2732 /*
2733  Add a record into the JOIN_CACHE_BKA_UNIQUE buffer
2734 
2735  SYNOPSIS
 2736  put_record_in_cache()
2737 
2738  DESCRIPTION
 2739  This implementation of the virtual function put_record_in_cache writes the
 2740  next matching record into the join buffer of the JOIN_CACHE_BKA_UNIQUE class.
 2741  In addition to what the default implementation does, this function
 2742  performs the following.
2743  It extracts from the record the key value used in lookups for matching
2744  records and searches for this key in the hash tables from the join cache.
2745  If it finds the key in the hash table it joins the record to the chain
2746  of records with this key. If the key is not found in the hash table the
2747  key is placed into it and a chain containing only the newly added record
2748  is attached to the key entry. The key value is either placed in the hash
2749  element added for the key or, if the use_emb_key flag is set, remains in
2750  the record from the partial join.
2751 
2752  RETURN
2753  TRUE if it has been decided that it should be the last record
2754  in the join buffer,
2755  FALSE otherwise
2756 */
2757 
2758 bool
2759 JOIN_CACHE_BKA_UNIQUE::put_record_in_cache()
2760 {
2761  uchar *key;
2762  uint key_len= key_length;
2763  uchar *key_ref_ptr;
2764  TABLE_REF *ref= &join_tab->ref;
2765  uchar *next_ref_ptr= pos;
2766  pos+= get_size_of_rec_offset();
2767 
2768  // Write record to join buffer
2769  bool is_full= JOIN_CACHE::put_record_in_cache();
2770 
2771  if (use_emb_key)
2772  {
2773  key= get_curr_emb_key();
 2774  // An embedded key is not used if one of the key columns is nullable
2775  }
2776  else
2777  {
2778  /* Build the key over the fields read into the record buffers */
2779  cp_buffer_from_ref(join->thd, join_tab->table, ref);
2780  key= ref->key_buff;
2781  if (ref->impossible_null_ref())
2782  {
2783  /*
2784  The row just put into the buffer has a NULL-value for one of
2785  the ref-columns and the ref access is NULL-rejecting, this key cannot
2786  give a match. So we don't insert it into the hash table.
 2787  We still stored the record in the buffer (the put_record_in_cache() call above),
2788  or we would later miss NULL-complementing of this record.
2789  */
2790  DBUG_PRINT("info", ("JOIN_CACHE_BKA_UNIQUE::put_record null_rejected"));
2791  return is_full;
2792  }
2793  }
2794 
2795  /* Look for the key in the hash table */
2796  if (key_search(key, key_len, &key_ref_ptr))
2797  {
2798  uchar *last_next_ref_ptr;
2799  /*
2800  The key is found in the hash table.
2801  Add the record to the circular list of the records attached to this key.
2802  Below 'rec' is the record to be added into the record chain for the found
 2803  key, 'key_ref' points to a flattened representation of the st_key_entry
2804  structure that contains the key and the head of the record chain.
2805  */
2806  last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
2807  /* rec->next_rec= key_entry->last_rec->next_rec */
2808  memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
2809  /* key_entry->last_rec->next_rec= rec */
2810  store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
2811  /* key_entry->last_rec= rec */
2812  store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
2813  }
2814  else
2815  {
2816  /*
2817  The key is not found in the hash table.
2818  Put the key into the join buffer linking it with the keys for the
2819  corresponding hash entry. Create a circular list with one element
2820  referencing the record and attach the list to the key in the buffer.
2821  */
2822  uchar *cp= last_key_entry;
2823  cp-= get_size_of_rec_offset()+get_size_of_key_offset();
2824  store_next_key_ref(key_ref_ptr, cp);
2825  store_null_key_ref(cp);
2826  store_next_rec_ref(next_ref_ptr, next_ref_ptr);
2827  store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
2828  if (use_emb_key)
2829  {
2830  cp-= get_size_of_rec_offset();
2831  store_emb_key_ref(cp, key);
2832  }
2833  else
2834  {
2835  cp-= key_len;
2836  memcpy(cp, key, key_len);
2837  }
2838  last_key_entry= cp;
2839  /* Increment the counter of key_entries in the hash table */
2840  key_entries++;
2841  }
2842  return is_full;
2843 }
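
The three store_next_rec_ref() steps above implement insertion into a circular singly linked chain whose key entry points at the last element. The same manipulation with plain pointers (hypothetical types, for illustration):

struct RecLink { RecLink *next; };          // per-record chain link
struct KeyEntryModel { RecLink *last; };    // key entry references the tail

void append_to_key_chain(KeyEntryModel *key, RecLink *rec)
{
  if (key->last == 0)
  {
    rec->next= rec;               // one-element circular chain
  }
  else
  {
    rec->next= key->last->next;   // rec->next= key_entry->last_rec->next_rec
    key->last->next= rec;         // key_entry->last_rec->next_rec= rec
  }
  key->last= rec;                 // key_entry->last_rec= rec
}
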
2844 
2845 
2846 /*
2847  Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer
2848 
2849  SYNOPSIS
2850  get_record()
2851 
2852  DESCRIPTION
 2853  In addition to what the default implementation of the virtual
 2854  function get_record does, this implementation skips the link element
 2855  used to connect the records with the same key into a chain.
2856 
2857  RETURN
2858  TRUE - there are no more records to read from the join buffer
2859  FALSE - otherwise
2860 */
2861 
2862 bool JOIN_CACHE_BKA_UNIQUE::get_record()
2863 {
2864  pos+= get_size_of_rec_offset();
2865  return this->JOIN_CACHE::get_record();
2866 }
2867 
2868 
2869 /*
2870  Skip record from the JOIN_CACHE_BKA_UNIQUE join buffer if its match flag is on
2871 
2872  SYNOPSIS
2873  skip_record_if_match()
2874 
2875  DESCRIPTION
2876  This implementation of the virtual function skip_record_if_match does
2877  the same as the default implementation does, but it takes into account
2878  the link element used to connect the records with the same key into a chain.
2879 
2880  RETURN
2881  TRUE - the match flag is on and the record has been skipped
2882  FALSE - the match flag is off
2883 */
2884 
2885 bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
2886 {
2887  uchar *save_pos= pos;
2888  pos+= get_size_of_rec_offset();
2889  if (!this->JOIN_CACHE::skip_record_if_match())
2890  {
2891  pos= save_pos;
2892  return FALSE;
2893  }
2894  return TRUE;
2895 }
2896 
2897 
2898 /*
2899  Search for a key in the hash table of the join buffer
2900 
2901  SYNOPSIS
2902  key_search()
2903  key pointer to the key value
2904  key_len key value length
2905  key_ref_ptr OUT position of the reference to the next key from
 2906  the hash element for the found key, or
 2907  a position where the reference to the hash
2908  element for the key is to be added in the
2909  case when the key has not been found
2910 
2911  DESCRIPTION
2912  The function looks for a key in the hash table of the join buffer.
 2913  If the key is found the function returns the position of the reference
 2914  to the next key from the hash element for the given key.
2915  Otherwise the function returns the position where the reference to the
2916  newly created hash element for the given key is to be added.
2917 
2918  RETURN
2919  TRUE - the key is found in the hash table
2920  FALSE - otherwise
2921 */
2922 
2923 bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
2924  uchar **key_ref_ptr)
2925 {
2926  bool is_found= FALSE;
2927  uint idx= get_hash_idx(key, key_length);
2928  uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
2929  while (!is_null_key_ref(ref_ptr))
2930  {
2931  uchar *next_key;
2932  ref_ptr= get_next_key_ref(ref_ptr);
2933  next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
2934  ref_ptr-key_length;
2935 
2936  if (memcmp(next_key, key, key_len) == 0)
2937  {
2938  is_found= TRUE;
2939  break;
2940  }
2941  }
2942  *key_ref_ptr= ref_ptr;
2943  return is_found;
2944 }
2945 
2946 
2947 /*
 2948  Calculate hash value for a key in the hash table of the join buffer
2949 
2950  SYNOPSIS
2951  get_hash_idx()
2952  key pointer to the key value
2953  key_len key value length
2954 
2955  DESCRIPTION
2956  The function calculates an index of the hash entry in the hash table
2957  of the join buffer for the given key
2958 
2959  RETURN
2960  the calculated index of the hash entry for the given key.
2961 */
2962 
2963 uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
2964 {
2965  ulong nr= 1;
2966  ulong nr2= 4;
2967  uchar *pos= key;
2968  uchar *end= key+key_len;
2969  for (; pos < end ; pos++)
2970  {
2971  nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
2972  nr2+= 3;
2973  }
2974  return nr % hash_entries;
2975 }
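
The same hashing scheme, lifted into a self-contained program so it can be run on a sample key; the bucket count below is an arbitrary example value.

#include <cstdio>

unsigned long hash_idx(const unsigned char *key, unsigned key_len,
                       unsigned long hash_entries)
{
  unsigned long nr= 1, nr2= 4;
  for (const unsigned char *p= key, *end= key + key_len; p < end; p++)
  {
    nr^= (unsigned long) ((((unsigned) nr & 63) + nr2) * ((unsigned) *p))
         + (nr << 8);
    nr2+= 3;
  }
  return nr % hash_entries;
}

int main()
{
  const unsigned char key[]= { 0x12, 0x34, 0x56, 0x78 };
  std::printf("bucket= %lu\n", hash_idx(key, sizeof(key), 1024));
  return 0;
}
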
2976 
2977 
2978 /*
2979  Clean up the hash table of the join buffer
2980 
2981  SYNOPSIS
2982  cleanup_hash_table()
2985 
2986  DESCRIPTION
2987  The function cleans up the hash table in the join buffer removing all
2988  hash elements from the table.
2989 
2990  RETURN
2991  none
2992 */
2993 
 2994 void JOIN_CACHE_BKA_UNIQUE::cleanup_hash_table()
2995 {
2996  last_key_entry= hash_table;
2997  memset(hash_table, 0, (buff+buff_size)-hash_table);
2998  key_entries= 0;
2999 }
3000 
3001 
3002 /*
3003  Initialize retrieval of range sequence for BKA_UNIQUE algorithm
3004 
3005  SYNOPSIS
3006  bka_range_seq_init()
 3007  init_param pointer to the BKA_UNIQUE join cache object
3008  n_ranges the number of ranges obtained
3009  flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
3010 
3011  DESCRIPTION
3012  The function interprets init_param as a pointer to a JOIN_CACHE_BKA_UNIQUE
3013  object. The function prepares for an iteration over the unique join keys
3014  built over the records from the cache join buffer.
3015 
3016  NOTE
 3017  This function is used only as a callback function.
3018 
3019  RETURN
3020  init_param value that is to be used as a parameter of
3021  bka_unique_range_seq_next()
3022 */
3023 
3024 static
3025 range_seq_t bka_unique_range_seq_init(void *init_param, uint n_ranges,
3026  uint flags)
3027 {
3028  DBUG_ENTER("bka_unique_range_seq_init");
3029  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) init_param;
3030  cache->reset_cache(false);
3031  DBUG_RETURN((range_seq_t) init_param);
3032 }
3033 
3034 
3035 /*
3036  Get the key over the next record from the join buffer used by BKA_UNIQUE
3037 
3038  SYNOPSIS
3039  bka_unique_range_seq_next()
3040  seq value returned by bka_unique_range_seq_init()
3041  range OUT reference to the next range
3042 
3043  DESCRIPTION
3044  The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3045  object. The function returns a pointer to the range descriptor
3046  for the next unique key built over records from the join buffer.
3047 
3048  NOTE
 3049  This function is used only as a callback function.
3050 
3051  RETURN
3052  0 ok, the range structure filled with info about the next key
3053  1 no more ranges
3054 */
3055 
3056 static
3057 uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
3058 {
3059  DBUG_ENTER("bka_unique_range_seq_next");
 3060  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
 3061  TABLE_REF *ref= &cache->join_tab->ref;
3062  key_range *start_key= &range->start_key;
3063  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
3064  {
3065  start_key->keypart_map= (1 << ref->key_parts) - 1;
3066  start_key->flag= HA_READ_KEY_EXACT;
3067  range->end_key= *start_key;
3068  range->end_key.flag= HA_READ_AFTER_KEY;
3069  range->ptr= (char *) cache->get_curr_key_chain();
3070  range->range_flag= EQ_RANGE;
3071  DBUG_RETURN(0);
3072  }
3073  DBUG_RETURN(1);
3074 }
3075 
3076 
3077 /*
3078  Check whether range_info orders to skip the next record from BKA_UNIQUE buffer
3079 
3080  SYNOPSIS
3081  bka_unique_range_seq_skip_record()
3082  seq value returned by bka_unique_range_seq_init()
3083  range_info information about the next range
 3084  rowid [NOT USED] rowid of the record to be checked
3085 
3086  DESCRIPTION
3087  The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3088  object. The function returns TRUE if the record with this range_info
3089  is to be filtered out from the stream of records returned by
3090  multi_range_read_next().
3091 
3092  NOTE
 3093  This function is used only as a callback function.
3094 
3095  RETURN
3096  1 record with this range_info is to be filtered out from the stream
3097  of records returned by multi_range_read_next()
3098  0 the record is to be left in the stream
3099 */
3100 
3101 static
3102 bool bka_unique_range_seq_skip_record(range_seq_t rseq, char *range_info,
3103  uchar *rowid)
3104 {
3105  DBUG_ENTER("bka_unique_range_seq_skip_record");
 3106  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
 3107  bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
3108  DBUG_RETURN(res);
3109 }
3110 
3111 
 3112 /*
 3113  Check if the record combination matches the index condition
 3114 
 3115  SYNOPSIS
 3116  skip_index_tuple()
 3117  rseq value returned by bka_unique_range_seq_init()
 3118  range_info MRR range association data
 3119 
 3120  DESCRIPTION
 3121  The function walks the circular chain of records attached to the key
 3122  referred to by range_info and evaluates the pushed index condition for
 3123  each of them. The record combination is skipped only if none of the
 3124  records in the chain satisfies the index condition.
 3125 
 3126  RETURN
 3127  FALSE some record in the chain satisfies the pushed index condition
 3128  TRUE none of the records in the chain satisfies the condition
 3129 */
 3130 
 3153 bool JOIN_CACHE_BKA_UNIQUE::skip_index_tuple(range_seq_t rseq, char *range_info)
3154 {
3155  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::skip_index_tuple");
 3156  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
 3157  uchar *last_rec_ref_ptr= cache->get_next_rec_ref((uchar*) range_info);
3158  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3159  do
3160  {
3161  next_rec_ref_ptr= cache->get_next_rec_ref(next_rec_ref_ptr);
3162  uchar *rec_ptr= next_rec_ref_ptr + cache->rec_fields_offset;
3163  cache->get_record_by_pos(rec_ptr);
3164  if (join_tab->cache_idx_cond->val_int())
3165  DBUG_RETURN(FALSE);
3166  } while(next_rec_ref_ptr != last_rec_ref_ptr);
3167  DBUG_RETURN(TRUE);
3168 }
3169 
3170 
3171 /*
3172  Check if the record combination matches the index condition
3173 
3174  SYNOPSIS
3175  bka_unique_skip_index_tuple()
3176  rseq Value returned by bka_range_seq_init()
3177  range_info MRR range association data
3178 
3179  DESCRIPTION
3180  This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
3181  see comments there.
3182 
3183  NOTE
3184  This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
3185 
3186  RETURN
3187  0 The record combination satisfies the index condition
3188  1 Otherwise
3189 */
3190 
3191 static
3192 bool bka_unique_skip_index_tuple(range_seq_t rseq, char *range_info)
3193 {
3194  DBUG_ENTER("bka_unique_skip_index_tuple");
 3195  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
 3196  DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
3197 }
3198 
3199 
3200 /*
3201  Using BKA_UNIQUE find matches from the next table for records from join buffer
3202 
3203  SYNOPSIS
3204  join_matching_records()
3205  skip_last do not look for matches for the last partial join record
3206 
3207  DESCRIPTION
3208  This function can be used only when the table join_tab can be accessed
3209  by keys built over the fields of previous join tables.
3210  The function retrieves all keys from the hash table of the join buffer
3211  built for partial join records from the buffer. For each of these keys
3212  the function performs an index lookup and tries to match records yielded
3213  by this lookup with records from the join buffer attached to the key.
3214  If a match is found the function will call the sub_select function trying
3215  to look for matches for the remaining join operations.
3216  This function does not assume that matching records are necessarily
3217  returned with references to the keys by which they were found. If the call
3218  of the function multi_range_read_init returns flags with
3219  HA_MRR_NO_ASSOCIATION then a search for the key built from the returned
 3220  record is carried out. The search is performed by probing in the hash
 3221  table of the join buffer.
3222  This function currently is called only from the function join_records.
3223  It's assumed that this function is always called with the skip_last
3224  parameter equal to false.
3225 
3226  RETURN
3227  return one of enum_nested_loop_state
3228 */
3229 
3230 enum_nested_loop_state
3231 JOIN_CACHE_BKA_UNIQUE::join_matching_records(bool skip_last)
3232 {
3233  /* The value of skip_last must be always FALSE when this function is called */
3234  DBUG_ASSERT(!skip_last);
3235 
3236  /* Return at once if there are no records in the join buffer */
3237  if (!records)
3238  return NESTED_LOOP_OK;
3239 
3240  const bool no_association= mrr_mode & HA_MRR_NO_ASSOCIATION;
3241  /* Set functions to iterate over keys in the join buffer */
3242  RANGE_SEQ_IF seq_funcs= { bka_unique_range_seq_init,
3243  bka_unique_range_seq_next,
3244  check_only_first_match && !no_association ?
3245  bka_unique_range_seq_skip_record : 0,
3246  join_tab->cache_idx_cond ?
3247  bka_unique_skip_index_tuple : 0 };
3248 
3249  if (init_join_matching_records(&seq_funcs, key_entries))
3250  return NESTED_LOOP_ERROR;
3251 
3252  int error;
3253  uchar *key_chain_ptr;
3254  handler *file= join_tab->table->file;
3255  enum_nested_loop_state rc= NESTED_LOOP_OK;
3256 
3257  while (!(error= file->multi_range_read_next((char **) &key_chain_ptr)))
3258  {
3259  if (no_association)
3260  {
3261  uchar *key_ref_ptr;
3262  TABLE *table= join_tab->table;
3263  TABLE_REF *ref= &join_tab->ref;
3264  KEY *keyinfo= table->key_info+ref->key;
3265  /*
3266  Build the key value out of the record returned by the call of
3267  multi_range_read_next in the record buffer
3268  */
3269  key_copy(ref->key_buff, table->record[0], keyinfo, ref->key_length);
3270  /* Look for this key in the join buffer */
3271  if (!key_search(ref->key_buff, ref->key_length, &key_ref_ptr))
3272  continue;
3273  key_chain_ptr= key_ref_ptr+get_size_of_key_offset();
3274  }
3275 
3276  if (join_tab->keep_current_rowid)
3277  join_tab->table->file->position(join_tab->table->record[0]);
3278 
3279  uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3280  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3281  do
3282  {
3283  next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3284  uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3285 
3286  if (join->thd->killed)
3287  {
3288  /* The user has aborted the execution of the query */
3289  join->thd->send_kill_message();
3290  return NESTED_LOOP_KILLED;
3291  }
3292  /*
3293  If only the first match is needed and it has been already found
3294  for the associated partial join record then the returned candidate
3295  is discarded.
3296  */
3297  if (rc == NESTED_LOOP_OK &&
3298  (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
3299  {
3300  get_record_by_pos(rec_ptr);
3301  rc= generate_full_extensions(rec_ptr);
3302  if (rc != NESTED_LOOP_OK)
3303  return rc;
3304  }
3305  }
3306  while (next_rec_ref_ptr != last_rec_ref_ptr);
3307  }
3308 
3309  if (error > 0 && error != HA_ERR_END_OF_FILE)
3310  return NESTED_LOOP_ERROR;
3311  return rc;
3312 }
3313 
3314 
3315 /*
3316  Check whether all records in a key chain have their match flags set on
3317 
3318  SYNOPSIS
3319  check_all_match_flags_for_key()
3320  key_chain_ptr
3321 
3322  DESCRIPTION
3323  This function retrieves records in the given circular chain and checks
3324  whether their match flags are set on. The parameter key_chain_ptr shall
3325  point to the position in the join buffer storing the reference to the
3326  last element of this chain.
3327 
3328  RETURN
3329  TRUE if each retrieved record has its match flag set on
3330  FALSE otherwise
3331 */
3332 
3333 bool JOIN_CACHE_BKA_UNIQUE::check_all_match_flags_for_key(uchar *key_chain_ptr)
3334 {
3335  uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3336  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3337  do
3338  {
3339  next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3340  uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3341  if (!get_match_flag_by_pos(rec_ptr))
3342  return FALSE;
3343  }
3344  while (next_rec_ref_ptr != last_rec_ref_ptr);
3345  return TRUE;
3346 }
3347 
3348 
3349 /*
3350  Get the next key built for the records from BKA_UNIQUE join buffer
3351 
3352  SYNOPSIS
3353  get_next_key()
3354  key pointer to the buffer where the key value is to be placed
3355 
3356  DESCRIPTION
3357  The function reads the next key value stored in the hash table of the
3358  join buffer. Depending on the value of the use_emb_key flag of the
3359  join cache the value is read either from the table itself or from
3360  the record field where it occurs.
3361 
3362  RETURN
 3363  length of the key value - if the starting value of 'curr_key_entry' refers
 3364  to a position after the one referred to by the value of 'last_key_entry',
 3365  0 - otherwise.
3366 */
3367 
3368 uint JOIN_CACHE_BKA_UNIQUE::get_next_key(uchar ** key)
3369 {
3370  if (curr_key_entry == last_key_entry)
3371  return 0;
3372 
3373  curr_key_entry-= key_entry_length;
3374 
3375  *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
3376 
3377  DBUG_ASSERT(*key >= buff && *key < hash_table);
3378 
3379  return key_length;
3380 }
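
Key entries live between last_key_entry and the hash table and are filled downwards, so the iteration above simply steps a cursor down by key_entry_length until it hits the lowest entry. In miniature (an illustrative helper, not the real API):

// Returns the next key entry below 'curr', or 0 when all entries between
// the hash table and 'lowest' (== last_key_entry) have been consumed.
const unsigned char *next_key_entry_down(const unsigned char *curr,
                                         const unsigned char *lowest,
                                         unsigned key_entry_length)
{
  if (curr == lowest)
    return 0;                       // no more key entries
  return curr - key_entry_length;   // step down to the next entry
}
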
3381 
 3382 /*
 3383  Check matching to a partial join record from the join buffer
 3384 
 3385  SYNOPSIS
 3386  check_match()
 3387  rec_ptr pointer to the record from join buffer to check matching to
 3388 
 3389  DESCRIPTION
 3390  This implementation first re-checks the pushed index condition, if any,
 3391  and then falls back to the generic JOIN_CACHE_BKA::check_match().
 3392 */
 3393 bool JOIN_CACHE_BKA_UNIQUE::check_match(uchar *rec_ptr)
 3394 {
3395  /* recheck pushed down index condition */
3396  if (join_tab->cache_idx_cond != NULL &&
3397  !join_tab->cache_idx_cond->val_int())
3398  return FALSE;
3399  /* continue with generic tests */
3400  return JOIN_CACHE_BKA::check_match(rec_ptr);
3401 }
3402 
3403 
3404 /****************************************************************************
3405  * Join cache module end
3406  ****************************************************************************/