MySQL 5.6.14 Source Code Document
sql_planner.cc
1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
27 #include "sql_planner.h"
28 #include "sql_optimizer.h"
29 #include "opt_range.h"
30 #include "opt_trace.h"
31 #include "sql_executor.h"
32 #include "merge_sort.h"
33 #include <my_bit.h>
34 
35 #include <algorithm>
36 using std::max;
37 using std::min;
38 
39 static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
40 static void trace_plan_prefix(JOIN *join, uint idx,
41  table_map excluded_tables);
42 
43 /*
44  This is a class for considering possible loose index scan optimizations.
 45  Its usage pattern is as follows:
46  best_access_path()
47  {
48  Loose_scan_opt opt;
49 
50  opt.init()
51  for each index we can do ref access with
52  {
53  opt.next_ref_key();
54  for each keyuse
55  opt.add_keyuse();
56  opt.check_ref_access_part1();
57  opt.check_ref_access_part2();
58  }
59 
60  if (some criteria for range scans)
61  opt.check_range_access();
62 
63  opt.save_to_position();
64  }
65 */
66 
 67 class Loose_scan_opt
 68 {
69 private:
70  /* All methods must check this before doing anything else */
71  bool try_loosescan;
72 
73  /*
74  If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
75  called sj-equality. If oeK depends only on preceding tables then such
76  equality is called 'bound'.
77  */
78  ulonglong bound_sj_equalities;
79 
80  /* Accumulated properties of ref access we're now considering: */
81  ulonglong handled_sj_equalities;
82  key_part_map loose_scan_keyparts;
87  uint max_loose_keypart;
88  bool part1_conds_met;
89 
90  /*
91  Use of quick select is a special case. Some of its properties:
92  */
93  uint quick_uses_applicable_index;
94  uint quick_max_loose_keypart;
95 
96  /* Best loose scan method so far */
97  uint best_loose_scan_key;
98  double best_loose_scan_cost;
99  double best_loose_scan_records;
100  Key_use *best_loose_scan_start_key;
101 
102  uint best_max_loose_keypart;
103 
104 public:
105  Loose_scan_opt() :
106  try_loosescan(FALSE),
107  quick_uses_applicable_index(FALSE)
108  {
109  /*
110  We needn't initialize:
111  bound_sj_equalities - protected by try_loosescan
112  quick_max_loose_keypart - protected by quick_uses_applicable_index
113  best_loose_scan_key - protected by best_loose_scan_cost != DBL_MAX
114  best_loose_scan_records - same
115  best_max_loose_keypart - same
116  best_loose_scan_start_key - same
117  Not initializing them causes compiler warnings with g++ at -O1 or higher,
118  but initializing them would cause a 2% CPU time loss in a 20-table plan
119  search. So we initialize only if warnings would stop the build.
120  */
121 #ifdef COMPILE_FLAG_WERROR
122  bound_sj_equalities= 0;
123  quick_max_loose_keypart= 0;
124  best_loose_scan_key= 0;
125  best_loose_scan_records= 0;
126  best_max_loose_keypart= 0;
127  best_loose_scan_start_key= NULL;
128 #endif
129  }
130 
131  void init(JOIN_TAB *s, table_map remaining_tables,
132  bool in_dups_producing_range, bool is_sjm_nest)
133  {
134  /*
135  We may consider the LooseScan strategy if
136  1. The next table is an SJ-inner table, and
 137  2. We have no more than 64 IN expressions (must fit in a bitmap), and
138  3. It is the first table from that semijoin, and
139  4. We're not within a semi-join range (i.e. all semi-joins either have
140  all or none of their tables in join_table_map), except
141  s->emb_sj_nest (which we've just entered, see #2), and
142  5. All non-IN-equality correlation references from this sj-nest are
143  bound, and
144  6. But some of the IN-equalities aren't (so this can't be handled by
145  FirstMatch strategy), and
146  7. LooseScan is not disabled, and
147  8. Not a derived table/view. (a temporary restriction)
148  */
149  best_loose_scan_cost= DBL_MAX;
150  if (s->emb_sj_nest && !is_sjm_nest && // (1)
151  s->emb_sj_nest->nested_join->sj_inner_exprs.elements <= 64 && // (2)
152  ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (3)
153  s->emb_sj_nest->sj_inner_tables) && // (3)
154  !in_dups_producing_range && // (4)
155  !(remaining_tables &
156  s->emb_sj_nest->nested_join->sj_corr_tables) && // (5)
157  (remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on) && //(6)
158  s->join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_LOOSE_SCAN) &&//(7)
159  !s->table->pos_in_table_list->uses_materialization()) // (8)
160  {
161  try_loosescan= true; // This table is a LooseScan scan candidate
162  bound_sj_equalities= 0; // These equalities are populated later
163  DBUG_PRINT("info", ("Will try LooseScan scan"));
164  }
165  }
166 
167  void next_ref_key()
168  {
169  handled_sj_equalities=0;
170  loose_scan_keyparts= 0;
171  max_loose_keypart= 0;
172  part1_conds_met= FALSE;
173  }
174 
175  void add_keyuse(table_map remaining_tables, Key_use *keyuse)
176  {
177  if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
178  {
179  if (!(remaining_tables & keyuse->used_tables))
180  {
181  /*
 182  This allows us to use equality propagation to infer that some
183  sj-equalities are bound.
184  */
185  bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
186  }
187  else
188  {
189  handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
190  loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
191  set_if_bigger(max_loose_keypart, keyuse->keypart);
192  }
193  }
194  }
195 
196  bool have_a_case() { return test(handled_sj_equalities); }
197 
207  void check_ref_access_part1(JOIN_TAB *s, uint key, Key_use *start_key,
208  key_part_map bound_keyparts)
209  {
210  /*
211  Check if we can use LooseScan semi-join strategy. We can if
212  1. This is the right table at right location
213  2. All IN-equalities are either
214  - "bound", ie. the outer_expr part refers to the preceding tables
215  - "handled", ie. covered by the index we're considering
 216  3. Index order allows us to enumerate the subquery's duplicate groups in
217  order. This happens when the index columns are defined in an order
218  that matches this pattern:
219  (handled_col|bound_col)* (other_col|bound_col)
220  4. No keys are defined over a partial column
221 
222  */
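 /*
   Example with a hypothetical index (k1, k2, k3): if k1 is "bound" by a
   constant from a preceding table and k2 is "handled" by an sj-equality,
   then max_loose_keypart = 1 and keyparts {k1, k2} match the
   (handled_col|bound_col)* pattern, so condition (3) above holds.
 */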
223  if (try_loosescan && // (1)
224  (handled_sj_equalities | bound_sj_equalities) == // (2)
225  LOWER_BITS(ulonglong,
226  s->emb_sj_nest->nested_join->sj_inner_exprs.elements) && // (2)
227  (LOWER_BITS(key_part_map, max_loose_keypart+1) & // (3)
228  ~(bound_keyparts | loose_scan_keyparts)) == 0 && // (3)
229  !key_uses_partial_cols(s->table, key)) // (4)
230  {
231  /* Ok, can use the strategy */
232  part1_conds_met= TRUE;
233  if (s->quick && s->quick->index == key &&
234  s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
235  {
236  quick_uses_applicable_index= TRUE;
237  quick_max_loose_keypart= max_loose_keypart;
238  }
239  DBUG_PRINT("info", ("Can use LooseScan scan"));
240 
241  /*
 242  Check if this is a confluent case where there are no usable bound
243  IN-equalities, e.g. we have
244 
245  outer_expr IN (SELECT innertbl.key FROM ...)
246 
247  and outer_expr cannot be evaluated yet, so it's actually full
248  index scan and not a ref access
249  */
250  if (!(bound_keyparts & 1 ) && /* no usable ref access for 1st key part */
251  s->table->covering_keys.is_set(key))
252  {
253  DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
254 
255  /* Calculate the cost of complete loose index scan. */
256  double records= rows2double(s->table->file->stats.records);
257 
 258  /* The cost is the entire index scan cost */
259  double read_time= s->table->file->index_only_read_time(key, records);
260 
261  /*
262  Now find out how many different keys we will get (for now we
 263  ignore the fact that we have a "keypart_i=const" restriction for
 264  some key components, which may make us think that loose
265  scan will produce more distinct records than it actually will)
266  */
267  ulong rpc;
268  if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
269  records= records / rpc;
270 
271  // TODO: previous version also did /2
272  if (read_time < best_loose_scan_cost)
273  {
274  best_loose_scan_key= key;
275  best_loose_scan_cost= read_time;
276  best_loose_scan_records= records;
277  best_max_loose_keypart= max_loose_keypart;
278  best_loose_scan_start_key= start_key;
279  }
280  }
281  }
282  }
283 
298  void check_ref_access_part2(uint key, Key_use *start_key, double records,
299  double read_time)
300  {
301  if (part1_conds_met && read_time < best_loose_scan_cost)
302  {
303  /* TODO use rec-per-key-based fanout calculations */
304  best_loose_scan_key= key;
305  best_loose_scan_cost= read_time;
306  best_loose_scan_records= records;
307  best_max_loose_keypart= max_loose_keypart;
308  best_loose_scan_start_key= start_key;
309  }
310  }
311 
312  void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
313  {
 314  /* TODO: is this the right part restriction? */
315  if (quick_uses_applicable_index && idx == join->const_tables &&
316  quick->read_time < best_loose_scan_cost)
317  {
318  best_loose_scan_key= quick->index;
319  best_loose_scan_cost= quick->read_time;
320  /* this is ok because idx == join->const_tables */
321  best_loose_scan_records= rows2double(quick->records);
322  best_max_loose_keypart= quick_max_loose_keypart;
323  best_loose_scan_start_key= NULL;
324  }
325  }
326 
327  void save_to_position(JOIN_TAB *tab, POSITION *pos)
328  {
329  pos->read_time= best_loose_scan_cost;
330  if (best_loose_scan_cost != DBL_MAX)
331  {
332  pos->records_read= best_loose_scan_records;
333  pos->key= best_loose_scan_start_key;
334  pos->loosescan_key= best_loose_scan_key;
335  pos->loosescan_parts= best_max_loose_keypart + 1;
336  pos->use_join_buffer= FALSE;
337  pos->table= tab;
338  // todo need ref_depend_map ?
339  DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
340  tab->table->key_info[best_loose_scan_key].name,
341  best_loose_scan_start_key? "(ref access)":
342  "(range/index access)"));
343  }
344  }
345 };
346 
347 
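/*
  Return the length of the prefix of consecutive key parts present in
  'bits': counts low-order 1-bits up to the first 0-bit
  (e.g. 0b0111 -> 3, 0b0101 -> 1).
*/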
348 static uint
349 max_part_bit(key_part_map bits)
350 {
351  uint found;
352  for (found=0; bits & 1 ; found++,bits>>=1) ;
353  return found;
354 }
355 
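/*
  Sum the used field lengths of the tables already placed in the join
  prefix [const_tables, idx). The result feeds the join-buffer cost
  estimate in best_access_path() (how many prefix rows fit in one
  join_buff_size-sized buffer).
*/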
356 static uint
357 cache_record_length(JOIN *join,uint idx)
358 {
359  uint length=0;
360  JOIN_TAB **pos,**end;
361  THD *thd=join->thd;
362 
363  for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
364  pos != end ;
365  pos++)
366  {
367  JOIN_TAB *join_tab= *pos;
 368  if (!join_tab->used_fieldlength) /* Not calculated yet */
369  calc_used_field_length(thd, join_tab);
370  length+=join_tab->used_fieldlength;
371  }
372  return length;
373 }
374 
375 
399 void Optimize_table_order::best_access_path(
400  JOIN_TAB *s,
401  table_map remaining_tables,
402  uint idx,
403  bool disable_jbuf,
404  double record_count,
405  POSITION *pos,
406  POSITION *loose_scan_pos)
407 {
408  Key_use *best_key= NULL;
409  uint best_max_key_part= 0;
410  bool found_constraint= false;
411  double best= DBL_MAX;
412  double best_time= DBL_MAX;
413  double records= DBL_MAX;
414  table_map best_ref_depends_map= 0;
415  double tmp;
416  bool best_uses_jbuf= false;
417  Opt_trace_context * const trace= &thd->opt_trace;
418 
419  status_var_increment(thd->status_var.last_query_partial_plans);
420 
421  /*
422  Cannot use join buffering if either
423  1. This is the first table in the join sequence, or
424  2. Join buffering is not enabled
425  (Only Block Nested Loop is considered in this context)
426  */
427  disable_jbuf= disable_jbuf ||
428  idx == join->const_tables || // 1
429  !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL); // 2
430 
431  Loose_scan_opt loose_scan_opt;
432  DBUG_ENTER("Optimize_table_order::best_access_path");
433 
434  Opt_trace_object trace_wrapper(trace, "best_access_path");
435  Opt_trace_array trace_paths(trace, "considered_access_paths");
436 
437  {
438  /*
439  Loose-scan specific-logic:
440  - we must decide whether this is within the dups_producing range.
441  - if 'pos' is within the JOIN::positions array, then decide this
442  by using the pos[-1] entry.
443  - if 'pos' is not in the JOIN::position array then
444  in_dups_producing_range must be false (this case may occur in
445  semijoin_*_access_paths() which calls best_access_path() with 'pos'
446  allocated on the stack).
447  @todo One day Loose-scan will be considered in advance_sj_state() only,
448  outside best_access_path(), so this complicated logic will not be
449  needed.
450  */
451  const bool in_dups_producing_range=
452  (idx == join->const_tables) ?
453  false :
454  (pos == (join->positions + idx) ?
455  (pos[-1].dups_producing_tables != 0) :
456  false);
457  loose_scan_opt.init(s, remaining_tables, in_dups_producing_range,
458  emb_sjm_nest != NULL);
459  }
460 
461  /*
462  This isn't unlikely at all, but unlikely() cuts 6% CPU time on a 20-table
463  search when s->keyuse==0, and has no cost when s->keyuse!=0.
464  */
465  if (unlikely(s->keyuse != NULL))
466  { /* Use key if possible */
467  TABLE *const table= s->table;
468  double best_records= DBL_MAX;
469 
470  /* Test how we can use keys */
471  ha_rows rec=
472  s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
473  for (Key_use *keyuse=s->keyuse; keyuse->table == table; )
474  {
475  key_part_map found_part= 0;
476  table_map found_ref= 0;
477  const uint key= keyuse->key;
478  uint max_key_part= 0;
479  KEY *const keyinfo= table->key_info+key;
480  const bool ft_key= (keyuse->keypart == FT_KEYPART);
481  /* Bitmap of keyparts where the ref access is over 'keypart=const': */
482  key_part_map const_part= 0;
483  /* The or-null keypart in ref-or-null access: */
484  key_part_map ref_or_null_part= 0;
485 
486  /* Calculate how many key segments of the current key we can use */
487  Key_use *const start_key= keyuse;
488 
489  loose_scan_opt.next_ref_key();
490  DBUG_PRINT("info", ("Considering ref access on key %s",
491  keyuse->table->key_info[keyuse->key].name));
492  Opt_trace_object trace_access_idx(trace);
493  trace_access_idx.add_alnum("access_type", "ref").
494  add_utf8("index", keyinfo->name);
495 
496  // For each keypart
497  while (keyuse->table == table && keyuse->key == key)
498  {
499  const uint keypart= keyuse->keypart;
500  table_map best_part_found_ref= 0;
501  double best_prev_record_reads= DBL_MAX;
502 
503  // For each way to access the keypart
504  for ( ; keyuse->table == table && keyuse->key == key &&
505  keyuse->keypart == keypart ; ++keyuse)
506  {
507  /*
508  When calculating a plan for a materialized semijoin nest,
509  we must not consider key references between tables inside the
510  semijoin nest and those outside of it. The same applies to a
511  materialized subquery.
512  */
513  if ((excluded_tables & keyuse->used_tables))
514  continue;
515  /*
 516  if 1. the expression doesn't refer to forward tables, and
 517  2. we won't get two ref-or-null's
518  */
519  if (!(remaining_tables & keyuse->used_tables) &&
520  !(ref_or_null_part && (keyuse->optimize &
521  KEY_OPTIMIZE_REF_OR_NULL)))
522  {
523  found_part|= keyuse->keypart_map;
524  if (!(keyuse->used_tables & ~join->const_table_map))
525  const_part|= keyuse->keypart_map;
526 
527  double tmp2= prev_record_reads(join, idx, (found_ref |
528  keyuse->used_tables));
529  if (tmp2 < best_prev_record_reads)
530  {
531  best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
532  best_prev_record_reads= tmp2;
533  }
534  if (rec > keyuse->ref_table_rows)
535  rec= keyuse->ref_table_rows;
536  /*
537  If there is one 'key_column IS NULL' expression, we can
 538  use the ref_or_null optimisation on this field
539  */
540  if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
541  ref_or_null_part |= keyuse->keypart_map;
542  }
543  loose_scan_opt.add_keyuse(remaining_tables, keyuse);
544  }
545  found_ref|= best_part_found_ref;
546  }
547 
548  /*
 549  Assume that each key matches a proportional part of the table.
550  */
551  if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
552  {
553  trace_access_idx.add("usable", false);
554  goto done_with_index; // Nothing usable found
555  }
556 
 557  if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
 558  rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
559 
560  /*
561  ft-keys require special treatment
562  */
563  if (ft_key)
564  {
565  /*
566  Really, there should be records=0.0 (yes!)
 567  but 1.0 would probably be safer
568  */
569  tmp= prev_record_reads(join, idx, found_ref);
570  records= 1.0;
571  }
572  else
573  {
574  found_constraint= test(found_part);
575  loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
576 
577  /* Check if we found full key */
578  if (found_part == LOWER_BITS(key_part_map, actual_key_parts(keyinfo)) &&
579  !ref_or_null_part)
580  { /* use eq key */
581  max_key_part= (uint) ~0;
582  if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
583  {
584  tmp = prev_record_reads(join, idx, found_ref);
585  records=1.0;
586  }
587  else
588  {
589  if (!found_ref)
590  { /* We found a const key */
591  /*
592  ReuseRangeEstimateForRef-1:
593  We get here if we've found a ref(const) (c_i are constants):
594  "(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond]
595 
596  If range optimizer was able to construct a "range"
597  access on this index, then its condition "quick_cond" was
 598  equivalent to ref_const_cond (*), and we can re-use E(#rows)
599  from the range optimizer.
600 
601  Proof of (*): By properties of range and ref optimizers
 602  quick_cond will be equal to or tighter than ref_const_cond.
 603  ref_const_cond already covers the "smallest" possible interval -
 604  a single-point interval over all keyparts. Therefore,
605  quick_cond is equivalent to ref_const_cond (if it was an
606  empty interval we wouldn't have got here).
607  */
608  if (table->quick_keys.is_set(key))
609  records= (double) table->quick_rows[key];
610  else
611  {
612  /* quick_range couldn't use key! */
613  records= (double) s->records/rec;
614  }
615  }
616  else
617  {
618  if (!(records= keyinfo->rec_per_key[actual_key_parts(keyinfo)-1]))
619  { /* Prefer longer keys */
620  records=
621  ((double) s->records / (double) rec *
622  (1.0 +
623  ((double) (table->s->max_key_length-keyinfo->key_length) /
624  (double) table->s->max_key_length)));
625  if (records < 2.0)
626  records=2.0; /* Can't be as good as a unique */
627  }
628  /*
629  ReuseRangeEstimateForRef-2: We get here if we could not reuse
630  E(#rows) from range optimizer. Make another try:
631 
632  If range optimizer produced E(#rows) for a prefix of the ref
 633  access we're considering, and that E(#rows) is lower than our
634  current estimate, make an adjustment. The criteria of when we
635  can make an adjustment is a special case of the criteria used
636  in ReuseRangeEstimateForRef-3.
637  */
638  if (table->quick_keys.is_set(key) &&
639  (const_part &
640  (((key_part_map)1 << table->quick_key_parts[key])-1)) ==
641  (((key_part_map)1 << table->quick_key_parts[key])-1) &&
642  table->quick_n_ranges[key] == 1 &&
643  records > (double) table->quick_rows[key])
644  {
645  records= (double) table->quick_rows[key];
646  }
647  }
648  /* Limit the number of matched rows */
649  tmp= records;
650  set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
651  if (table->covering_keys.is_set(key))
652  {
653  /* we can use only index tree */
654  tmp= record_count * table->file->index_only_read_time(key, tmp);
655  }
656  else
657  tmp= record_count*min(tmp,s->worst_seeks);
658  }
659  }
660  else
661  {
662  /*
 663  Use as many key-parts as possible; a unique key is better
 664  than a non-unique key.
665  Set tmp to (previous record count) * (records / combination)
666  */
667  if ((found_part & 1) &&
668  (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
669  found_part == LOWER_BITS(key_part_map,
670  actual_key_parts(keyinfo))))
671  {
672  max_key_part= max_part_bit(found_part);
673  /*
674  ReuseRangeEstimateForRef-3:
675  We're now considering a ref[or_null] access via
676  (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
677  (same-as-above but with one cond replaced
678  with "t.keypart_i IS NULL")] (**)
679 
680  Try re-using E(#rows) from "range" optimizer:
681  We can do so if "range" optimizer used the same intervals as
682  in (**). The intervals used by range optimizer may be not
 683  available at this point (as "range" access might have chosen to
684  create quick select over another index), so we can't compare
685  them to (**). We'll make indirect judgements instead.
686  The sufficient conditions for re-use are:
687  (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
688  this is not satisfied we have no way to know which ranges
689  will be actually scanned by 'ref' until we execute the
690  join)
691  (C2) max #key parts in 'range' access == K == max_key_part (this
692  is apparently a necessary requirement)
693 
694  We also have a property that "range optimizer produces equal or
695  tighter set of scan intervals than ref(const) optimizer". Each
 696  of the intervals in (**) is the "tightest possible" interval when
697  one limits itself to using keyparts 1..K (which we do in #2).
698  From here it follows that range access used either one, or
699  both of the (I1) and (I2) intervals:
700 
701  (t.keypart1=c1 AND ... AND t.keypartK=eK) (I1)
702  (same-as-above but with one cond replaced
703  with "t.keypart_i IS NULL") (I2)
704 
705  The remaining part is to exclude the situation where range
706  optimizer used one interval while we're considering
707  ref-or-null and looking for estimate for two intervals. This
708  is done by last limitation:
709 
710  (C3) "range optimizer used (have ref_or_null?2:1) intervals"
711  */
712  if (table->quick_keys.is_set(key) && !found_ref && //(C1)
713  table->quick_key_parts[key] == max_key_part && //(C2)
714  table->quick_n_ranges[key] == 1+test(ref_or_null_part)) //(C3)
715  {
716  tmp= records= (double) table->quick_rows[key];
717  }
718  else
719  {
 720  /* Check if we have statistics about the distribution */
721  if ((records= keyinfo->rec_per_key[max_key_part-1]))
722  {
723  /*
724  Fix for the case where the index statistics is too
725  optimistic: If
726  (1) We're considering ref(const) and there is quick select
727  on the same index,
728  (2) and that quick select uses more keyparts (i.e. it will
 729  scan an equal or smaller interval than this ref(const))
 730  (3) and E(#rows) for quick select is higher than our
731  estimate,
732  Then
733  We'll use E(#rows) from quick select.
734 
735  One observation is that when there are multiple
736  indexes with a common prefix (eg (b) and (b, c)) we
737  are not always selecting (b, c) even when this can
738  use more keyparts. Inaccuracies in statistics from
739  the storage engines can cause the record estimate
740  for the quick object for (b) to be lower than the
741  record estimate for the quick object for (b,c).
742 
743  Q: Why do we choose to use 'ref'? Won't quick select be
744  cheaper in some cases ?
745  TODO: figure this out and adjust the plan choice if needed.
746  */
747  if (!found_ref && table->quick_keys.is_set(key) && // (1)
748  table->quick_key_parts[key] > max_key_part && // (2)
749  records < (double)table->quick_rows[key]) // (3)
750  records= (double)table->quick_rows[key];
751 
752  tmp= records;
753  }
754  else
755  {
756  /*
757  Assume that the first key part matches 1% of the file
758  and that the whole key matches 10 (duplicates) or 1
759  (unique) records.
760  Assume also that more key matches proportionally more
761  records
762  This gives the formula:
763  records = (x * (b-a) + a*c-b)/(c-1)
764 
765  b = records matched by whole key
766  a = records matched by first key part (1% of all records?)
767  c = number of key parts in key
768  x = used key parts (1 <= x <= c)
769  */
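 /*
   Illustrative example with made-up numbers: if s->records = 10000 then
   a = 100 (1% of the file); assume the whole 4-part key (c = 4) matches
   b = 10 rows. Using x = 2 key parts gives
   records = (2*(10-100) + 100*4 - 10)/(4-1) = 210/3 = 70,
   i.e. a linear interpolation between a (at x = 1) and b (at x = c).
 */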
770  double rec_per_key;
771  if (!(rec_per_key=(double)
772  keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1]))
773  rec_per_key=(double) s->records/rec+1;
774 
775  if (!s->records)
776  tmp = 0;
777  else if (rec_per_key/(double) s->records >= 0.01)
778  tmp = rec_per_key;
779  else
780  {
781  double a=s->records*0.01;
782  if (keyinfo->user_defined_key_parts > 1)
783  tmp= (max_key_part * (rec_per_key - a) +
784  a * keyinfo->user_defined_key_parts - rec_per_key) /
785  (keyinfo->user_defined_key_parts - 1);
786  else
787  tmp= a;
788  set_if_bigger(tmp,1.0);
789  }
790  records = (ulong) tmp;
791  }
792 
793  if (ref_or_null_part)
794  {
 795  /* We need to do two key searches to find the matching rows */
796  tmp *= 2.0;
797  records *= 2.0;
798  }
799 
800  /*
801  ReuseRangeEstimateForRef-4: We get here if we could not reuse
802  E(#rows) from range optimizer. Make another try:
803 
804  If range optimizer produced E(#rows) for a prefix of the ref
 805  access we're considering, and that E(#rows) is lower than our
806  current estimate, make the adjustment.
807 
808  The decision whether we can re-use the estimate from the range
809  optimizer is the same as in ReuseRangeEstimateForRef-3,
810  applied to first table->quick_key_parts[key] key parts.
811  */
812  if (table->quick_keys.is_set(key) &&
813  table->quick_key_parts[key] <= max_key_part &&
814  const_part &
815  ((key_part_map)1 << table->quick_key_parts[key]) &&
816  table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
817  const_part) &&
818  records > (double) table->quick_rows[key])
819  {
820  tmp= records= (double) table->quick_rows[key];
821  }
822  }
823 
824  /* Limit the number of matched rows */
825  set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
826  if (table->covering_keys.is_set(key))
827  {
828  /* we can use only index tree */
829  tmp= record_count * table->file->index_only_read_time(key, tmp);
830  }
831  else
832  tmp= record_count * min(tmp,s->worst_seeks);
833  }
834  else
835  tmp= best_time; // Do nothing
836  }
837  // {semijoin LooseScan + ref} is disabled
838 #if 0
839  loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
840 #endif
841 
842  } /* not ft_key */
843 
844  {
845  const double idx_time= tmp + records * ROW_EVALUATE_COST;
846  trace_access_idx.add("rows", records).add("cost", idx_time);
847  if (idx_time < best_time)
848  {
849  best_time= idx_time;
850  best= tmp;
851  best_records= records;
852  best_key= start_key;
853  best_max_key_part= max_key_part;
854  best_ref_depends_map= found_ref;
855  }
856  }
857  done_with_index:
858  trace_access_idx.add("chosen", best_key == start_key);
859  } /* for each key */
860  records= best_records;
861  }
862 
863  Opt_trace_object trace_access_scan(trace);
864  /*
865  Don't test table scan if it can't be better.
866  Prefer key lookup if we would use the same key for scanning.
867 
868  Don't do a table scan on InnoDB tables, if we can read the used
 869  parts of the row from any of the used indexes.
 870  This is because table scans use an index and we would not win
871  anything by using a table scan. The only exception is INDEX_MERGE
872  quick select. We can not say for sure that INDEX_MERGE quick select
873  is always faster than ref access. So it's necessary to check if
874  ref access is more expensive.
875 
876  A word for word translation of the below if-statement in sergefp's
877  understanding: we check if we should use table scan if:
878  (1) The found 'ref' access produces more records than a table scan
879  (or index scan, or quick select), or 'ref' is more expensive than
880  any of them.
 881  (2) This doesn't hold: the best way to perform a table scan is to perform
882  'range' access using index IDX, and the best way to perform 'ref'
883  access is to use the same index IDX, with the same or more key parts.
884  (note: it is not clear how this rule is/should be extended to
885  index_merge quick selects)
886  (3) See above note about InnoDB.
887  (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access
888  path, but there is no quick select)
889  If the condition in the above brackets holds, then the only possible
890  "table scan" access method is ALL/index (there is no quick select).
891  Since we have a 'ref' access path, and FORCE INDEX instructs us to
892  choose it over ALL/index, there is no need to consider a full table
893  scan.
894  */
895  if (!(records >= s->found_records || best > s->read_time)) // (1)
896  {
897  // "scan" means (full) index scan or (full) table scan.
898  trace_access_scan.add_alnum("access_type", s->quick ? "range" : "scan").
899  add("cost", s->read_time + s->found_records * ROW_EVALUATE_COST).
900  add("rows", s->found_records).
901  add_alnum("cause", "cost");
902 
903  goto skip_table_scan;
904  }
905 
906  if ((s->quick && best_key && s->quick->index == best_key->key && // (2)
907  best_max_key_part >= s->table->quick_key_parts[best_key->key])) // (2)
908  {
909  trace_access_scan.add_alnum("access_type", "range").
910  add_alnum("cause", "heuristic_index_cheaper");
911  goto skip_table_scan;
912  }
913 
914  if ((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && //(3)
915  !s->table->covering_keys.is_clear_all() && best_key && //(3)
916  (!s->quick || //(3)
917  (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&//(3)
918  best < s->quick->read_time))) //(3)
919  {
920  trace_access_scan.add_alnum("access_type", s->quick ? "range" : "scan").
921  add_alnum("cause", "covering_index_better_than_full_scan");
922  goto skip_table_scan;
923  }
924 
925  if ((s->table->force_index && best_key && !s->quick)) // (4)
926  {
927  trace_access_scan.add_alnum("access_type", "scan").
928  add_alnum("cause", "force_index");
929  goto skip_table_scan;
930  }
931 
932  { // Check full join
933  ha_rows rnd_records= s->found_records;
934  /*
935  If there is a filtering condition on the table (i.e. ref analyzer found
936  at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
937  preceding this table in the join order we're now considering), then
938  assume that 25% of the rows will be filtered out by this condition.
939 
940  This heuristic is supposed to force tables used in exprZ to be before
941  this table in join order.
942  */
943  if (found_constraint)
944  rnd_records-= rnd_records/4;
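 // For example, an estimate of 1000 rows becomes 750 after this heuristic.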
945 
946  /*
947  If applicable, get a more accurate estimate. Don't use the two
948  heuristics at once.
949  */
950  if (s->table->quick_condition_rows != s->found_records)
951  rnd_records= s->table->quick_condition_rows;
952 
953  /*
954  Range optimizer never proposes a RANGE if it isn't better
955  than FULL: so if RANGE is present, it's always preferred to FULL.
956  Here we estimate its cost.
957  */
958 
959  if (s->quick)
960  {
961  trace_access_scan.add_alnum("access_type", "range");
962  /*
963  For each record we:
964  - read record range through 'quick'
 965  - skip rows which do not satisfy WHERE constraints
966  TODO:
967  We take into account possible use of join cache for ALL/index
968  access (see first else-branch below), but we don't take it into
969  account here for range/index_merge access. Find out why this is so.
970  */
971  tmp= record_count *
972  (s->quick->read_time +
973  (s->found_records - rnd_records) * ROW_EVALUATE_COST);
974 
975  loose_scan_opt.check_range_access(join, idx, s->quick);
976  }
977  else
978  {
979  trace_access_scan.add_alnum("access_type", "scan");
980  /* Estimate cost of reading table. */
981  if (s->table->force_index && !best_key) // index scan
982  tmp= s->table->file->read_time(s->ref.key, 1, s->records);
983  else // table scan
984  tmp= s->table->file->scan_time();
985 
986  if (disable_jbuf)
987  {
988  /*
989  For each record we have to:
990  - read the whole table record
 991  - skip rows which do not satisfy the join condition
992  */
993  tmp= record_count *
994  (tmp + (s->records - rnd_records) * ROW_EVALUATE_COST);
995  }
996  else
997  {
998  trace_access_scan.add("using_join_cache", true);
999  /*
 1000  We read the table as many times as the join buffer becomes full.
1001  It would be more exact to round the result of the division with
1002  floor(), but that takes 5% of time in a 20-table query plan search.
1003  */
1004  tmp*= (1.0 + ((double) cache_record_length(join,idx) *
1005  record_count /
1006  (double) thd->variables.join_buff_size));
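 /*
   Illustrative example with made-up numbers: with 100-byte prefix rows,
   record_count = 100000 and join_buff_size = 256KB, the factor is about
   1.0 + 100000*100/262144 ~= 39, i.e. the scanned table is re-read
   roughly once per join-buffer refill.
 */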
1007  /*
 1008  We don't make a full cartesian product between rows in the scanned
 1009  table and existing records, because we skip all rows from the
 1010  scanned table that do not satisfy the join condition when
 1011  we read the table (see flush_cached_records for details). Here we
 1012  take into account the cost to read and skip these records.
1013  */
1014  tmp+= (s->records - rnd_records) * ROW_EVALUATE_COST;
1015  }
1016  }
1017 
1018  const double scan_cost=
1019  tmp + (record_count * ROW_EVALUATE_COST * rnd_records);
1020 
1021  trace_access_scan.add("rows", rows2double(rnd_records)).
1022  add("cost", scan_cost);
1023  /*
 1024  We estimate the cost of evaluating the WHERE clause for found records
 1025  as record_count * rnd_records * ROW_EVALUATE_COST. This cost plus
 1026  tmp gives us the total cost of using TABLE SCAN
1027  */
1028  if (best == DBL_MAX ||
1029  (scan_cost < best + (record_count * ROW_EVALUATE_COST * records)))
1030  {
1031  /*
1032  If the table has a range (s->quick is set) make_join_select()
1033  will ensure that this will be used
1034  */
1035  best= tmp;
1036  records= rows2double(rnd_records);
1037  best_key= 0;
1038  /* range/index_merge/ALL/index access method are "independent", so: */
1039  best_ref_depends_map= 0;
1040  best_uses_jbuf= test(!disable_jbuf);
1041  }
1042  }
1043 
1044 skip_table_scan:
1045  trace_access_scan.add("chosen", best_key == NULL);
1046 
1047  /* Update the cost information for the current partial plan */
1048  pos->records_read= records;
1049  pos->read_time= best;
1050  pos->key= best_key;
1051  pos->table= s;
1052  pos->ref_depend_map= best_ref_depends_map;
1053  pos->loosescan_key= MAX_KEY;
1054  pos->use_join_buffer= best_uses_jbuf;
1055 
1056  loose_scan_opt.save_to_position(s, loose_scan_pos);
1057 
1058  if (!best_key &&
1059  idx == join->const_tables &&
1060  s->table == join->sort_by_table &&
1061  join->unit->select_limit_cnt >= records)
1062  {
1063  trace_access_scan.add("use_tmp_table", true);
1064  join->sort_by_table= (TABLE*) 1; // Must use temporary table
1065  }
1066 
1067  DBUG_VOID_RETURN;
1068 }
1069 
1070 
 1088 bool Optimize_table_order::choose_table_order()
 1089 {
1090  DBUG_ENTER("Optimize_table_order::choose_table_order");
1091 
1092  /* Are there any tables to optimize? */
1093  if (join->const_tables == join->tables)
1094  {
1095  memcpy(join->best_positions, join->positions,
1096  sizeof(POSITION) * join->const_tables);
1097  join->best_read= 1.0;
1098  join->best_rowcount= 1;
1099  DBUG_RETURN(false);
1100  }
1101 
1103 
1104  const bool straight_join= test(join->select_options & SELECT_STRAIGHT_JOIN);
1105  table_map join_tables;
1106 
1107  if (emb_sjm_nest)
1108  {
 1109  /* We're optimizing a semi-join materialization nest, so put the
 1110  tables from this semi-join first
1111  */
1112  merge_sort(join->best_ref + join->const_tables,
1113  join->best_ref + join->tables,
1114  Join_tab_compare_embedded_first(emb_sjm_nest));
1115  join_tables= emb_sjm_nest->sj_inner_tables;
1116  }
1117  else
1118  {
1119  /*
1120  if (SELECT_STRAIGHT_JOIN option is set)
1121  reorder tables so dependent tables come after tables they depend
1122  on, otherwise keep tables in the order they were specified in the query
1123  else
1124  Apply heuristic: pre-sort all access plans with respect to the number of
1125  records accessed.
1126  */
1127  if (straight_join)
1128  merge_sort(join->best_ref + join->const_tables,
 1129  join->best_ref + join->tables,
 1130  Join_tab_compare_straight());
 1131  else
 1132  merge_sort(join->best_ref + join->const_tables,
 1133  join->best_ref + join->tables,
 1134  Join_tab_compare_default());
1135 
1136  join_tables= join->all_table_map & ~join->const_table_map;
1137  }
1138 
1139  Opt_trace_object wrapper(&join->thd->opt_trace);
 1140  Opt_trace_array
 1141  trace_plan(&join->thd->opt_trace, "considered_execution_plans",
1142  Opt_trace_context::GREEDY_SEARCH);
1143  if (straight_join)
1144  optimize_straight_join(join_tables);
1145  else
1146  {
1147  if (greedy_search(join_tables))
1148  DBUG_RETURN(true);
1149  }
1150 
1151  // Remaining part of this function not needed when processing semi-join nests.
1152  if (emb_sjm_nest)
1153  DBUG_RETURN(false);
1154 
1155  // Fix semi-join strategies and perform final cost calculation.
1156  if (fix_semijoin_strategies())
1157  DBUG_RETURN(true);
1158 
1159  DBUG_RETURN(false);
1160 }
1161 
1162 
1196 uint Optimize_table_order::determine_search_depth(uint search_depth,
1197  uint table_count)
1198 {
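 /*
   Example: when no explicit search depth is given (search_depth == 0),
   a 5-table join gets depth 6 (exhaustive search) while a 20-table join
   is capped at depth 7 (greedy search), per the code below.
 */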
1199  if (search_depth > 0)
1200  return search_depth;
1201  /* TODO: this value should be determined dynamically, based on statistics: */
1202  const uint max_tables_for_exhaustive_opt= 7;
1203 
1204  if (table_count <= max_tables_for_exhaustive_opt)
1205  search_depth= table_count+1; // use exhaustive for small number of tables
1206  else
1207  /*
1208  TODO: this value could be determined by some mapping of the form:
1209  depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
1210  */
1211  search_depth= max_tables_for_exhaustive_opt; // use greedy search
1212 
1213  return search_depth;
1214 }
1215 
1216 
1238 void Optimize_table_order::optimize_straight_join(table_map join_tables)
1239 {
1240  JOIN_TAB *s;
1241  uint idx= join->const_tables;
1242  double record_count= 1.0;
1243  double read_time= 0.0;
1244 
1245  Opt_trace_context * const trace= &join->thd->opt_trace;
1246  for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
1247  {
1248  POSITION * const position= join->positions + idx;
1249  Opt_trace_object trace_table(trace);
1250  if (unlikely(trace->is_started()))
1251  {
1252  trace_plan_prefix(join, idx, excluded_tables);
1253  trace_table.add_utf8_table(s->table);
1254  }
1255  /*
1256  Dependency computation (make_join_statistics()) and proper ordering
1257  based on them (join_tab_cmp*) guarantee that this order is compatible
1258  with execution, check it:
1259  */
1260  DBUG_ASSERT(!check_interleaving_with_nj(s));
1261  /* Find the best access method from 's' to the current partial plan */
1262  POSITION loose_scan_pos;
1263  best_access_path(s, join_tables, idx, false, record_count,
1264  position, &loose_scan_pos);
1265 
1266  /* compute the cost of the new plan extended with 's' */
1267  record_count*= position->records_read;
1268  read_time+= position->read_time;
1269  read_time+= record_count * ROW_EVALUATE_COST;
1270  position->set_prefix_costs(read_time, record_count);
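 /*
   Illustrative example with made-up numbers: if the prefix so far yields
   1000 rows and 's' is joined with records_read = 10 and read_time = 2000,
   record_count becomes 10000 and read_time grows by
   2000 + 10000 * ROW_EVALUATE_COST.
 */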
1271 
1272  // see similar if() in best_extension_by_limited_search
1273  if (!join->select_lex->sj_nests.is_empty())
1274  advance_sj_state(join_tables, s, idx, &record_count, &read_time,
1275  &loose_scan_pos);
1276  else
1277  position->no_semijoin();
1278 
1279  trace_table.add("cost_for_plan", read_time).
1280  add("rows_for_plan", record_count);
1281  join_tables&= ~(s->table->map);
1282  ++idx;
1283  }
1284 
1285  if (join->sort_by_table &&
1286  join->sort_by_table != join->positions[join->const_tables].table->table)
1287  read_time+= record_count; // We have to make a temp table
1288 
1289  memcpy(join->best_positions, join->positions, sizeof(POSITION)*idx);
1290 
1297  join->best_read= read_time - 0.001;
1298  join->best_rowcount= (ha_rows)record_count;
1299 }
1300 
1301 
1331 static int
1332 semijoin_order_allows_materialization(const JOIN *join,
1333  table_map remaining_tables,
1334  const JOIN_TAB *tab, uint idx)
1335 {
1336  DBUG_ASSERT(!(remaining_tables & tab->table->map));
1337  /*
1338  Check if
1339  1. We're in a semi-join nest that can be run with SJ-materialization
1340  2. All the tables from the subquery are in the prefix
1341  */
1342  const TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
1343  if (!emb_sj_nest ||
1344  !emb_sj_nest->nested_join->sjm.positions ||
1345  (remaining_tables & emb_sj_nest->sj_inner_tables))
1346  return SJ_OPT_NONE;
1347 
1348  /*
1349  Walk back and check if all immediately preceding tables are from
1350  this semi-join.
1351  */
1352  const uint n_tables= my_count_bits(emb_sj_nest->sj_inner_tables);
1353  for (uint i= 1; i < n_tables ; i++)
1354  {
1355  if (join->positions[idx - i].table->emb_sj_nest != emb_sj_nest)
1356  return SJ_OPT_NONE;
1357  }
1358 
1359  /*
1360  Must use MaterializeScan strategy if there are outer correlated tables
1361  among the remaining tables, otherwise, if possible, use MaterializeLookup.
1362  */
1363  if ((remaining_tables & emb_sj_nest->nested_join->sj_depends_on) ||
1364  !emb_sj_nest->nested_join->sjm.lookup_allowed)
1365  {
1366  if (emb_sj_nest->nested_join->sjm.scan_allowed)
1367  return SJ_OPT_MATERIALIZE_SCAN;
1368  return SJ_OPT_NONE;
1369  }
1370  return SJ_OPT_MATERIALIZE_LOOKUP;
1371 }
1372 
1373 
1454 bool Optimize_table_order::greedy_search(table_map remaining_tables)
1455 {
1456  double record_count= 1.0;
1457  double read_time= 0.0;
1458  uint idx= join->const_tables; // index into 'join->best_ref'
1459  uint best_idx;
1460  POSITION best_pos;
1461  JOIN_TAB *best_table; // the next plan node to be added to the curr QEP
1462  DBUG_ENTER("Optimize_table_order::greedy_search");
1463 
1464  /* Number of tables that we are optimizing */
1465  const uint n_tables= my_count_bits(remaining_tables);
1466 
1467  /* Number of tables remaining to be optimized */
1468  uint size_remain= n_tables;
1469 
1470  do {
1471  /* Find the extension of the current QEP with the lowest cost */
1472  join->best_read= DBL_MAX;
1473  join->best_rowcount= HA_POS_ERROR;
1474  if (best_extension_by_limited_search(remaining_tables, idx,
1475  record_count, read_time,
1476  search_depth))
1477  DBUG_RETURN(true);
1478  /*
1479  'best_read < DBL_MAX' means that optimizer managed to find
1480  some plan and updated 'best_positions' array accordingly.
1481  */
1482  DBUG_ASSERT(join->best_read < DBL_MAX);
1483 
1484  if (size_remain <= search_depth)
1485  {
1486  /*
1487  'join->best_positions' contains a complete optimal extension of the
1488  current partial QEP.
1489  */
1490  DBUG_EXECUTE("opt", print_plan(join, n_tables, record_count, read_time,
1491  read_time, "optimal"););
1492  DBUG_RETURN(false);
1493  }
1494 
1495  /* select the first table in the optimal extension as most promising */
1496  best_pos= join->best_positions[idx];
1497  best_table= best_pos.table;
1498  /*
1499  Each subsequent loop of 'best_extension_by_limited_search' uses
1500  'join->positions' for cost estimates, therefore we have to update its
1501  value.
1502  */
1503  join->positions[idx]= best_pos;
1504 
1505  /*
1506  Search depth is smaller than the number of remaining tables to join.
1507  - Update the interleaving state after extending the current partial plan
1508  with a new table. We are doing this here because
1509  best_extension_by_limited_search reverts the interleaving state to the
1510  one of the non-extended partial plan on exit.
1511  - The semi join state is entirely in POSITION, so it is transferred fine
1512  when we copy POSITION objects (no special handling needed).
1513  - After we have chosen the final plan covering all tables, the nested
1514  join state will not be reverted back to its initial state because we
1515  don't "pop" tables already present in the partial plan.
1516  */
1517  bool is_interleave_error __attribute__((unused))=
1518  check_interleaving_with_nj (best_table);
1519  /* This has been already checked by best_extension_by_limited_search */
1520  DBUG_ASSERT(!is_interleave_error);
1521 
1522  /* find the position of 'best_table' in 'join->best_ref' */
1523  best_idx= idx;
1524  JOIN_TAB *pos= join->best_ref[best_idx];
1525  while (pos && best_table != pos)
1526  pos= join->best_ref[++best_idx];
1527  DBUG_ASSERT((pos != NULL)); // should always find 'best_table'
1528  /*
1529  Maintain '#rows-sorted' order of 'best_ref[]':
1530  - Shift 'best_ref[]' to make first position free.
1531  - Insert 'best_table' at the first free position in the array of joins.
1532  */
1533  memmove(join->best_ref + idx + 1, join->best_ref + idx,
1534  sizeof(JOIN_TAB*) * (best_idx - idx));
1535  join->best_ref[idx]= best_table;
1536 
1537  /* compute the cost of the new plan extended with 'best_table' */
1538  record_count*= join->positions[idx].records_read;
1539  read_time+= join->positions[idx].read_time
1540  + record_count * ROW_EVALUATE_COST;
1541 
1542  remaining_tables&= ~(best_table->table->map);
1543  --size_remain;
1544  ++idx;
1545 
1546  DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time,
1547  read_time, "extended"););
1548  } while (true);
1549 }
1550 
1551 
1552 /*
1553  Calculate a cost of given partial join order
1554 
1555  SYNOPSIS
1556  get_partial_join_cost()
1557  join IN Join to use. join->positions holds the
1558  partial join order
1559  n_tables IN # tables in the partial join order
1560  read_time_arg OUT Store read time here
1561  record_count_arg OUT Store record count here
1562 
1563  DESCRIPTION
1564 
1565  This is needed for semi-join materialization code. The idea is that
1566  we detect sj-materialization after we've put all sj-inner tables into
1567  the join prefix
1568 
1569  prefix-tables semi-join-inner-tables tN
1570  ^--we're here
1571 
1572  and we'll need to get the cost of prefix-tables prefix again.
1573 */
1574 
1575 void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
1576  double *record_count_arg)
1577 {
1578  double record_count= 1;
1579  double read_time= 0.0;
1580  for (uint i= join->const_tables; i < n_tables + join->const_tables ; i++)
1581  {
1582  if (join->best_positions[i].records_read)
1583  {
1584  record_count *= join->best_positions[i].records_read;
1585  read_time += join->best_positions[i].read_time
1586  + record_count * ROW_EVALUATE_COST;
1587  }
1588  }
1589  *read_time_arg= read_time;
1590  *record_count_arg= record_count;
1591 }
1592 
1593 
1608 void Optimize_table_order::consider_plan(uint idx,
1609  double record_count,
1610  double read_time,
1611  Opt_trace_object *trace_obj)
1612 {
1613  /*
1614  We may have to make a temp table, note that this is only a
1615  heuristic since we cannot know for sure at this point.
 1616  Hence it may be too pessimistic.
1617  */
1618  if (join->sort_by_table &&
1619  join->sort_by_table !=
1620  join->positions[join->const_tables].table->table)
1621  {
1622  read_time+= record_count;
1623  trace_obj->add("sort_cost", record_count).
1624  add("new_cost_for_plan", read_time);
1625  }
1626 
1627  const bool chosen= read_time < join->best_read;
1628  trace_obj->add("chosen", chosen);
1629  if (chosen)
1630  {
1631  memcpy((uchar*) join->best_positions, (uchar*) join->positions,
1632  sizeof(POSITION) * (idx + 1));
1633 
1634  /*
1635  If many plans have identical cost, which one will be used
 1636  depends on how the compiler optimizes floating-point calculations.
 1637  This fix adds repeatability to the optimizer.
1638  (Similar code in best_extension_by_li...)
1639  */
1640  join->best_read= read_time - 0.001;
1641  join->best_rowcount= (ha_rows)record_count;
1642  }
1643  DBUG_EXECUTE("opt", print_plan(join, idx+1,
1644  record_count,
1645  read_time,
1646  read_time,
1647  "full_plan"););
1648 }
1649 
1650 
1775 bool Optimize_table_order::best_extension_by_limited_search(
1776  table_map remaining_tables,
1777  uint idx,
1778  double record_count,
1779  double read_time,
1780  uint current_search_depth)
1781 {
1782  DBUG_ENTER("Optimize_table_order::best_extension_by_limited_search");
1783 
1784  DBUG_EXECUTE_IF("bug13820776_2", thd->killed= THD::KILL_QUERY;);
1785  if (thd->killed) // Abort
1786  DBUG_RETURN(true);
1787  Opt_trace_context * const trace= &thd->opt_trace;
1788 
1789  /*
1790  'join' is a partial plan with lower cost than the best plan so far,
1791  so continue expanding it further with the tables in 'remaining_tables'.
1792  */
1793  double best_record_count= DBL_MAX;
1794  double best_read_time= DBL_MAX;
1795 
1796  DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
1797  "part_plan"););
1798  /*
1799  No need to call advance_sj_state() when
1800  1) there are no semijoin nests or
1801  2) we are optimizing a materialized semijoin nest.
1802  */
1803  const bool has_sj= !(join->select_lex->sj_nests.is_empty() || emb_sjm_nest);
1804 
1805  /*
 1806  'eq_ref_extended' is the set of 'remaining_tables' that have already been
 1807  involved in a partial query plan extension of this QEP. These
 1808  will not be considered in further EQ_REF extensions based
 1809  on the current (partial) QEP.
1810  */
1811  table_map eq_ref_extended(0);
1812 
1813  JOIN_TAB *saved_refs[MAX_TABLES];
 1814  // Save 'best_ref[]' as we have to restore it before returning.
1815  memcpy(saved_refs, join->best_ref + idx,
1816  sizeof(JOIN_TAB*) * (join->tables - idx));
1817 
1818  for (JOIN_TAB **pos= join->best_ref + idx; *pos; pos++)
1819  {
1820  JOIN_TAB *const s= *pos;
1821  const table_map real_table_bit= s->table->map;
1822 
1823  /*
1824  Don't move swap inside conditional code: All items should
 1825  be unconditionally swapped to maintain '#rows-ordered' best_ref[].
1826  This is critical for early pruning of bad plans.
1827  */
1828  swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
1829 
1830  if ((remaining_tables & real_table_bit) &&
1831  !(eq_ref_extended & real_table_bit) &&
1832  !(remaining_tables & s->dependent) &&
1833  (!idx || !check_interleaving_with_nj(s)))
1834  {
1835  double current_record_count, current_read_time;
1836  Opt_trace_object trace_one_table(trace);
1837  if (unlikely(trace->is_started()))
1838  {
1839  trace_plan_prefix(join, idx, excluded_tables);
1840  trace_one_table.add_utf8_table(s->table);
1841  }
1842  POSITION *const position= join->positions + idx;
1843 
1844  // If optimizing a sj-mat nest, tables in this plan must be in nest:
1845  DBUG_ASSERT(emb_sjm_nest == NULL || emb_sjm_nest == s->emb_sj_nest);
1846  /* Find the best access method from 's' to the current partial plan */
1847  POSITION loose_scan_pos;
1848  best_access_path(s, remaining_tables, idx, false, record_count,
1849  position, &loose_scan_pos);
1850 
1851  /* Compute the cost of extending the plan with 's' */
1852  current_record_count= record_count * position->records_read;
1853  current_read_time= read_time
1854  + position->read_time
1855  + current_record_count * ROW_EVALUATE_COST;
1856  position->set_prefix_costs(current_read_time, current_record_count);
1857 
1858  trace_one_table.add("cost_for_plan", current_read_time).
1859  add("rows_for_plan", current_record_count);
1860 
1861  if (has_sj)
1862  {
1863  /*
1864  Even if there are no semijoins, advance_sj_state() has a significant
1865  cost (takes 9% of time in a 20-table plan search), hence the if()
1866  above, which is also more efficient than the same if() inside
1867  advance_sj_state() would be.
1868  Besides, never call advance_sj_state() when calculating the plan
1869  for a materialized semi-join nest.
1870  */
1871  advance_sj_state(remaining_tables, s, idx,
1872  &current_record_count, &current_read_time,
1873  &loose_scan_pos);
1874  }
1875  else
1876  position->no_semijoin();
1877 
1878  /* Expand only partial plans with lower cost than the best QEP so far */
1879  if (current_read_time >= join->best_read)
1880  {
1881  DBUG_EXECUTE("opt", print_plan(join, idx+1,
1882  current_record_count,
1883  read_time,
1884  current_read_time,
1885  "prune_by_cost"););
1886  trace_one_table.add("pruned_by_cost", true);
1887  backout_nj_state(remaining_tables, s);
1888  continue;
1889  }
1890 
1891  /*
1892  Prune some less promising partial plans. This heuristic may miss
1893  the optimal QEPs, thus it results in a non-exhaustive search.
1894  */
1895  if (prune_level == 1)
1896  {
1897  if (best_record_count > current_record_count ||
1898  best_read_time > current_read_time ||
1899  (idx == join->const_tables && // 's' is the first table in the QEP
1900  s->table == join->sort_by_table))
1901  {
1902  if (best_record_count >= current_record_count &&
1903  best_read_time >= current_read_time &&
1904  /* TODO: What is the reasoning behind this condition? */
1905  (!(s->key_dependent & remaining_tables) ||
1906  position->records_read < 2.0))
1907  {
1908  best_record_count= current_record_count;
1909  best_read_time= current_read_time;
1910  }
1911  }
1912  else
1913  {
1914  DBUG_EXECUTE("opt", print_plan(join, idx+1,
1915  current_record_count,
1916  read_time,
1917  current_read_time,
1918  "pruned_by_heuristic"););
1919  trace_one_table.add("pruned_by_heuristic", true);
1920  backout_nj_state(remaining_tables, s);
1921  continue;
1922  }
1923  }
1924 
1925  const table_map remaining_tables_after=
1926  (remaining_tables & ~real_table_bit);
1927  if ((current_search_depth > 1) && remaining_tables_after)
1928  {
1929  /*
1930  Explore more extensions of plan:
1931  If possible, use heuristic to avoid a full expansion of partial QEP.
1932  Evaluate a simplified EQ_REF extension of QEP if:
1933  1) Pruning is enabled.
1934  2) and, There are tables joined by (EQ_)REF key.
1935  3) and, There is a 1::1 relation between those tables
1936  */
1937  if (prune_level == 1 && // 1)
1938  position->key != NULL && // 2)
1939  position->records_read <= 1.0) // 3)
1940  {
1941  /*
 1942  The join at this 'position' is an EQ_REF-joined table, so append more EQ_REFs.
 1943  We do this only for the first EQ_REF we encounter, which will then
 1944  include other EQ_REFs from 'remaining_tables' and record which
 1945  tables were 'eq_ref_extended'. These are later 'pruned' as they were
 1946  already processed here.
1947  */
1948  if (eq_ref_extended == (table_map)0)
1949  {
1950  /* Try an EQ_REF-joined expansion of the partial plan */
1951  Opt_trace_array trace_rest(trace, "rest_of_plan");
1952  eq_ref_extended= real_table_bit |
1953  eq_ref_extension_by_limited_search(
1954  remaining_tables_after,
1955  idx + 1,
1956  current_record_count,
1957  current_read_time,
1958  current_search_depth - 1);
1959  if (eq_ref_extended == ~(table_map)0)
1960  DBUG_RETURN(true); // Failed
1961 
1962  backout_nj_state(remaining_tables, s);
1963 
1964  if (eq_ref_extended == remaining_tables)
1965  goto done;
1966 
1967  continue;
1968  }
1969  else // Skip, as described above
1970  {
1971  DBUG_EXECUTE("opt", print_plan(join, idx+1,
1972  current_record_count,
1973  read_time,
1974  current_read_time,
1975  "pruned_by_eq_ref_heuristic"););
1976  trace_one_table.add("pruned_by_eq_ref_heuristic", true);
1977  backout_nj_state(remaining_tables, s);
1978  continue;
1979  }
1980  } // if (prunable...)
1981 
1982  /* Fallthrough: Explore more best extensions of plan */
1983  Opt_trace_array trace_rest(trace, "rest_of_plan");
1984  if (best_extension_by_limited_search(remaining_tables_after,
1985  idx + 1,
1986  current_record_count,
1987  current_read_time,
1988  current_search_depth - 1))
1989  DBUG_RETURN(true);
1990  }
1991  else //if ((current_search_depth > 1) && ...
1992  {
1993  consider_plan(idx, current_record_count, current_read_time,
1994  &trace_one_table);
1995  /*
1996  If plan is complete, there should be no "open" outer join nest, and
1997  all semi join nests should be handled by a strategy:
1998  */
1999  DBUG_ASSERT((remaining_tables_after != 0) ||
2000  ((cur_embedding_map == 0) &&
2001  (join->positions[idx].dups_producing_tables == 0)));
2002  }
2003  backout_nj_state(remaining_tables, s);
2004  }
2005  }
2006 
2007 done:
2008  // Restore previous #rows sorted best_ref[]
2009  memcpy(join->best_ref + idx, saved_refs,
2010  sizeof(JOIN_TAB*) * (join->tables-idx));
2011  DBUG_RETURN(false);
2012 }
2013 
2014 
2128 table_map Optimize_table_order::eq_ref_extension_by_limited_search(
2129  table_map remaining_tables,
2130  uint idx,
2131  double record_count,
2132  double read_time,
2133  uint current_search_depth)
2134 {
2135  DBUG_ENTER("Optimize_table_order::eq_ref_extension_by_limited_search");
2136 
2137  if (remaining_tables == 0)
2138  DBUG_RETURN(0);
2139 
2140  const bool has_sj= !(join->select_lex->sj_nests.is_empty() || emb_sjm_nest);
2141 
2142  /*
2143  The section below adds 'eq_ref' joinable tables to the QEP in the order
2144  they are found in the 'remaining_tables' set.
2145  See above description for why we can add these without greedy
2146  cost analysis.
2147  */
2148  Opt_trace_context * const trace= &thd->opt_trace;
2149  table_map eq_ref_ext(0);
2150  JOIN_TAB *s;
2151  JOIN_TAB *saved_refs[MAX_TABLES];
2152  // Save 'best_ref[]' as we have to restore it before returning.
2153  memcpy(saved_refs, join->best_ref + idx,
2154  sizeof(JOIN_TAB*) * (join->tables-idx));
2155 
2156  for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
2157  {
2158  const table_map real_table_bit= s->table->map;
2159 
2160  /*
2161  Don't move this swap inside conditional code: all items
2162  must be swapped to keep the tables ordered by '#rows'.
2163  This is critical for early pruning of bad plans.
2164  */
2165  swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
2166 
2167  /*
2168  Consider table for 'eq_ref' heuristic if:
2169  1) It might use a keyref for best_access_path
2170  2) and, Table remains to be handled.
2171  3) and, It is independent of those not yet in partial plan.
2172  4) and, It passed the interleaving check.
2173  */
2174  if (s->keyuse && // 1)
2175  (remaining_tables & real_table_bit) && // 2)
2176  !(remaining_tables & s->dependent) && // 3)
2177  (!idx || !check_interleaving_with_nj(s))) // 4)
2178  {
2179  Opt_trace_object trace_one_table(trace);
2180  if (unlikely(trace->is_started()))
2181  {
2182  trace_plan_prefix(join, idx, excluded_tables);
2183  trace_one_table.add_utf8_table(s->table);
2184  }
2185  POSITION *const position= join->positions + idx;
2186  POSITION loose_scan_pos;
2187 
2188  DBUG_ASSERT(emb_sjm_nest == NULL || emb_sjm_nest == s->emb_sj_nest);
2189  /* Find the best access method from 's' to the current partial plan */
2190  best_access_path(s, remaining_tables, idx, false, record_count,
2191  position, &loose_scan_pos);
2192 
2193  /*
2194  EQ_REF prune logic is based on the assumption that all joins
2195  in the ref_extension have the same #rows and cost.
2196  -> The total cost of the QEP is independent of the order
2197  of joins within this 'ref_extension'.
2198  Expand the QEP with all 'identical' REFs in
2199  'join->positions' order.
2200  */
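 /*
  Sketch of the arithmetic behind this (hypothetical numbers): if two
  EQ_REF tables A and B both have records_read = 1.0 and read_time = r,
  then appending them in either order yields

    rows = record_count * 1.0 * 1.0
    cost = read_time + 2*r + 2*rows*ROW_EVALUATE_COST

  so swapping A and B inside the extension changes neither the row count
  nor the cost of the resulting QEP.
 */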
2201  const bool added_to_eq_ref_extension=
2202  position->key &&
2203  position->read_time == (position-1)->read_time &&
2204  position->records_read == (position-1)->records_read;
2205  trace_one_table.add("added_to_eq_ref_extension",
2206  added_to_eq_ref_extension);
2207  if (added_to_eq_ref_extension)
2208  {
2209  double current_record_count, current_read_time;
2210 
2211  /* Add the cost of extending the plan with 's' */
2212  current_record_count= record_count * position->records_read;
2213  current_read_time= read_time
2214  + position->read_time
2215  + current_record_count * ROW_EVALUATE_COST;
2216  position->set_prefix_costs(current_read_time, current_record_count);
2217 
2218  trace_one_table.add("cost_for_plan", current_read_time).
2219  add("rows_for_plan", current_record_count);
2220 
2221  if (has_sj)
2222  {
2223  /*
2224  Even if there are no semijoins, advance_sj_state() has a
2225  significant cost (takes 9% of time in a 20-table plan search),
2226  hence the if() above, which is also more efficient than the
2227  same if() inside advance_sj_state() would be.
2228  */
2229  advance_sj_state(remaining_tables, s, idx,
2230  &current_record_count, &current_read_time,
2231  &loose_scan_pos);
2232  }
2233  else
2234  position->no_semijoin();
2235 
2236  // Expand only partial plans with lower cost than the best QEP so far
2237  if (current_read_time >= join->best_read)
2238  {
2239  DBUG_EXECUTE("opt", print_plan(join, idx+1,
2240  current_record_count,
2241  read_time,
2242  current_read_time,
2243  "prune_by_cost"););
2244  trace_one_table.add("pruned_by_cost", true);
2245  backout_nj_state(remaining_tables, s);
2246  continue;
2247  }
2248 
2249  eq_ref_ext= real_table_bit;
2250  const table_map remaining_tables_after=
2251  (remaining_tables & ~real_table_bit);
2252  if ((current_search_depth > 1) && remaining_tables_after)
2253  {
2254  DBUG_EXECUTE("opt", print_plan(join, idx + 1,
2255  current_record_count,
2256  read_time,
2257  current_read_time,
2258  "EQ_REF_extension"););
2259 
2260  /* Recursively EQ_REF-extend the current partial plan */
2261  Opt_trace_array trace_rest(trace, "rest_of_plan");
2262  eq_ref_ext|=
2263  eq_ref_extension_by_limited_search(remaining_tables_after,
2264  idx + 1,
2265  current_record_count,
2266  current_read_time,
2267  current_search_depth - 1);
2268  }
2269  else
2270  {
2271  consider_plan(idx, current_record_count, current_read_time,
2272  &trace_one_table);
2273  DBUG_ASSERT((remaining_tables_after != 0) ||
2274  ((cur_embedding_map == 0) &&
2275  (join->positions[idx].dups_producing_tables == 0)));
2276  }
2277  backout_nj_state(remaining_tables, s);
2278  memcpy(join->best_ref + idx, saved_refs,
2279  sizeof(JOIN_TAB*) * (join->tables - idx));
2280  DBUG_RETURN(eq_ref_ext);
2281  } // if (added_to_eq_ref_extension)
2282 
2283  backout_nj_state(remaining_tables, s);
2284  } // if (... !check_interleaving_with_nj() ...)
2285  } // for (JOIN_TAB **pos= ...)
2286 
2287  memcpy(join->best_ref + idx, saved_refs, sizeof(JOIN_TAB*) * (join->tables-idx));
2288  /*
2289  The 'eq_ref' heuristic didn't find a table to append to
2290  the query plan. We need to fall back to the greedy search
2291  to find the next table to add.
2292  */
2293  DBUG_ASSERT(!eq_ref_ext);
2294  if (best_extension_by_limited_search(remaining_tables,
2295  idx,
2296  record_count,
2297  read_time,
2298  current_search_depth))
2299  DBUG_RETURN(~(table_map)0);
2300 
2301  DBUG_RETURN(eq_ref_ext);
2302 }
2303 
2304 
2305 /*
2306  Get the number of different row combinations for subset of partial join
2307 
2308  SYNOPSIS
2309  prev_record_reads()
2310  join The join structure
2311  idx Number of tables in the partial join order (i.e. the
2312  partial join order is in join->positions[0..idx-1])
2313  found_ref Bitmap of tables for which we need to find # of distinct
2314  row combinations.
2315 
2316  DESCRIPTION
2317  Given a partial join order (in join->positions[0..idx-1]) and a subset of
2318  tables within that join order (specified in found_ref), find out how many
2319  distinct row combinations of subset tables will be in the result of the
2320  partial join order.
2321 
2322  This is used as follows: Suppose we have a table accessed with a ref-based
2323  method. The ref access depends on current rows of tables in found_ref.
2324  We want to count # of different ref accesses. We assume two ref accesses
2325  will be different if at least one of access parameters is different.
2326  Example: consider a query
2327 
2328  SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field
2329 
2330  and a join order:
2331  t1, ref access on t1.key=c1
2332  t2, ref access on t2.key=c2
2333  t3, ref access on t3.key=t1.field
2334 
2335  For t1: n_ref_scans = 1, n_distinct_ref_scans = 1
2336  For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1
2337  For t3: n_ref_scans = records_read(t1)*records_read(t2)
2338  n_distinct_ref_scans = #records_read(t1)
2339 
2340  The reason for having this function (at least the latest version of it)
2341  is that we need to account for buffering in join execution.
2342 
2343  An edge-case example: if we have a non-first table in join accessed via
2344  ref(const) or ref(param) where there is a small number of different
2345  values of param, then the access will likely hit the disk cache and will
2346  not require any disk seeks.
2347 
2348  The proper solution would be to assume an LRU disk cache of some size,
2349  calculate probability of cache hits, etc. For now we just count
2350  identical ref accesses as one.
2351 
2352  RETURN
2353  Expected number of row combinations
2354 */
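/*
  Worked example for the query above (hypothetical row counts): assume
  records_read(t1) = 5 and records_read(t2) = 20, and that we call
  prev_record_reads() for t3 with found_ref = {t1}. The loop below multiplies
  only records_read(t1) and returns 5.0: although t3 is probed
  5 * 20 = 100 times, only 5 distinct values of t1.field are used, so only
  5 distinct ref accesses are counted.
*/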
2355 
2356 static double
2357 prev_record_reads(JOIN *join, uint idx, table_map found_ref)
2358 {
2359  double found=1.0;
2360  POSITION *pos_end= join->positions - 1;
2361  for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--)
2362  {
2363  if (pos->table->table->map & found_ref)
2364  {
2365  found_ref|= pos->ref_depend_map;
2366  /*
2367  For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table
2368  with no matching row we will get position[t2].records_read==0.
2369  Actually the size of output is one null-complemented row, therefore
2370  we will use value of 1 whenever we get records_read==0.
2371 
2372  Note
2373  - the above case can't occur if inner part of outer join has more
2374  than one table: table with no matches will not be marked as const.
2375 
2376  - Ideally we should add 1 to records_read for every possible null-
2377  complemented row. We're not doing it because: 1. it will require
2378  non-trivial code and add overhead. 2. The value of records_read
2379  is an imprecise estimate and adding 1 (or, in the worst case,
2380  #max_nested_outer_joins=64-1) will not make it any more precise.
2381  */
2382  if (pos->records_read > DBL_EPSILON)
2383  found*= pos->records_read;
2384  }
2385  }
2386  return found;
2387 }
2388 
2389 
2430 bool Optimize_table_order::fix_semijoin_strategies()
2431 {
2432  table_map remaining_tables= 0;
2433  table_map handled_tables= 0;
2434 
2435  DBUG_ENTER("Optimize_table_order::fix_semijoin_strategies");
2436 
2437  if (join->select_lex->sj_nests.is_empty())
2438  DBUG_RETURN(false);
2439 
2440  Opt_trace_context *const trace= &thd->opt_trace;
2441 
2442  for (uint tableno= join->tables - 1;
2443  tableno != join->const_tables - 1;
2444  tableno--)
2445  {
2446  POSITION *const pos= join->best_positions + tableno;
2447 
2448  if ((handled_tables & pos->table->table->map) ||
2449  pos->sj_strategy == SJ_OPT_NONE)
2450  {
2451  remaining_tables|= pos->table->table->map;
2452  continue;
2453  }
2454 
2455  uint first;
2456  LINT_INIT(first);
2457  if (pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP)
2458  {
2459  TABLE_LIST *const sjm_nest= pos->table->emb_sj_nest;
2460  const uint table_count= my_count_bits(sjm_nest->sj_inner_tables);
2461  /*
2462  This memcpy() copies a partial QEP produced by
2463  optimize_semijoin_nests_for_materialization() (source) into the final
2464  top-level QEP (target), in order to re-use the source plan for
2465  to-be-materialized inner tables.
2466  It is however possible that the source QEP had picked
2467  some semijoin strategy (noted SJY), different from
2468  materialization. The target QEP takes precedence (it has seen more
2469  tables), but this memcpy() would wrongly copy the source's stale
2470  strategy SJY. This is why the sj_strategy of each table of the
2471  duplicate-generating range then becomes temporarily unreliable. It is
2472  fixed for the first table of that range right after the memcpy(), and
2473  fixed for the rest of that range at the end of this iteration by
2474  setting it to SJ_OPT_NONE. But until then, pos->sj_strategy should
2475  not be read.
2476  */
2477  memcpy(pos - table_count + 1, sjm_nest->nested_join->sjm.positions,
2478  sizeof(POSITION) * table_count);
2479  first= tableno - table_count + 1;
2480  join->best_positions[first].n_sj_tables= table_count;
2481  join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_LOOKUP;
2482 
2483  Opt_trace_object trace_final_strategy(trace);
2484  trace_final_strategy.add_alnum("final_semijoin_strategy",
2485  "MaterializeLookup");
2486  }
2487  else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
2488  {
2489  const uint last_inner= pos->sjm_scan_last_inner;
2490  TABLE_LIST *const sjm_nest=
2491  (join->best_positions + last_inner)->table->emb_sj_nest;
2492  const uint table_count= my_count_bits(sjm_nest->sj_inner_tables);
2493  first= last_inner - table_count + 1;
2494  DBUG_ASSERT((join->best_positions + first)->table->emb_sj_nest ==
2495  sjm_nest);
2496  memcpy(join->best_positions + first, // stale semijoin strategy here too
2497  sjm_nest->nested_join->sjm.positions,
2498  sizeof(POSITION) * table_count);
2499  join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
2500  join->best_positions[first].n_sj_tables= table_count;
2501 
2502  Opt_trace_object trace_final_strategy(trace);
2503  trace_final_strategy.add_alnum("final_semijoin_strategy",
2504  "MaterializeScan");
2505  // Recalculate final access paths for this semi-join strategy
2506  double rowcount, cost;
2507  semijoin_mat_scan_access_paths(last_inner, tableno,
2508  remaining_tables, sjm_nest, true,
2509  &rowcount, &cost);
2510 
2511  }
2512  else if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
2513  {
2514  first= pos->first_firstmatch_table;
2515  join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
2516  join->best_positions[first].n_sj_tables= tableno - first + 1;
2517 
2518  Opt_trace_object trace_final_strategy(trace);
2519  trace_final_strategy.add_alnum("final_semijoin_strategy", "FirstMatch");
2520 
2521  // Recalculate final access paths for this semi-join strategy
2522  double rowcount, cost;
2523  (void)semijoin_firstmatch_loosescan_access_paths(first, tableno,
2524  remaining_tables, false, true,
2525  &rowcount, &cost);
2526  }
2527  else if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
2528  {
2529  first= pos->first_loosescan_table;
2530 
2531  Opt_trace_object trace_final_strategy(trace);
2532  trace_final_strategy.add_alnum("final_semijoin_strategy", "LooseScan");
2533 
2534  // Recalculate final access paths for this semi-join strategy
2535  double rowcount, cost;
2536  (void)semijoin_firstmatch_loosescan_access_paths(first, tableno,
2537  remaining_tables, true, true,
2538  &rowcount, &cost);
2539 
2540  POSITION *const first_pos= join->best_positions + first;
2541  first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
2542  first_pos->n_sj_tables=
2543  my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
2544  }
2545  else if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
2546  {
2547  /*
2548  Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
2549  this table.
2550  */
2551  first= pos->first_dupsweedout_table;
2552  join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
2553  join->best_positions[first].n_sj_tables= tableno - first + 1;
2554 
2555  Opt_trace_object trace_final_strategy(trace);
2556  trace_final_strategy.add_alnum("final_semijoin_strategy",
2557  "DuplicateWeedout");
2558  }
2559 
2560  for (uint i= first; i <= tableno; i++)
2561  {
2562  /*
2563  Eliminate stale strategies. See comment in the
2564  SJ_OPT_MATERIALIZE_LOOKUP case above.
2565  */
2566  if (i != first)
2567  join->best_positions[i].sj_strategy= SJ_OPT_NONE;
2568  handled_tables|= join->best_positions[i].table->table->map;
2569  }
2570 
2571  remaining_tables |= pos->table->table->map;
2572  }
2573 
2574  DBUG_ASSERT(remaining_tables == (join->all_table_map&~join->const_table_map));
2575 
2576  DBUG_RETURN(FALSE);
2577 }
2578 
2579 
2671 bool Optimize_table_order::check_interleaving_with_nj(JOIN_TAB *tab)
2672 {
2673  if (cur_embedding_map & ~tab->embedding_map)
2674  {
2675  /*
2676  tab is outside of the "pair of brackets" we're currently in.
2677  Cannot add it.
2678  */
2679  return true;
2680  }
2681  const TABLE_LIST *next_emb= tab->table->pos_in_table_list->embedding;
2682  /*
2683  Do update counters for "pairs of brackets" that we've left (marked as
2684  X,Y,Z in the above picture)
2685  */
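  /*
    A small illustration (hypothetical query): in
      t1 LEFT JOIN (t2 JOIN t3) ON ...
    the nest (t2,t3) is one "pair of brackets". The orders t1,t2,t3 and
    t2,t3,t1 are accepted, while t2,t1,t3 is rejected by the check at the
    top of this function: after t2 the (t2,t3) bracket is still open, so a
    table from outside it cannot be added yet.
  */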
2686  for (; next_emb != emb_sjm_nest; next_emb= next_emb->embedding)
2687  {
2688  // Ignore join nests that are not outer joins.
2689  if (!next_emb->join_cond())
2690  continue;
2691 
2692  next_emb->nested_join->nj_counter++;
2693  cur_embedding_map |= next_emb->nested_join->nj_map;
2694 
2695  if (next_emb->nested_join->nj_total != next_emb->nested_join->nj_counter)
2696  break;
2697 
2698  /*
2699  We're currently at Y or Z-bracket as depicted in the above picture.
2700  Mark that we've left it and continue walking up the brackets hierarchy.
2701  */
2702  cur_embedding_map &= ~next_emb->nested_join->nj_map;
2703  }
2704  return false;
2705 }
2706 
2707 
2751 bool Optimize_table_order::semijoin_firstmatch_loosescan_access_paths(
2752  uint first_tab, uint last_tab, table_map remaining_tables,
2753  bool loosescan, bool final,
2754  double *newcount, double *newcost)
2755 {
2756  DBUG_ENTER(
2757  "Optimize_table_order::semijoin_firstmatch_loosescan_access_paths");
2758  double cost; // Contains running estimate of calculated cost.
2759  double rowcount; // Rowcount of join prefix (ie before first_tab).
2760  double outer_fanout= 1.0; // Fanout contributed by outer tables in range.
2761  double inner_fanout= 1.0; // Fanout contributed by inner tables in range.
2762  Opt_trace_context *const trace= &thd->opt_trace;
2763  Opt_trace_object recalculate(trace, "recalculate_access_paths_and_cost");
2764  Opt_trace_array trace_tables(trace, "tables");
2765 
2766  POSITION *const positions= final ? join->best_positions : join->positions;
2767 
2768  if (first_tab == join->const_tables)
2769  {
2770  cost= 0.0;
2771  rowcount= 1.0;
2772  }
2773  else
2774  {
2775  cost= positions[first_tab - 1].prefix_cost.total_cost();
2776  rowcount= positions[first_tab - 1].prefix_record_count;
2777  }
2778 
2779  uint table_count= 0;
2780  uint no_jbuf_before;
2781  for (uint i= first_tab; i <= last_tab; i++)
2782  {
2783  remaining_tables|= positions[i].table->table->map;
2784  if (positions[i].table->emb_sj_nest)
2785  table_count++;
2786  }
2787  if (loosescan)
2788  {
2789  // LooseScan: May use join buffering for all tables after last inner table.
2790  for (no_jbuf_before= last_tab; no_jbuf_before > first_tab; no_jbuf_before--)
2791  {
2792  if (positions[no_jbuf_before].table->emb_sj_nest != NULL)
2793  break; // Encountered the last inner table.
2794  }
2795  no_jbuf_before++;
2796  }
2797  else
2798  {
2799  // FirstMatch: May use join buffering if there is only one inner table.
2800  no_jbuf_before= (table_count > 1) ? last_tab + 1 : first_tab;
2801  }
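  /*
    Illustrative example (hypothetical table range): for a LooseScan range
    it1, it2, ot1, ot2 (it* = semi-join inner, ot* = outer), the loop above
    stops at it2, the last inner table, and no_jbuf_before ends up pointing
    at ot1: join buffering is disabled for the inner tables and may be used
    for ot1 and ot2. For FirstMatch with more than one inner table,
    no_jbuf_before = last_tab + 1 disables buffering for the whole range.
  */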
2802 
2803 
2804  for (uint i= first_tab; i <= last_tab; i++)
2805  {
2806  JOIN_TAB *const tab= positions[i].table;
2807  POSITION regular_pos, loose_scan_pos;
2808  POSITION *const dst_pos= final ? positions + i : &regular_pos;
2809  POSITION *pos; // Position for later calculations
2810  /*
2811  We always need a new calculation for the first inner table in
2812  the LooseScan strategy. Notice the use of loose_scan_pos.
2813  */
2814  if ((i == first_tab && loosescan) || positions[i].use_join_buffer)
2815  {
2816  Opt_trace_object trace_one_table(trace);
2817  trace_one_table.add_utf8_table(tab->table);
2818 
2819  // Find the best access method with specified join buffering strategy.
2820  best_access_path(tab, remaining_tables, i,
2821  i < no_jbuf_before,
2822  rowcount * inner_fanout * outer_fanout,
2823  dst_pos, &loose_scan_pos);
2824  if (i == first_tab && loosescan) // Use loose scan position
2825  {
2826  *dst_pos= loose_scan_pos;
2827  const double rows= rowcount * dst_pos->records_read;
2828  dst_pos->set_prefix_costs(cost + dst_pos->read_time +
2829  rows * ROW_EVALUATE_COST,
2830  rows);
2831  }
2832  pos= dst_pos;
2833  }
2834  else
2835  pos= positions + i; // Use result from prior calculation
2836 
2837  /*
2838  Terminate search if best_access_path found no possible plan.
2839  Otherwise we would get an infinite cost when summing up below.
2840  */
2841  if (pos->read_time == DBL_MAX)
2842  {
2843  DBUG_ASSERT(loosescan && !final);
2844  DBUG_RETURN(false);
2845  }
2846 
2847  remaining_tables&= ~tab->table->map;
2848 
2849  if (tab->emb_sj_nest)
2850  inner_fanout*= pos->records_read;
2851  else
2852  outer_fanout*= pos->records_read;
2853 
2854  cost+= pos->read_time +
2855  rowcount * inner_fanout * outer_fanout * ROW_EVALUATE_COST;
2856  }
2857 
2858  *newcount= rowcount * outer_fanout;
2859  *newcost= cost;
2860 
2861  DBUG_RETURN(true);
2862 }
2863 
2864 
2888 void Optimize_table_order::semijoin_mat_scan_access_paths(
2889  uint last_inner_tab, uint last_outer_tab,
2890  table_map remaining_tables, TABLE_LIST *sjm_nest, bool final,
2891  double *newcount, double *newcost)
2892 {
2893  DBUG_ENTER("Optimize_table_order::semijoin_mat_scan_access_paths");
2894 
2895  Opt_trace_context *const trace= &thd->opt_trace;
2896  Opt_trace_object recalculate(trace, "recalculate_access_paths_and_cost");
2897  Opt_trace_array trace_tables(trace, "tables");
2898  double cost; // Calculated running cost of operation
2899  double rowcount; // Rowcount of join prefix (ie before first_inner).
2900 
2901  POSITION *const positions= final ? join->best_positions : join->positions;
2902  const uint inner_count= my_count_bits(sjm_nest->sj_inner_tables);
2903 
2904  // Get the prefix cost.
2905  const uint first_inner= last_inner_tab + 1 - inner_count;
2906  if (first_inner == join->const_tables)
2907  {
2908  rowcount= 1.0;
2909  cost= 0.0;
2910  }
2911  else
2912  {
2913  rowcount= positions[first_inner - 1].prefix_record_count;
2914  cost= positions[first_inner - 1].prefix_cost.total_cost();
2915  }
2916 
2917  // Add materialization cost.
2918  cost+= sjm_nest->nested_join->sjm.materialization_cost.total_cost() +
2919  rowcount * sjm_nest->nested_join->sjm.scan_cost.total_cost();
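  /*
    Illustrative arithmetic (hypothetical costs): with a prefix rowcount of
    10, materialization_cost = 100 and scan_cost = 5, the increment above is
    100 + 10 * 5 = 150: the nest is materialized once, and the materialized
    table is then scanned once per row of the join prefix.
  */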
2920 
2921  for (uint i= last_inner_tab + 1; i <= last_outer_tab; i++)
2922  remaining_tables|= positions[i].table->table->map;
2923  /*
2924  Materialization removes duplicates from the materialized table, so
2925  the number of rows to scan is probably less than the number of rows
2926  from a full join, on which the access paths of outer tables are currently
2927  based. Rerun best_access_path to adjust for reduced rowcount.
2928  */
2929  const double inner_fanout= sjm_nest->nested_join->sjm.expected_rowcount;
2930  double outer_fanout= 1.0;
2931 
2932  for (uint i= last_inner_tab + 1; i <= last_outer_tab; i++)
2933  {
2934  Opt_trace_object trace_one_table(trace);
2935  JOIN_TAB *const tab= positions[i].table;
2936  trace_one_table.add_utf8_table(tab->table);
2937  POSITION regular_pos, dummy;
2938  POSITION *const dst_pos= final ? positions + i : &regular_pos;
2939  best_access_path(tab, remaining_tables, i, false,
2940  rowcount * inner_fanout * outer_fanout, dst_pos, &dummy);
2941  remaining_tables&= ~tab->table->map;
2942  outer_fanout*= dst_pos->records_read;
2943  cost+= dst_pos->read_time +
2944  rowcount * inner_fanout * outer_fanout * ROW_EVALUATE_COST;
2945  }
2946 
2947  *newcount= rowcount * outer_fanout;
2948  *newcost= cost;
2949 
2950  DBUG_VOID_RETURN;
2951 }
2952 
2953 
2970 void Optimize_table_order::semijoin_mat_lookup_access_paths(
2971  uint last_inner, TABLE_LIST *sjm_nest,
2972  double *newcount, double *newcost)
2973 {
2974  DBUG_ENTER("Optimize_table_order::semijoin_mat_lookup_access_paths");
2975 
2976  const uint inner_count= my_count_bits(sjm_nest->sj_inner_tables);
2977  double rowcount, cost;
2978 
2979  const uint first_inner= last_inner + 1 - inner_count;
2980  if (first_inner == join->const_tables)
2981  {
2982  cost= 0.0;
2983  rowcount= 1.0;
2984  }
2985  else
2986  {
2987  cost= join->positions[first_inner - 1].prefix_cost.total_cost();
2988  rowcount= join->positions[first_inner - 1].prefix_record_count;
2989  }
2990 
2991  cost+= sjm_nest->nested_join->sjm.materialization_cost.total_cost() +
2992  rowcount * sjm_nest->nested_join->sjm.lookup_cost.total_cost();
2993 
2994  *newcount= rowcount;
2995  *newcost= cost;
2996 
2997  DBUG_VOID_RETURN;
2998 }
2999 
3000 
3027 void Optimize_table_order::semijoin_dupsweedout_access_paths(
3028  uint first_tab, uint last_tab,
3029  table_map remaining_tables,
3030  double *newcount, double *newcost)
3031 {
3032  DBUG_ENTER("Optimize_table_order::semijoin_dupsweedout_access_paths");
3033 
3034  double cost, rowcount;
3035  double inner_fanout= 1.0;
3036  double outer_fanout= 1.0;
3037  uint rowsize; // Row size of the temporary table
3038  if (first_tab == join->const_tables)
3039  {
3040  cost= 0.0;
3041  rowcount= 1.0;
3042  rowsize= 0;
3043  }
3044  else
3045  {
3046  cost= join->positions[first_tab - 1].prefix_cost.total_cost();
3047  rowcount= join->positions[first_tab - 1].prefix_record_count;
3048  rowsize= 8; // This is not true but we'll make it so
3049  }
3071  for (uint j= first_tab; j <= last_tab; j++)
3072  {
3073  const POSITION *const p= join->positions + j;
3074  if (p->table->emb_sj_nest)
3075  {
3076  inner_fanout*= p->records_read;
3077  }
3078  else
3079  {
3080  outer_fanout*= p->records_read;
3081 
3082  rowsize+= p->table->table->file->ref_length;
3083  }
3084  cost+= p->read_time +
3085  rowcount * inner_fanout * outer_fanout * ROW_EVALUATE_COST;
3086  }
3087 
3088  /*
3089  @todo: Change this paragraph in concert with the todo note above.
3090  Add the cost of temptable use. The table will have outer_fanout rows,
3091  and we will make
3092  - rowcount * outer_fanout writes
3093  - rowcount * inner_fanout * outer_fanout lookups.
3094  We assume here that a lookup and a write have the same cost.
3095  */
3096  double one_lookup_cost, create_cost;
3097  if (outer_fanout * rowsize > thd->variables.max_heap_table_size)
3098  {
3099  one_lookup_cost= DISK_TEMPTABLE_ROW_COST;
3100  create_cost= DISK_TEMPTABLE_CREATE_COST;
3101  }
3102  else
3103  {
3104  one_lookup_cost= HEAP_TEMPTABLE_ROW_COST;
3105  create_cost= HEAP_TEMPTABLE_CREATE_COST;
3106  }
3107  const double write_cost= rowcount * outer_fanout * one_lookup_cost;
3108  const double full_lookup_cost= write_cost * inner_fanout;
3109  cost+= create_cost + write_cost + full_lookup_cost;
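  /*
    Worked example (hypothetical numbers): with rowcount = 10,
    outer_fanout = 20, inner_fanout = 3 and a heap temporary table, the
    increment above is
      create_cost + (10*20) * one_lookup_cost      // 200 writes
                  + (10*20*3) * one_lookup_cost    // 600 lookups
    matching the write and lookup counts described in the comment above.
  */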
3110 
3111  *newcount= rowcount * outer_fanout;
3112  *newcost= cost;
3113 
3114  DBUG_VOID_RETURN;
3115 }
3116 
3117 
3177 void Optimize_table_order::advance_sj_state(
3178  table_map remaining_tables,
3179  const JOIN_TAB *new_join_tab, uint idx,
3180  double *current_rowcount, double *current_cost,
3181  POSITION *loose_scan_pos)
3182 {
3183  Opt_trace_context * const trace= &thd->opt_trace;
3184  TABLE_LIST *const emb_sj_nest= new_join_tab->emb_sj_nest;
3185  POSITION *const pos= join->positions + idx;
3186  uint sj_strategy= SJ_OPT_NONE; // Initially: No chosen strategy
3187  /*
3188  Semi-join nests cannot be nested, hence we never need to advance the
3189  semi-join state of a materialized semi-join query.
3190  In fact, doing this may cause undesirable effects because all tables
3191  within a semi-join nest have emb_sj_nest != NULL, which triggers several
3192  of the actions inside this function.
3193  */
3194  DBUG_ASSERT(emb_sjm_nest == NULL);
3195 
3196  /* Add this table to the join prefix */
3197  remaining_tables &= ~new_join_tab->table->map;
3198 
3199  DBUG_ENTER("Optimize_table_order::advance_sj_state");
3200 
3201  Opt_trace_array trace_choices(trace, "semijoin_strategy_choice");
3202 
3203  /* Initialize the state or copy it from prev. tables */
3204  if (idx == join->const_tables)
3205  {
3206  pos->dups_producing_tables= 0;
3207  pos->first_firstmatch_table= MAX_TABLES;
3208  pos->first_loosescan_table= MAX_TABLES;
3209  pos->dupsweedout_tables= 0;
3210  pos->sjm_scan_need_tables= 0;
3211  LINT_INIT(pos->sjm_scan_last_inner);
3212  }
3213  else
3214  {
3216 
3217  // FirstMatch
3218  pos->first_firstmatch_table= pos[-1].first_firstmatch_table;
3219  pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
3220  pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
3221 
3222  // LooseScan
3223  pos->first_loosescan_table=
3224  (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
3225  MAX_TABLES : pos[-1].first_loosescan_table;
3226  pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
3227 
3228  // MaterializeScan
3229  pos->sjm_scan_need_tables=
3230  (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
3231  0 : pos[-1].sjm_scan_need_tables;
3232  pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
3233 
3234  // Duplicate Weedout
3235  pos->dupsweedout_tables= pos[-1].dupsweedout_tables;
3236  pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
3237  }
3238 
3239  table_map handled_by_fm_or_ls= 0;
3240  /*
3241  FirstMatch Strategy
3242  ===================
3243 
3244  FirstMatch requires that all dependent outer tables are in the join prefix.
3245  (see "FirstMatch strategy" above setup_semijoin_dups_elimination()).
3246  The execution strategy will handle multiple semi-join nests correctly,
3247  and the optimizer will pick execution strategy according to these rules:
3248  - If tables from multiple semi-join nests are intertwined, they will
3249  be processed as one FirstMatch evaluation.
3250  - If tables from each semi-join nest are grouped together, each semi-join
3251  nest is processed as one FirstMatch evaluation.
3252 
3253  Example: Let's say we have an outer table ot and two semi-join nests with
3254  two tables each: it11 and it12, and it21 and it22.
3255 
3256  Intertwined tables: ot - FM(it11 - it21 - it12 - it22)
3257  Grouped tables: ot - FM(it11 - it12) - FM(it21 - it22)
3258  */
3259  if (emb_sj_nest &&
3260  thd->optimizer_switch_flag(OPTIMIZER_SWITCH_FIRSTMATCH))
3261  {
3262  const table_map outer_corr_tables= emb_sj_nest->nested_join->sj_depends_on;
3263  const table_map sj_inner_tables= emb_sj_nest->sj_inner_tables;
3264  /*
3265  Enter condition:
3266  1. The next join tab belongs to semi-join nest
3267  (verified for the encompassing code block above).
3268  2. We're not in a duplicate producer range yet
3269  3. All outer tables that
3270  - the subquery is correlated with, or
3271  - referred to from the outer_expr
3272  are in the join prefix
3273  */
3274  if (pos->dups_producing_tables == 0 && // (2)
3275  !(remaining_tables & outer_corr_tables)) // (3)
3276  {
3277  /* Start tracking potential FirstMatch range */
3278  pos->first_firstmatch_table= idx;
3279  pos->firstmatch_need_tables= 0;
3280  pos->first_firstmatch_rtbl= remaining_tables;
3281  // All inner tables should still be part of remaining_tables.
3282  DBUG_ASSERT(sj_inner_tables ==
3283  ((remaining_tables | new_join_tab->table->map) &
3284  sj_inner_tables));
3285  }
3286 
3287  if (pos->first_firstmatch_table != MAX_TABLES)
3288  {
3289  /* Record that we need all of this semi-join's inner tables */
3290  pos->firstmatch_need_tables|= sj_inner_tables;
3291 
3292  if (outer_corr_tables & pos->first_firstmatch_rtbl)
3293  {
3294  /*
3295  Trying to add an sj-inner table whose sj-nest has an outer correlated
3296  table that was not in the prefix. This means FirstMatch can't be used.
3297  */
3298  pos->first_firstmatch_table= MAX_TABLES;
3299  }
3300  else if (!(pos->firstmatch_need_tables & remaining_tables))
3301  {
3302  // Got a complete FirstMatch range. Calculate access paths and cost
3303  double cost, rowcount;
3304  /* We use the same FirstLetterUpcase as in EXPLAIN */
3305  Opt_trace_object trace_one_strategy(trace);
3306  trace_one_strategy.add_alnum("strategy", "FirstMatch");
3307  (void)semijoin_firstmatch_loosescan_access_paths(
3308  pos->first_firstmatch_table, idx,
3309  remaining_tables, false, false,
3310  &rowcount, &cost);
3311  /*
3312  We don't yet know what the other strategies are, so pick FirstMatch.
3313 
3314  We ought to save the alternate POSITIONs produced by
3315  semijoin_firstmatch_loosescan_access_paths(), but the problem is that
3316  reserving space for them would use too much memory.
3317  Instead, we will re-calculate the alternate POSITIONs after we've
3318  picked the best QEP.
3319  */
3320  sj_strategy= SJ_OPT_FIRST_MATCH;
3321  *current_cost= cost;
3322  *current_rowcount= rowcount;
3323  trace_one_strategy.add("cost", *current_cost).
3324  add("rows", *current_rowcount);
3325  handled_by_fm_or_ls= pos->firstmatch_need_tables;
3326 
3327  trace_one_strategy.add("chosen", true);
3328  }
3329  }
3330  }
3331  /*
3332  LooseScan Strategy
3333  ==================
3334 
3335  LooseScan requires that all dependent outer tables are not in the join
3336  prefix. (see "LooseScan strategy" above setup_semijoin_dups_elimination()).
3337  The tables must come in a rather strictly defined order:
3338  1. The LooseScan driving table (which is a subquery inner table).
3339  2. The remaining tables from the same semi-join nest as the above table.
3340  3. The outer dependent tables, possibly mixed with outer non-dependent
3341  tables.
3342  Notice that any other semi-joined tables must be outside this table range.
3343  */
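  /*
    Illustrative example (hypothetical query):
      SELECT * FROM ot WHERE ot.a IN (SELECT it1.b FROM it1, it2 WHERE ...)
    A LooseScan-compatible order is it1 (driving table, read through an
    index on it1.b), then it2 (same semi-join nest), then ot. Placing ot
    between it1 and it2, or inserting a table from a different semi-join
    nest into this range, disqualifies the LooseScan candidate below.
  */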
3344  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_LOOSE_SCAN))
3345  {
3346  POSITION *const first= join->positions+pos->first_loosescan_table;
3347  /*
3348  LooseScan strategy can't handle interleaving between tables from the
3349  semi-join that LooseScan is handling and any other tables.
3350  */
3351  if (pos->first_loosescan_table != MAX_TABLES)
3352  {
3353  if (first->table->emb_sj_nest->sj_inner_tables &
3354  (remaining_tables | new_join_tab->table->map))
3355  {
3356  // Stage 2: Accept remaining tables from the semi-join nest:
3357  if (emb_sj_nest != first->table->emb_sj_nest)
3358  pos->first_loosescan_table= MAX_TABLES;
3359  }
3360  else
3361  {
3362  // Stage 3: Accept outer dependent and non-dependent tables:
3363  DBUG_ASSERT(emb_sj_nest != first->table->emb_sj_nest);
3364  if (emb_sj_nest != NULL)
3365  pos->first_loosescan_table= MAX_TABLES;
3366  }
3367  }
3368  /*
3369  If we got an option to use LooseScan for the current table, start
3370  considering the LooseScan strategy.
3371  */
3372  if (loose_scan_pos->read_time != DBL_MAX)
3373  {
3374  pos->first_loosescan_table= idx;
3375  pos->loosescan_need_tables= emb_sj_nest->sj_inner_tables |
3376  emb_sj_nest->nested_join->sj_depends_on;
3377  }
3378 
3379  if ((pos->first_loosescan_table != MAX_TABLES) &&
3380  !(remaining_tables & pos->loosescan_need_tables))
3381  {
3382  /*
3383  OK, we have a LooseScan plan, and all of the LooseScan sj-nest's
3384  inner tables and outer correlated tables are in the prefix.
3385  */
3386 
3387  // Got a complete LooseScan range. Calculate access paths and cost
3388  double cost, rowcount;
3389  Opt_trace_object trace_one_strategy(trace);
3390  trace_one_strategy.add_alnum("strategy", "LooseScan");
3391  /*
3392  The same problem as with FirstMatch - we need to save POSITIONs
3393  somewhere but reserving space for all cases would require too
3394  much space. We will re-calculate POSITION structures later on.
3395  */
3396  if (semijoin_firstmatch_loosescan_access_paths(
3397  pos->first_loosescan_table, idx,
3398  remaining_tables, true, false,
3399  &rowcount, &cost))
3400  {
3401  /*
3402  We don't yet have any other strategies that could handle this
3403  semi-join nest (the other options are Duplicate Elimination or
3404  Materialization, which need at least the same set of tables in
3405  the join prefix to be considered) so unconditionally pick the
3406  LooseScan.
3407  */
3408  sj_strategy= SJ_OPT_LOOSE_SCAN;
3409  *current_cost= cost;
3410  *current_rowcount= rowcount;
3411  trace_one_strategy.add("cost", *current_cost).
3412  add("rows", *current_rowcount);
3413  handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
3414  }
3415  trace_one_strategy.add("chosen", sj_strategy == SJ_OPT_LOOSE_SCAN);
3416  }
3417  }
3418 
3419  if (emb_sj_nest)
3420  pos->dups_producing_tables |= emb_sj_nest->sj_inner_tables;
3421 
3422  pos->dups_producing_tables &= ~handled_by_fm_or_ls;
3423 
3424  /* MaterializeLookup and MaterializeScan strategy handler */
3425  const int sjm_strategy=
3426  semijoin_order_allows_materialization(join, remaining_tables,
3427  new_join_tab, idx);
3428  if (sjm_strategy == SJ_OPT_MATERIALIZE_SCAN)
3429  {
3430  /*
3431  We cannot evaluate this option now. This is because we cannot
3432  account for fanout of sj-inner tables yet:
3433 
3434  ntX SJM-SCAN(it1 ... itN) | ot1 ... otN |
3435  ^(1) ^(2)
3436 
3437  we're now at position (1). The SJM temptable in general has multiple
3438  records, so at point (1) we'll get the fanout from the sj-inner tables
3439  (i.e. there will be multiple record combinations).
3440 
3441  The final join result will not contain any semi-join produced
3442  fanout, i.e. tables within SJM-SCAN(...) will not contribute to
3443  the cardinality of the join output. Extra fanout produced by
3444  SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN.
3445 
3446  The simple way to model this is to remove SJM-SCAN(...) fanout once
3447  we reach the point #2.
3448  */
3449  pos->sjm_scan_need_tables=
3450  emb_sj_nest->sj_inner_tables |
3451  emb_sj_nest->nested_join->sj_depends_on;
3452  pos->sjm_scan_last_inner= idx;
3453  Opt_trace_object(trace).add_alnum("strategy", "MaterializeScan").
3454  add_alnum("choice", "deferred");
3455  }
3456  else if (sjm_strategy == SJ_OPT_MATERIALIZE_LOOKUP)
3457  {
3458  // Calculate access paths and cost for MaterializeLookup strategy
3459  double cost, rowcount;
3460  semijoin_mat_lookup_access_paths(idx, emb_sj_nest, &rowcount, &cost);
3461 
3462  Opt_trace_object trace_one_strategy(trace);
3463  trace_one_strategy.add_alnum("strategy", "MaterializeLookup").
3464  add("cost", cost).add("rows", rowcount).
3465  add("duplicate_tables_left", pos->dups_producing_tables != 0);
3466  if (cost < *current_cost || pos->dups_producing_tables)
3467  {
3468  /*
3469  NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
3470  elements to join->positions as that makes it hard to return things
3471  back when making one step back in join optimization. That's done
3472  after the QEP has been chosen.
3473  */
3474  sj_strategy= SJ_OPT_MATERIALIZE_LOOKUP;
3475  *current_cost= cost;
3476  *current_rowcount= rowcount;
3477  pos->dups_producing_tables &= ~emb_sj_nest->sj_inner_tables;
3478  }
3479  trace_one_strategy.add("chosen", sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP);
3480  }
3481 
3482  /* MaterializeScan second phase check */
3483  /*
3484  The optimizer does not support that we have inner tables from more
3485  than one semi-join nest within the table range.
3486  */
3487  if (pos->sjm_scan_need_tables &&
3488  emb_sj_nest != NULL &&
3489  emb_sj_nest !=
3490  join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest)
3491  pos->sjm_scan_need_tables= 0;
3492 
3493  if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
3494  !(pos->sjm_scan_need_tables & remaining_tables))
3495  {
3496  TABLE_LIST *const sjm_nest=
3497  join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
3498 
3499  double cost, rowcount;
3500 
3501  Opt_trace_object trace_one_strategy(trace);
3502  trace_one_strategy.add_alnum("strategy", "MaterializeScan");
3503 
3504  semijoin_mat_scan_access_paths(pos->sjm_scan_last_inner, idx,
3505  remaining_tables, sjm_nest, false,
3506  &rowcount, &cost);
3507  trace_one_strategy.add("cost", cost).
3508  add("rows", rowcount).
3509  add("duplicate_tables_left", pos->dups_producing_tables != 0);
3510  /*
3511  Use the strategy if
3512  * it is cheaper than what we've had, or
3513  * we haven't picked any other semi-join strategy yet
3514  In the second case, we pick this strategy unconditionally because
3515  comparing cost without semi-join duplicate removal with cost with
3516  duplicate removal is not an apples-to-apples comparison.
3517  */
3518  if (cost < *current_cost || pos->dups_producing_tables)
3519  {
3520  sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
3521  *current_cost= cost;
3522  *current_rowcount= rowcount;
3523  pos->dups_producing_tables &= ~sjm_nest->sj_inner_tables;
3524  }
3525  trace_one_strategy.add("chosen", sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
3526  }
3527 
3528  /* Duplicate Weedout strategy handler */
3529  {
3530  /*
3531  Duplicate weedout can be applied once all of the semi-join's inner
3532  tables and the outer tables it is correlated with are in the join prefix.
3533  */
3534  if (emb_sj_nest)
3535  {
3536  if (!pos->dupsweedout_tables)
3537  pos->first_dupsweedout_table= idx;
3538 
3539  pos->dupsweedout_tables|= emb_sj_nest->sj_inner_tables |
3540  emb_sj_nest->nested_join->sj_depends_on;
3541  }
3542 
3543  if (pos->dupsweedout_tables &&
3544  !(remaining_tables & pos->dupsweedout_tables))
3545  {
3546  Opt_trace_object trace_one_strategy(trace);
3547  trace_one_strategy.add_alnum("strategy", "DuplicatesWeedout");
3548  /*
3549  Ok, reached a state where we could put a dups weedout point.
3550  Walk back and calculate
3551  - the join cost (this is needed as the accumulated cost may assume
3552  some other duplicate elimination method)
3553  - extra fanout that will be removed by duplicate elimination
3554  - duplicate elimination cost
3555  There are two cases:
3556  1. We have other strategy/ies to remove all of the duplicates.
3557  2. We don't.
3558 
3559  We need to calculate the cost in case #2 also because we need to make
3560  a choice between this join order and others.
3561  */
3562  double rowcount, cost;
3563  semijoin_dupsweedout_access_paths(pos->first_dupsweedout_table, idx,
3564  remaining_tables, &rowcount, &cost);
3565  /*
3566  Use the strategy if
3567  * it is cheaper than what we've had, or
3568  * we haven't picked any other semi-join strategy yet
3569  The second part is necessary because this strategy is the last one
3570  to consider (it needs "the most" tables in the prefix) and we can't
3571  leave duplicate-producing tables not handled by any strategy.
3572  */
3573  trace_one_strategy.
3574  add("cost", cost).
3575  add("rows", rowcount).
3576  add("duplicate_tables_left", pos->dups_producing_tables != 0);
3577  if (cost < *current_cost || pos->dups_producing_tables)
3578  {
3579  sj_strategy= SJ_OPT_DUPS_WEEDOUT;
3580  *current_cost= cost;
3581  *current_rowcount= rowcount;
3582  /*
3583  Note, dupsweedout_tables contains inner and outer tables, even though
3584  "dups_producing_tables" always contains only inner tables. OK for this use.
3585  */
3586  pos->dups_producing_tables &= ~pos->dupsweedout_tables;
3587  }
3588  trace_one_strategy.add("chosen", sj_strategy == SJ_OPT_DUPS_WEEDOUT);
3589  }
3590  }
3591  pos->sj_strategy= sj_strategy;
3592  /*
3593  If a semi-join strategy is chosen, update cost and rowcount in positions
3594  as well. These values may be used as prefix cost and rowcount for later
3595  semi-join calculations, e.g. for plans like "ot1 - it1 - it2 - ot2",
3596  where we have two semi-join nests containing it1 and it2, respectively,
3597  and we have a dependency between ot1 and it1, and between ot2 and it2.
3598  When looking at a semi-join plan for "it2 - ot2", the correct prefix cost
3599  (located in the join_tab for it1) must be filled in properly.
3600 
3601  Tables in a semijoin range, except the last in range, won't have their
3602  prefix_costs changed below; this is normal: when we process them, this is
3603  a regular join so regular costs calculated in best_ext...() are ok;
3604  duplicates elimination happens only at the last table in range, so it
3605  makes sense to correct prefix_costs of that last table.
3606  */
3607  if (sj_strategy != SJ_OPT_NONE)
3608  pos->set_prefix_costs(*current_cost, *current_rowcount);
3609 
3610  DBUG_VOID_RETURN;
3611 }
3612 
3613 
3667 void Optimize_table_order::backout_nj_state(const table_map remaining_tables,
3668  const JOIN_TAB *tab)
3669 {
3670  DBUG_ASSERT(remaining_tables & tab->table->map);
3671 
3672  /* Restore the nested join state */
3673  TABLE_LIST *last_emb= tab->table->pos_in_table_list->embedding;
3674 
3675  for (; last_emb != emb_sjm_nest; last_emb= last_emb->embedding)
3676  {
3677  // Ignore join nests that are not outer joins.
3678  if (!last_emb->join_cond())
3679  continue;
3680 
3681  NESTED_JOIN *const nest= last_emb->nested_join;
3682 
3683  DBUG_ASSERT(nest->nj_counter > 0);
3684 
3685  cur_embedding_map|= nest->nj_map;
3686  bool was_fully_covered= nest->nj_total == nest->nj_counter;
3687 
3688  if (--nest->nj_counter == 0)
3689  cur_embedding_map&= ~nest->nj_map;
3690 
3691  if (!was_fully_covered)
3692  break;
3693  }
3694 }
3695 
3696 
3700 static void trace_plan_prefix(JOIN *join, uint idx,
3701  table_map excluded_tables)
3702 {
3703 #ifdef OPTIMIZER_TRACE
3704  THD * const thd= join->thd;
3705  Opt_trace_array plan_prefix(&thd->opt_trace, "plan_prefix");
3706  for (uint i= 0; i < idx; i++)
3707  {
3708  const TABLE * const table= join->positions[i].table->table;
3709  if (!(table->map & excluded_tables))
3710  {
3711  TABLE_LIST * const tl= table->pos_in_table_list;
3712  if (tl != NULL)
3713  {
3714  StringBuffer<32> str;
3715  tl->print(thd, &str, enum_query_type(QT_TO_SYSTEM_CHARSET |
3716  QT_SHOW_SELECT_NUMBER |
3717  QT_NO_DEFAULT_DB |
3718  QT_DERIVED_TABLE_ONLY_ALIAS));
3719  plan_prefix.add_utf8(str.ptr(), str.length());
3720  }
3721  }
3722  }
3723 #endif
3724 }
3725