MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
filesort.cc
Go to the documentation of this file.
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 
24 #include "sql_priv.h"
25 #include "filesort.h"
26 #include "unireg.h" // REQUIRED by other includes
27 #ifdef HAVE_STDDEF_H
28 #include <stddef.h> /* for macro offsetof */
29 #endif
30 #include <m_ctype.h>
31 #include "sql_sort.h"
32 #include "probes_mysql.h"
33 #include "opt_range.h" // SQL_SELECT
34 #include "bounded_queue.h"
35 #include "filesort_utils.h"
36 #include "sql_select.h"
37 #include "debug_sync.h"
38 #include "opt_trace.h"
39 #include "sql_optimizer.h" // JOIN
40 
41 #include <algorithm>
42 #include <utility>
43 using std::max;
44 using std::min;
45 
46  /* functions defined in this file */
47 
48 static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count,
49  uchar *buf);
50 static ha_rows find_all_keys(Sort_param *param,SQL_SELECT *select,
51  Filesort_info *fs_info,
52  IO_CACHE *buffer_file,
53  IO_CACHE *tempfile,
55  ha_rows *found_rows);
56 static int write_keys(Sort_param *param, Filesort_info *fs_info,
57  uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
58 static void register_used_fields(Sort_param *param);
59 static int merge_index(Sort_param *param,uchar *sort_buffer,
60  BUFFPEK *buffpek,
61  uint maxbuffer,IO_CACHE *tempfile,
62  IO_CACHE *outfile);
63 static bool save_index(Sort_param *param, uint count,
64  Filesort_info *table_sort);
65 static uint suffix_length(ulong string_length);
66 static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
67  Field **ptabfield,
68  uint sortlength, uint *plength);
69 static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
70  uchar *buff);
71 static bool check_if_pq_applicable(Opt_trace_context *trace,
72  Sort_param *param, Filesort_info *info,
73  TABLE *table,
74  ha_rows records, ulong memory_available);
75 
76 
77 void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
78  ulong max_length_for_sort_data,
79  ha_rows maxrows, bool sort_positions)
80 {
81  sort_length= sortlen;
82  ref_length= table->file->ref_length;
83  if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
84  !table->fulltext_searched && !sort_positions)
85  {
86  /*
87  Get the descriptors of all fields whose values are appended
88  to sorted fields and get its total length in addon_length.
89  */
90  addon_field= get_addon_fields(max_length_for_sort_data,
91  table->field, sort_length, &addon_length);
92  }
93  if (addon_field)
94  res_length= addon_length;
95  else
96  {
97  res_length= ref_length;
98  /*
99  The reference to the record is considered
100  as an additional sorted field
101  */
102  sort_length+= ref_length;
103  }
104  rec_length= sort_length + addon_length;
105  max_rows= maxrows;
106 }
107 
108 
109 static void trace_filesort_information(Opt_trace_context *trace,
110  const SORT_FIELD *sortorder,
111  uint s_length)
112 {
113  if (!trace->is_started())
114  return;
115 
116  Opt_trace_array trace_filesort(trace, "filesort_information");
117  for (; s_length-- ; sortorder++)
118  {
119  Opt_trace_object oto(trace);
120  oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");
121 
122  if (sortorder->field)
123  {
124  if (strlen(sortorder->field->table->alias) != 0)
125  oto.add_utf8_table(sortorder->field->table);
126  else
127  oto.add_alnum("table", "intermediate_tmp_table");
128  oto.add_alnum("field", sortorder->field->field_name ?
129  sortorder->field->field_name : "tmp_table_column");
130  }
131  else
132  oto.add("expression", sortorder->item);
133  }
134 }
135 
136 
173 ha_rows filesort(THD *thd, TABLE *table, Filesort *filesort,
174  bool sort_positions, ha_rows *examined_rows,
175  ha_rows *found_rows)
176 {
177  int error;
178  ulong memory_available= thd->variables.sortbuff_size;
179  uint maxbuffer;
180  BUFFPEK *buffpek;
181  ha_rows num_rows= HA_POS_ERROR;
182  IO_CACHE tempfile, buffpek_pointers, *outfile;
183  Sort_param param;
184  bool multi_byte_charset;
186  Opt_trace_context * const trace= &thd->opt_trace;
187  SQL_SELECT *const select= filesort->select;
188  ha_rows max_rows= filesort->limit;
189  uint s_length= 0;
190 
191  DBUG_ENTER("filesort");
192 
193  if (!(s_length= filesort->make_sortorder()))
194  DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
195 
196  /*
197  We need a nameless wrapper, since we may be inside the "steps" of
198  "join_execution".
199  */
200  Opt_trace_object trace_wrapper(trace);
201  trace_filesort_information(trace, filesort->sortorder, s_length);
202 
203 #ifdef SKIP_DBUG_IN_FILESORT
204  DBUG_PUSH(""); /* No DBUG here */
205 #endif
206  Item_subselect *subselect= table->reginfo.join_tab ?
207  table->reginfo.join_tab->join->select_lex->master_unit()->item :
208  NULL;
209 
210  MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
211  DEBUG_SYNC(thd, "filesort_start");
212 
213  /*
214  Release InnoDB's adaptive hash index latch (if holding) before
215  running a sort.
216  */
218 
219  /*
220  Don't use table->sort in filesort as it is also used by
221  QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
222  when index_merge select has finished with it.
223  */
224  Filesort_info table_sort= table->sort;
225  table->sort.io_cache= NULL;
226  DBUG_ASSERT(table_sort.record_pointers == NULL);
227 
228  outfile= table_sort.io_cache;
229  my_b_clear(&tempfile);
230  my_b_clear(&buffpek_pointers);
231  buffpek=0;
232  error= 1;
233 
234  param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
235  &multi_byte_charset),
236  table,
237  thd->variables.max_length_for_sort_data,
238  max_rows, sort_positions);
239 
240  table_sort.addon_buf= 0;
241  table_sort.addon_length= param.addon_length;
242  table_sort.addon_field= param.addon_field;
243  table_sort.unpack= unpack_addon_fields;
244  if (param.addon_field &&
245  !(table_sort.addon_buf=
246  (uchar *) my_malloc(param.addon_length, MYF(MY_WME))))
247  goto err;
248 
249  if (select && select->quick)
250  thd->inc_status_sort_range();
251  else
252  thd->inc_status_sort_scan();
253 
254  // If number of rows is not known, use as much of sort buffer as possible.
255  num_rows= table->file->estimate_rows_upper_bound();
256 
257  if (multi_byte_charset &&
258  !(param.tmp_buffer= (char*) my_malloc(param.sort_length,MYF(MY_WME))))
259  goto err;
260 
261  if (check_if_pq_applicable(trace, &param, &table_sort,
262  table, num_rows, memory_available))
263  {
264  DBUG_PRINT("info", ("filesort PQ is applicable"));
265  const size_t compare_length= param.sort_length;
266  if (pq.init(param.max_rows,
267  true, // max_at_top
268  NULL, // compare_function
269  compare_length,
270  &make_sortkey, &param, table_sort.get_sort_keys()))
271  {
272  /*
273  If we fail to init pq, we have to give up:
274  out of memory means my_malloc() will call my_error().
275  */
276  DBUG_PRINT("info", ("failed to allocate PQ"));
277  table_sort.free_sort_buffer();
278  DBUG_ASSERT(thd->is_error());
279  goto err;
280  }
281  // For PQ queries (with limit) we initialize all pointers.
282  table_sort.init_record_pointers();
283  filesort->using_pq= true;
284  }
285  else
286  {
287  DBUG_PRINT("info", ("filesort PQ is not applicable"));
288 
289  const ulong min_sort_memory=
290  max(MIN_SORT_MEMORY, param.sort_length * MERGEBUFF2);
291  while (memory_available >= min_sort_memory)
292  {
293  ha_rows keys= memory_available / (param.rec_length + sizeof(char*));
294  param.max_keys_per_buffer= (uint) min(num_rows, keys);
295  if (table_sort.get_sort_keys())
296  {
297  // If we have already allocated a buffer, it better have same size!
298  if (std::make_pair(param.max_keys_per_buffer, param.rec_length) !=
299  table_sort.sort_buffer_properties())
300  {
301  /*
302  table->sort will still have a pointer to the same buffer,
303  but that will be overwritten by the assignment below.
304  */
305  table_sort.free_sort_buffer();
306  }
307  }
308  table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
309  if (table_sort.get_sort_keys())
310  break;
311  ulong old_memory_available= memory_available;
312  memory_available= memory_available/4*3;
313  if (memory_available < min_sort_memory &&
314  old_memory_available > min_sort_memory)
315  memory_available= min_sort_memory;
316  }
317  if (memory_available < min_sort_memory)
318  {
319  my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
320  goto err;
321  }
322  }
323 
324  if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
325  DISK_BUFFER_SIZE, MYF(MY_WME)))
326  goto err;
327 
328  param.sort_form= table;
329  param.end= (param.local_sortorder= filesort->sortorder) + s_length;
330  // New scope, because subquery execution must be traced within an array.
331  {
332  Opt_trace_array ota(trace, "filesort_execution");
333  num_rows= find_all_keys(&param, select,
334  &table_sort,
335  &buffpek_pointers,
336  &tempfile,
337  pq.is_initialized() ? &pq : NULL,
338  found_rows);
339  if (num_rows == HA_POS_ERROR)
340  goto err;
341  }
342 
343  maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));
344 
345  Opt_trace_object(trace, "filesort_summary")
346  .add("rows", num_rows)
347  .add("examined_rows", param.examined_rows)
348  .add("number_of_tmp_files", maxbuffer)
349  .add("sort_buffer_size", table_sort.sort_buffer_size())
350  .add_alnum("sort_mode",
351  param.addon_field ?
352  "<sort_key, additional_fields>" : "<sort_key, rowid>");
353 
354  if (maxbuffer == 0) // The whole set is in memory
355  {
356  if (save_index(&param, (uint) num_rows, &table_sort))
357  goto err;
358  }
359  else
360  {
361  /* filesort cannot handle zero-length records during merge. */
362  DBUG_ASSERT(param.sort_length != 0);
363 
364  if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
365  {
366  my_free(table_sort.buffpek);
367  table_sort.buffpek= 0;
368  }
369  if (!(table_sort.buffpek=
370  (uchar *) read_buffpek_from_file(&buffpek_pointers, maxbuffer,
371  table_sort.buffpek)))
372  goto err;
373  buffpek= (BUFFPEK *) table_sort.buffpek;
374  table_sort.buffpek_len= maxbuffer;
375  close_cached_file(&buffpek_pointers);
376  /* Open cached file if it isn't open */
377  if (! my_b_inited(outfile) &&
378  open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
379  MYF(MY_WME)))
380  goto err;
381  if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
382  goto err;
383 
384  /*
385  Use also the space previously used by string pointers in sort_buffer
386  for temporary key storage.
387  */
388  param.max_keys_per_buffer= table_sort.sort_buffer_size() / param.rec_length;
389  maxbuffer--; // Offset from 0
390  if (merge_many_buff(&param,
391  (uchar*) table_sort.get_sort_keys(),
392  buffpek,&maxbuffer,
393  &tempfile))
394  goto err;
395  if (flush_io_cache(&tempfile) ||
396  reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
397  goto err;
398  if (merge_index(&param,
399  (uchar*) table_sort.get_sort_keys(),
400  buffpek,
401  maxbuffer,
402  &tempfile,
403  outfile))
404  goto err;
405  }
406 
407  if (num_rows > param.max_rows)
408  {
409  // If find_all_keys() produced more results than the query LIMIT.
410  num_rows= param.max_rows;
411  }
412  error= 0;
413 
414  err:
415  my_free(param.tmp_buffer);
416  if (!subselect || !subselect->is_uncacheable())
417  {
418  table_sort.free_sort_buffer();
419  my_free(buffpek);
420  table_sort.buffpek= 0;
421  table_sort.buffpek_len= 0;
422  }
423  close_cached_file(&tempfile);
424  close_cached_file(&buffpek_pointers);
425  if (my_b_inited(outfile))
426  {
427  if (flush_io_cache(outfile))
428  error=1;
429  {
430  my_off_t save_pos=outfile->pos_in_file;
431  /* For following reads */
432  if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
433  error=1;
434  outfile->end_of_file=save_pos;
435  }
436  }
437  if (error)
438  {
439  int kill_errno= thd->killed_errno();
440  DBUG_ASSERT(thd->is_error() || kill_errno);
441  my_printf_error(ER_FILSORT_ABORT,
442  "%s: %s",
443  MYF(ME_ERROR + ME_WAITTANG),
444  ER_THD(thd, ER_FILSORT_ABORT),
445  kill_errno ?
446  ER(kill_errno) :
447  thd->get_stmt_da()->message());
448 
449  if (log_warnings > 1)
450  {
451  sql_print_warning("%s, host: %s, user: %s, thread: %lu, query: %-.4096s",
452  ER_THD(thd, ER_FILSORT_ABORT),
453  thd->security_ctx->host_or_ip,
454  &thd->security_ctx->priv_user[0],
455  (ulong) thd->thread_id,
456  thd->query());
457  }
458  }
459  else
460  thd->inc_status_sort_rows(num_rows);
461  *examined_rows= param.examined_rows;
462 #ifdef SKIP_DBUG_IN_FILESORT
463  DBUG_POP(); /* Ok to DBUG */
464 #endif
465 
466  // Assign the copy back!
467  table->sort= table_sort;
468 
469  DBUG_PRINT("exit",
470  ("num_rows: %ld examined_rows: %ld found_rows: %ld",
471  (long) num_rows, (long) *examined_rows, (long) *found_rows));
472  MYSQL_FILESORT_DONE(error, num_rows);
473  DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
474 } /* filesort */
475 
476 
477 void filesort_free_buffers(TABLE *table, bool full)
478 {
479  DBUG_ENTER("filesort_free_buffers");
480  my_free(table->sort.record_pointers);
481  table->sort.record_pointers= NULL;
482 
483  if (full)
484  {
485  table->sort.free_sort_buffer();
486  my_free(table->sort.buffpek);
487  table->sort.buffpek= NULL;
488  table->sort.buffpek_len= 0;
489  }
490 
491  my_free(table->sort.addon_buf);
492  my_free(table->sort.addon_field);
493  table->sort.addon_buf= NULL;
494  table->sort.addon_field= NULL;
495  DBUG_VOID_RETURN;
496 }
497 
498 void Filesort::cleanup()
499 {
500  if (select && own_select)
501  {
502  select->cleanup();
503  select= NULL;
504  }
505 }
506 
507 uint Filesort::make_sortorder()
508 {
509  uint count;
510  SORT_FIELD *sort,*pos;
511  ORDER *ord;
512  DBUG_ENTER("make_sortorder");
513 
514 
515  count=0;
516  for (ord = order; ord; ord= ord->next)
517  count++;
518  if (!sortorder)
519  sortorder= (SORT_FIELD*) sql_alloc(sizeof(SORT_FIELD) * (count + 1));
520  pos= sort= sortorder;
521 
522  if (!pos)
523  DBUG_RETURN(0);
524 
525  for (ord= order; ord; ord= ord->next, pos++)
526  {
527  Item *item= ord->item[0]->real_item();
528  pos->field= 0; pos->item= 0;
529  if (item->type() == Item::FIELD_ITEM)
530  pos->field= ((Item_field*) item)->field;
531  else if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item())
532  pos->field= ((Item_sum*) item)->get_tmp_table_field();
533  else if (item->type() == Item::COPY_STR_ITEM)
534  { // Blob patch
535  pos->item= ((Item_copy*) item)->get_item();
536  }
537  else
538  pos->item= *ord->item;
539  pos->reverse= (ord->direction == ORDER::ORDER_DESC);
540  DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
541  }
542  DBUG_RETURN(count);
543 }
544 
545 
556 static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count,
557  uchar *buf)
558 {
559  ulong length= sizeof(BUFFPEK)*count;
560  uchar *tmp= buf;
561  DBUG_ENTER("read_buffpek_from_file");
562  if (count > UINT_MAX/sizeof(BUFFPEK))
563  return 0; /* sizeof(BUFFPEK)*count will overflow */
564  if (!tmp)
565  tmp= (uchar *)my_malloc(length, MYF(MY_WME));
566  if (tmp)
567  {
568  if (reinit_io_cache(buffpek_pointers,READ_CACHE,0L,0,0) ||
569  my_b_read(buffpek_pointers, (uchar*) tmp, length))
570  {
571  my_free(tmp);
572  tmp=0;
573  }
574  }
575  DBUG_RETURN(tmp);
576 }
577 
578 #ifndef DBUG_OFF
579 /*
580  Print a text, SQL-like record representation into dbug trace.
581 
582  Note: this function is a work in progress: at the moment
583  - column read bitmap is ignored (can print garbage for unused columns)
584  - there is no quoting
585 */
586 static void dbug_print_record(TABLE *table, bool print_rowid)
587 {
588  char buff[1024];
589  Field **pfield;
590  String tmp(buff,sizeof(buff),&my_charset_bin);
591  DBUG_LOCK_FILE;
592 
593  fprintf(DBUG_FILE, "record (");
594  for (pfield= table->field; *pfield ; pfield++)
595  fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1])? ", ":"");
596  fprintf(DBUG_FILE, ") = ");
597 
598  fprintf(DBUG_FILE, "(");
599  for (pfield= table->field; *pfield ; pfield++)
600  {
601  Field *field= *pfield;
602 
603  if (field->is_null())
604  fwrite("NULL", sizeof(char), 4, DBUG_FILE);
605 
606  if (field->type() == MYSQL_TYPE_BIT)
607  (void) field->val_int_as_str(&tmp, 1);
608  else
609  field->val_str(&tmp);
610 
611  fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
612  if (pfield[1])
613  fwrite(", ", sizeof(char), 2, DBUG_FILE);
614  }
615  fprintf(DBUG_FILE, ")");
616  if (print_rowid)
617  {
618  fprintf(DBUG_FILE, " rowid ");
619  for (uint i=0; i < table->file->ref_length; i++)
620  {
621  fprintf(DBUG_FILE, "%x", (uchar)table->file->ref[i]);
622  }
623  }
624  fprintf(DBUG_FILE, "\n");
625  DBUG_UNLOCK_FILE;
626 }
627 #endif
628 
675 static ha_rows find_all_keys(Sort_param *param, SQL_SELECT *select,
676  Filesort_info *fs_info,
677  IO_CACHE *buffpek_pointers,
678  IO_CACHE *tempfile,
680  ha_rows *found_rows)
681 {
682  int error,flag,quick_select;
683  uint idx,indexpos,ref_length;
684  uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
685  my_off_t record;
686  TABLE *sort_form;
687  THD *thd= current_thd;
688  volatile THD::killed_state *killed= &thd->killed;
689  handler *file;
690  MY_BITMAP *save_read_set, *save_write_set;
691  bool skip_record;
692 
693  DBUG_ENTER("find_all_keys");
694  DBUG_PRINT("info",("using: %s",
695  (select ? select->quick ? "ranges" : "where":
696  "every row")));
697 
698  idx=indexpos=0;
699  error=quick_select=0;
700  sort_form=param->sort_form;
701  file=sort_form->file;
702  ref_length=param->ref_length;
703  ref_pos= ref_buff;
704  quick_select=select && select->quick;
705  record=0;
706  *found_rows= 0;
707  flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
708  if (flag)
709  ref_pos= &file->ref[0];
710  next_pos=ref_pos;
711  if (!quick_select)
712  {
713  next_pos=(uchar*) 0; /* Find records in sequence */
714  DBUG_EXECUTE_IF("bug14365043_1",
715  DBUG_SET("+d,ha_rnd_init_fail"););
716  if ((error= file->ha_rnd_init(1)))
717  {
718  file->print_error(error, MYF(0));
719  DBUG_RETURN(HA_POS_ERROR);
720  }
721  file->extra_opt(HA_EXTRA_CACHE,
722  current_thd->variables.read_buff_size);
723  }
724 
725  if (quick_select)
726  {
727  if (select->quick->reset())
728  DBUG_RETURN(HA_POS_ERROR);
729  }
730 
731  /* Remember original bitmaps */
732  save_read_set= sort_form->read_set;
733  save_write_set= sort_form->write_set;
734  /* Set up temporary column read map for columns used by sort */
735  bitmap_clear_all(&sort_form->tmp_set);
736  /* Temporary set for register_used_fields and register_field_in_read_map */
737  sort_form->read_set= &sort_form->tmp_set;
738  // Include fields used for sorting in the read_set.
739  register_used_fields(param);
740 
741  // Include fields used by conditions in the read_set.
742  if (select && select->cond)
743  select->cond->walk(&Item::register_field_in_read_map, 1,
744  (uchar*) sort_form);
745 
746  // Include fields used by pushed conditions in the read_set.
747  if (select && select->icp_cond)
748  select->icp_cond->walk(&Item::register_field_in_read_map, 1,
749  (uchar*) sort_form);
750 
751  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);
752 
753  for (;;)
754  {
755  if (quick_select)
756  {
757  if ((error= select->quick->get_next()))
758  break;
759  file->position(sort_form->record[0]);
760  DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
761  }
762  else /* Not quick-select */
763  {
764  {
765  error= file->ha_rnd_next(sort_form->record[0]);
766  if (!flag)
767  {
768  my_store_ptr(ref_pos,ref_length,record); // Position to row
769  record+= sort_form->s->db_record_offset;
770  }
771  else if (!error)
772  file->position(sort_form->record[0]);
773  }
774  if (error && error != HA_ERR_RECORD_DELETED)
775  break;
776  }
777 
778  if (*killed)
779  {
780  DBUG_PRINT("info",("Sort killed by user"));
781  if (!quick_select)
782  {
783  (void) file->extra(HA_EXTRA_NO_CACHE);
784  file->ha_rnd_end();
785  }
786  DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
787  }
788  if (error == 0)
789  param->examined_rows++;
790  if (!error && (!select ||
791  (!select->skip_record(thd, &skip_record) && !skip_record)))
792  {
793  ++(*found_rows);
794  if (pq)
795  {
796  pq->push(ref_pos);
797  idx= pq->num_elements();
798  }
799  else
800  {
801  if (idx == param->max_keys_per_buffer)
802  {
803  if (write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
804  DBUG_RETURN(HA_POS_ERROR);
805  idx= 0;
806  indexpos++;
807  }
808  make_sortkey(param, fs_info->get_record_buffer(idx++), ref_pos);
809  }
810  }
811  /*
812  Don't try unlocking the row if skip_record reported an error since in
813  this case the transaction might have been rolled back already.
814  */
815  else if (!thd->is_error())
816  file->unlock_row();
817  /* It does not make sense to read more keys in case of a fatal error */
818  if (thd->is_error())
819  break;
820  }
821  if (!quick_select)
822  {
823  (void) file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */
824  if (!next_pos)
825  file->ha_rnd_end();
826  }
827 
828  if (thd->is_error())
829  DBUG_RETURN(HA_POS_ERROR);
830 
831  /* Signal we should use orignal column read and write maps */
832  sort_form->column_bitmaps_set(save_read_set, save_write_set);
833 
834  DBUG_PRINT("test",("error: %d indexpos: %d",error,indexpos));
835  if (error != HA_ERR_END_OF_FILE)
836  {
837  file->print_error(error,MYF(ME_ERROR | ME_WAITTANG)); // purecov: inspected
838  DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
839  }
840  if (indexpos && idx &&
841  write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
842  DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
843  const ha_rows retval=
844  my_b_inited(tempfile) ?
845  (ha_rows) (my_b_tell(tempfile)/param->rec_length) : idx;
846  DBUG_PRINT("info", ("find_all_keys return %u", (uint) retval));
847  DBUG_RETURN(retval);
848 } /* find_all_keys */
849 
850 
873 static int
874 write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
875  IO_CACHE *buffpek_pointers, IO_CACHE *tempfile)
876 {
877  size_t rec_length;
878  uchar **end;
879  BUFFPEK buffpek;
880  DBUG_ENTER("write_keys");
881 
882  rec_length= param->rec_length;
883  uchar **sort_keys= fs_info->get_sort_keys();
884 
885  fs_info->sort_buffer(param, count);
886 
887  if (!my_b_inited(tempfile) &&
888  open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
889  MYF(MY_WME)))
890  goto err; /* purecov: inspected */
891  /* check we won't have more buffpeks than we can possibly keep in memory */
892  if (my_b_tell(buffpek_pointers) + sizeof(BUFFPEK) > (ulonglong)UINT_MAX)
893  goto err;
894  buffpek.file_pos= my_b_tell(tempfile);
895  if ((ha_rows) count > param->max_rows)
896  count=(uint) param->max_rows; /* purecov: inspected */
897  buffpek.count=(ha_rows) count;
898  for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
899  if (my_b_write(tempfile, (uchar*) *sort_keys, (uint) rec_length))
900  goto err;
901  if (my_b_write(buffpek_pointers, (uchar*) &buffpek, sizeof(buffpek)))
902  goto err;
903  DBUG_RETURN(0);
904 
905 err:
906  DBUG_RETURN(1);
907 } /* write_keys */
908 
909 
914 static inline void store_length(uchar *to, uint length, uint pack_length)
915 {
916  switch (pack_length) {
917  case 1:
918  *to= (uchar) length;
919  break;
920  case 2:
921  mi_int2store(to, length);
922  break;
923  case 3:
924  mi_int3store(to, length);
925  break;
926  default:
927  mi_int4store(to, length);
928  break;
929  }
930 }
931 
932 
933 #ifdef WORDS_BIGENDIAN
934 const bool Is_big_endian= true;
935 #else
936 const bool Is_big_endian= false;
937 #endif
938 void copy_native_longlong(uchar *to, int to_length,
939  longlong val, bool is_unsigned)
940 {
941  copy_integer<Is_big_endian>(to, to_length,
942  static_cast<uchar*>(static_cast<void*>(&val)),
943  sizeof(longlong),
944  is_unsigned);
945 }
946 
947 
950 void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos)
951 {
952  SORT_FIELD *sort_field;
953 
954  for (sort_field= param->local_sortorder ;
955  sort_field != param->end ;
956  sort_field++)
957  {
958  bool maybe_null= false;
959  if (sort_field->field)
960  {
961  Field *field= sort_field->field;
962  if (field->maybe_null())
963  {
964  if (field->is_null())
965  {
966  if (sort_field->reverse)
967  memset(to, 255, sort_field->length+1);
968  else
969  memset(to, 0, sort_field->length+1);
970  to+= sort_field->length+1;
971  continue;
972  }
973  else
974  *to++=1;
975  }
976  field->make_sort_key(to, sort_field->length);
977  }
978  else
979  { // Item
980  Item *item=sort_field->item;
981  maybe_null= item->maybe_null;
982  switch (sort_field->result_type) {
983  case STRING_RESULT:
984  {
985  const CHARSET_INFO *cs=item->collation.collation;
986  char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
987  int diff;
988  uint sort_field_length;
989 
990  if (maybe_null)
991  *to++=1;
992  /* All item->str() to use some extra byte for end null.. */
993  String tmp((char*) to,sort_field->length+4,cs);
994  String *res= item->str_result(&tmp);
995  if (!res)
996  {
997  if (maybe_null)
998  memset(to-1, 0, sort_field->length+1);
999  else
1000  {
1001  /* purecov: begin deadcode */
1002  /*
1003  This should only happen during extreme conditions if we run out
1004  of memory or have an item marked not null when it can be null.
1005  This code is here mainly to avoid a hard crash in this case.
1006  */
1007  DBUG_ASSERT(0);
1008  DBUG_PRINT("warning",
1009  ("Got null on something that shouldn't be null"));
1010  memset(to, 0, sort_field->length); // Avoid crash
1011  /* purecov: end */
1012  }
1013  break;
1014  }
1015  uint length= res->length();
1016  sort_field_length= sort_field->length - sort_field->suffix_length;
1017  diff=(int) (sort_field_length - length);
1018  if (diff < 0)
1019  {
1020  diff=0;
1021  length= sort_field_length;
1022  }
1023  if (sort_field->suffix_length)
1024  {
1025  /* Store length last in result_string */
1026  store_length(to + sort_field_length, length,
1027  sort_field->suffix_length);
1028  }
1029  if (sort_field->need_strxnfrm)
1030  {
1031  char *from=(char*) res->ptr();
1032  uint tmp_length;
1033  if ((uchar*) from == to)
1034  {
1035  set_if_smaller(length,sort_field->length);
1036  memcpy(param->tmp_buffer,from,length);
1037  from=param->tmp_buffer;
1038  }
1039  tmp_length= cs->coll->strnxfrm(cs, to, sort_field->length,
1040  item->max_char_length(),
1041  (uchar*) from, length,
1042  MY_STRXFRM_PAD_WITH_SPACE |
1043  MY_STRXFRM_PAD_TO_MAXLEN);
1044  DBUG_ASSERT(tmp_length == sort_field->length);
1045  }
1046  else
1047  {
1048  my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
1049  cs->cset->fill(cs, (char *)to+length,diff,fill_char);
1050  }
1051  break;
1052  }
1053  case INT_RESULT:
1054  {
1055  longlong value= item->field_type() == MYSQL_TYPE_TIME ?
1056  item->val_time_temporal_result() :
1057  item->is_temporal_with_date() ?
1058  item->val_date_temporal_result() :
1059  item->val_int_result();
1060  if (maybe_null)
1061  {
1062  *to++=1; /* purecov: inspected */
1063  if (item->null_value)
1064  {
1065  if (maybe_null)
1066  memset(to-1, 0, sort_field->length+1);
1067  else
1068  {
1069  DBUG_PRINT("warning",
1070  ("Got null on something that shouldn't be null"));
1071  memset(to, 0, sort_field->length);
1072  }
1073  break;
1074  }
1075  }
1076  copy_native_longlong(to, sort_field->length,
1077  value, item->unsigned_flag);
1078  break;
1079  }
1080  case DECIMAL_RESULT:
1081  {
1082  my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
1083  if (maybe_null)
1084  {
1085  if (item->null_value)
1086  {
1087  memset(to, 0, sort_field->length+1);
1088  to++;
1089  break;
1090  }
1091  *to++=1;
1092  }
1093  if (sort_field->length < DECIMAL_MAX_FIELD_SIZE)
1094  {
1095  uchar buf[DECIMAL_MAX_FIELD_SIZE];
1096  my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
1097  item->max_length - (item->decimals ? 1:0),
1098  item->decimals);
1099  memcpy(to, buf, sort_field->length);
1100  }
1101  else
1102  {
1103  my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
1104  item->max_length - (item->decimals ? 1:0),
1105  item->decimals);
1106  }
1107  break;
1108  }
1109  case REAL_RESULT:
1110  {
1111  double value= item->val_result();
1112  if (maybe_null)
1113  {
1114  if (item->null_value)
1115  {
1116  memset(to, 0, sort_field->length+1);
1117  to++;
1118  break;
1119  }
1120  *to++=1;
1121  }
1122  if (sort_field->length < sizeof(double))
1123  {
1124  uchar buf[sizeof(double)];
1125  change_double_for_sort(value, buf);
1126  memcpy(to, buf, sort_field->length);
1127  }
1128  else
1129  {
1130  change_double_for_sort(value, (uchar*) to);
1131  }
1132  break;
1133  }
1134  case ROW_RESULT:
1135  default:
1136  // This case should never be choosen
1137  DBUG_ASSERT(0);
1138  break;
1139  }
1140  }
1141  if (sort_field->reverse)
1142  { /* Revers key */
1143  if (maybe_null)
1144  to[-1]= ~to[-1];
1145  uint length= sort_field->length;
1146  while (length--)
1147  {
1148  *to = (uchar) (~ *to);
1149  to++;
1150  }
1151  }
1152  else
1153  to+= sort_field->length;
1154  }
1155 
1156  if (param->addon_field)
1157  {
1158  /*
1159  Save field values appended to sorted fields.
1160  First null bit indicators are appended then field values follow.
1161  In this implementation we use fixed layout for field values -
1162  the same for all records.
1163  */
1164  SORT_ADDON_FIELD *addonf= param->addon_field;
1165  uchar *nulls= to;
1166  DBUG_ASSERT(addonf != 0);
1167  memset(nulls, 0, addonf->offset);
1168  to+= addonf->offset;
1169  for (Field* field; (field= addonf->field) ; addonf++)
1170  {
1171  if (addonf->null_bit && field->is_null())
1172  {
1173  nulls[addonf->null_offset]|= addonf->null_bit;
1174  }
1175  else
1176  {
1177  (void) field->pack(to, field->ptr);
1178  }
1179  to+= addonf->length;
1180  }
1181  }
1182  else
1183  {
1184  /* Save filepos last */
1185  memcpy((uchar*) to, ref_pos, (size_t) param->ref_length);
1186  }
1187  return;
1188 }
1189 
1190 
1191 /*
1192  Register fields used by sorting in the sorted table's read set
1193 */
1194 
1195 static void register_used_fields(Sort_param *param)
1196 {
1197  reg1 SORT_FIELD *sort_field;
1198  TABLE *table=param->sort_form;
1199  MY_BITMAP *bitmap= table->read_set;
1200 
1201  for (sort_field= param->local_sortorder ;
1202  sort_field != param->end ;
1203  sort_field++)
1204  {
1205  Field *field;
1206  if ((field= sort_field->field))
1207  {
1208  if (field->table == table)
1209  bitmap_set_bit(bitmap, field->field_index);
1210  }
1211  else
1212  { // Item
1213  sort_field->item->walk(&Item::register_field_in_read_map, 1,
1214  (uchar *) table);
1215  }
1216  }
1217 
1218  if (param->addon_field)
1219  {
1220  SORT_ADDON_FIELD *addonf= param->addon_field;
1221  Field *field;
1222  for ( ; (field= addonf->field) ; addonf++)
1223  bitmap_set_bit(bitmap, field->field_index);
1224  }
1225  else
1226  {
1227  /* Save filepos last */
1228  table->prepare_for_position();
1229  }
1230 }
1231 
1232 static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
1233 {
1234  uint offset,res_length;
1235  uchar *to;
1236  DBUG_ENTER("save_index");
1237 
1238  table_sort->sort_buffer(param, count);
1239  res_length= param->res_length;
1240  offset= param->rec_length-res_length;
1241  if (!(to= table_sort->record_pointers=
1242  (uchar*) my_malloc(res_length*count, MYF(MY_WME))))
1243  DBUG_RETURN(1); /* purecov: inspected */
1244  uchar **sort_keys= table_sort->get_sort_keys();
1245  for (uchar **end= sort_keys+count ; sort_keys != end ; sort_keys++)
1246  {
1247  memcpy(to, *sort_keys+offset, res_length);
1248  to+= res_length;
1249  }
1250  DBUG_RETURN(0);
1251 }
1252 
1253 
1284 bool check_if_pq_applicable(Opt_trace_context *trace,
1285  Sort_param *param,
1286  Filesort_info *filesort_info,
1287  TABLE *table, ha_rows num_rows,
1288  ulong memory_available)
1289 {
1290  DBUG_ENTER("check_if_pq_applicable");
1291 
1292  /*
1293  How much Priority Queue sort is slower than qsort.
1294  Measurements (see unit test) indicate that PQ is roughly 3 times slower.
1295  */
1296  const double PQ_slowness= 3.0;
1297 
1298  Opt_trace_object trace_filesort(trace,
1299  "filesort_priority_queue_optimization");
1300  if (param->max_rows == HA_POS_ERROR)
1301  {
1302  trace_filesort
1303  .add("usable", false)
1304  .add_alnum("cause", "not applicable (no LIMIT)");
1305  DBUG_RETURN(false);
1306  }
1307 
1308  trace_filesort
1309  .add("limit", param->max_rows)
1310  .add("rows_estimate", num_rows)
1311  .add("row_size", param->rec_length)
1312  .add("memory_available", memory_available);
1313 
1314  if (param->max_rows + 2 >= UINT_MAX)
1315  {
1316  trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
1317  DBUG_RETURN(false);
1318  }
1319 
1320  ulong num_available_keys=
1321  memory_available / (param->rec_length + sizeof(char*));
1322  // We need 1 extra record in the buffer, when using PQ.
1323  param->max_keys_per_buffer= (uint) param->max_rows + 1;
1324 
1325  if (num_rows < num_available_keys)
1326  {
1327  // The whole source set fits into memory.
1328  if (param->max_rows < num_rows/PQ_slowness )
1329  {
1330  filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1331  param->rec_length);
1332  trace_filesort.add("chosen", true);
1333  DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1334  }
1335  else
1336  {
1337  // PQ will be slower.
1338  trace_filesort.add("chosen", false)
1339  .add_alnum("cause", "quicksort_is_cheaper");
1340  DBUG_RETURN(false);
1341  }
1342  }
1343 
1344  // Do we have space for LIMIT rows in memory?
1345  if (param->max_keys_per_buffer < num_available_keys)
1346  {
1347  filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1348  param->rec_length);
1349  trace_filesort.add("chosen", true);
1350  DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1351  }
1352 
1353  // Try to strip off addon fields.
1354  if (param->addon_field)
1355  {
1356  const ulong row_length=
1357  param->sort_length + param->ref_length + sizeof(char*);
1358  num_available_keys= memory_available / row_length;
1359 
1360  Opt_trace_object trace_addon(trace, "strip_additional_fields");
1361  trace_addon.add("row_size", row_length);
1362 
1363  // Can we fit all the keys in memory?
1364  if (param->max_keys_per_buffer >= num_available_keys)
1365  {
1366  trace_addon.add("chosen", false).add_alnum("cause", "not_enough_space");
1367  }
1368  else
1369  {
1370  const double sort_merge_cost=
1371  get_merge_many_buffs_cost_fast(num_rows,
1372  num_available_keys,
1373  row_length);
1374  trace_addon.add("sort_merge_cost", sort_merge_cost);
1375  /*
1376  PQ has cost:
1377  (insert + qsort) * log(queue size) * ROWID_COMPARE_COST +
1378  cost of file lookup afterwards.
1379  The lookup cost is a bit pessimistic: we take scan_time and assume
1380  that on average we find the row after scanning half of the file.
1381  A better estimate would be lookup cost, but note that we are doing
1382  random lookups here, rather than sequential scan.
1383  */
1384  const double pq_cpu_cost=
1385  (PQ_slowness * num_rows + param->max_keys_per_buffer) *
1386  log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST;
1387  const double pq_io_cost=
1388  param->max_rows * table->file->scan_time() / 2.0;
1389  const double pq_cost= pq_cpu_cost + pq_io_cost;
1390  trace_addon.add("priority_queue_cost", pq_cost);
1391 
1392  if (sort_merge_cost < pq_cost)
1393  {
1394  trace_addon.add("chosen", false);
1395  DBUG_RETURN(false);
1396  }
1397 
1398  trace_addon.add("chosen", true);
1399  filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1400  param->sort_length + param->ref_length);
1401  if (filesort_info->get_sort_keys())
1402  {
1403  // Make attached data to be references instead of fields.
1404  my_free(filesort_info->addon_buf);
1405  my_free(filesort_info->addon_field);
1406  filesort_info->addon_buf= NULL;
1407  filesort_info->addon_field= NULL;
1408  param->addon_field= NULL;
1409  param->addon_length= 0;
1410 
1411  param->res_length= param->ref_length;
1412  param->sort_length+= param->ref_length;
1413  param->rec_length= param->sort_length;
1414 
1415  DBUG_RETURN(true);
1416  }
1417  }
1418  }
1419  DBUG_RETURN(false);
1420 }
1421 
1422 
1425 int merge_many_buff(Sort_param *param, uchar *sort_buffer,
1426  BUFFPEK *buffpek, uint *maxbuffer, IO_CACHE *t_file)
1427 {
1428  register uint i;
1429  IO_CACHE t_file2,*from_file,*to_file,*temp;
1430  BUFFPEK *lastbuff;
1431  DBUG_ENTER("merge_many_buff");
1432 
1433  if (*maxbuffer < MERGEBUFF2)
1434  DBUG_RETURN(0); /* purecov: inspected */
1435  if (flush_io_cache(t_file) ||
1436  open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
1437  MYF(MY_WME)))
1438  DBUG_RETURN(1); /* purecov: inspected */
1439 
1440  from_file= t_file ; to_file= &t_file2;
1441  while (*maxbuffer >= MERGEBUFF2)
1442  {
1443  if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
1444  goto cleanup;
1445  if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
1446  goto cleanup;
1447  lastbuff=buffpek;
1448  for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
1449  {
1450  if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1451  buffpek+i,buffpek+i+MERGEBUFF-1,0))
1452  goto cleanup;
1453  }
1454  if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1455  buffpek+i,buffpek+ *maxbuffer,0))
1456  break; /* purecov: inspected */
1457  if (flush_io_cache(to_file))
1458  break; /* purecov: inspected */
1459  temp=from_file; from_file=to_file; to_file=temp;
1460  setup_io_cache(from_file);
1461  setup_io_cache(to_file);
1462  *maxbuffer= (uint) (lastbuff-buffpek)-1;
1463  }
1464 cleanup:
1465  close_cached_file(to_file); // This holds old result
1466  if (to_file == t_file)
1467  {
1468  *t_file=t_file2; // Copy result file
1469  setup_io_cache(t_file);
1470  }
1471 
1472  DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */
1473 } /* merge_many_buff */
1474 
1475 
1483 uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
1484  uint rec_length)
1485 {
1486  register uint count;
1487  uint length;
1488 
1489  if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
1490  {
1491  if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
1492  (length= rec_length*count),
1493  buffpek->file_pos, MYF_RW))
1494  return((uint) -1); /* purecov: inspected */
1495  buffpek->key=buffpek->base;
1496  buffpek->file_pos+= length; /* New filepos */
1497  buffpek->count-= count;
1498  buffpek->mem_count= count;
1499  }
1500  return (count*rec_length);
1501 } /* read_to_buffer */
1502 
1503 
1515 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
1516 {
1517  uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
1518  for (uint i= 0; i < queue->elements; ++i)
1519  {
1520  BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
1521  if (bp->base + bp->max_keys * key_length == reuse->base)
1522  {
1523  bp->max_keys+= reuse->max_keys;
1524  return;
1525  }
1526  else if (bp->base == reuse_end)
1527  {
1528  bp->base= reuse->base;
1529  bp->max_keys+= reuse->max_keys;
1530  return;
1531  }
1532  }
1533  DBUG_ASSERT(0);
1534 }
1535 
1536 
1555 int merge_buffers(Sort_param *param, IO_CACHE *from_file,
1556  IO_CACHE *to_file, uchar *sort_buffer,
1557  BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb,
1558  int flag)
1559 {
1560  int error;
1561  uint rec_length,res_length,offset;
1562  size_t sort_length;
1563  ulong maxcount;
1564  ha_rows max_rows,org_max_rows;
1565  my_off_t to_start_filepos;
1566  uchar *strpos;
1567  BUFFPEK *buffpek;
1568  QUEUE queue;
1569  qsort2_cmp cmp;
1570  void *first_cmp_arg;
1571  volatile THD::killed_state *killed= &current_thd->killed;
1572  THD::killed_state not_killable;
1573  DBUG_ENTER("merge_buffers");
1574 
1575  current_thd->inc_status_sort_merge_passes();
1576  if (param->not_killable)
1577  {
1578  killed= &not_killable;
1579  not_killable= THD::NOT_KILLED;
1580  }
1581 
1582  error=0;
1583  rec_length= param->rec_length;
1584  res_length= param->res_length;
1585  sort_length= param->sort_length;
1586  offset= rec_length-res_length;
1587  maxcount= (ulong) (param->max_keys_per_buffer / ((uint) (Tb-Fb) +1));
1588  to_start_filepos= my_b_tell(to_file);
1589  strpos= (uchar*) sort_buffer;
1590  org_max_rows=max_rows= param->max_rows;
1591 
1592  /* The following will fire if there is not enough space in sort_buffer */
1593  DBUG_ASSERT(maxcount!=0);
1594 
1595  if (param->unique_buff)
1596  {
1597  cmp= param->compare;
1598  first_cmp_arg= (void *) &param->cmp_context;
1599  }
1600  else
1601  {
1602  cmp= get_ptr_compare(sort_length);
1603  first_cmp_arg= (void*) &sort_length;
1604  }
1605  if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
1606  (queue_compare) cmp, first_cmp_arg))
1607  DBUG_RETURN(1); /* purecov: inspected */
1608  for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
1609  {
1610  buffpek->base= strpos;
1611  buffpek->max_keys= maxcount;
1612  strpos+=
1613  (uint) (error= (int)read_to_buffer(from_file, buffpek, rec_length));
1614  if (error == -1)
1615  goto err; /* purecov: inspected */
1616  buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected
1617  queue_insert(&queue, (uchar*) buffpek);
1618  }
1619 
1620  if (param->unique_buff)
1621  {
1622  /*
1623  Called by Unique::get()
1624  Copy the first argument to param->unique_buff for unique removal.
1625  Store it also in 'to_file'.
1626 
1627  This is safe as we know that there is always more than one element
1628  in each block to merge (This is guaranteed by the Unique:: algorithm
1629  */
1630  buffpek= (BUFFPEK*) queue_top(&queue);
1631  memcpy(param->unique_buff, buffpek->key, rec_length);
1632  if (my_b_write(to_file, (uchar*) buffpek->key, rec_length))
1633  {
1634  error=1; goto err; /* purecov: inspected */
1635  }
1636  buffpek->key+= rec_length;
1637  buffpek->mem_count--;
1638  if (!--max_rows)
1639  {
1640  error= 0; /* purecov: inspected */
1641  goto end; /* purecov: inspected */
1642  }
1643  queue_replaced(&queue); // Top element has been used
1644  }
1645  else
1646  cmp= 0; // Not unique
1647 
1648  while (queue.elements > 1)
1649  {
1650  if (*killed)
1651  {
1652  error= 1; goto err; /* purecov: inspected */
1653  }
1654  for (;;)
1655  {
1656  buffpek= (BUFFPEK*) queue_top(&queue);
1657  if (cmp) // Remove duplicates
1658  {
1659  if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
1660  (uchar**) &buffpek->key))
1661  goto skip_duplicate;
1662  memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
1663  }
1664  if (flag == 0)
1665  {
1666  if (my_b_write(to_file,(uchar*) buffpek->key, rec_length))
1667  {
1668  error=1; goto err; /* purecov: inspected */
1669  }
1670  }
1671  else
1672  {
1673  if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length))
1674  {
1675  error=1; goto err; /* purecov: inspected */
1676  }
1677  }
1678  if (!--max_rows)
1679  {
1680  error= 0; /* purecov: inspected */
1681  goto end; /* purecov: inspected */
1682  }
1683 
1684  skip_duplicate:
1685  buffpek->key+= rec_length;
1686  if (! --buffpek->mem_count)
1687  {
1688  if (!(error= (int) read_to_buffer(from_file,buffpek,
1689  rec_length)))
1690  {
1691  (void) queue_remove(&queue,0);
1692  reuse_freed_buff(&queue, buffpek, rec_length);
1693  break; /* One buffer have been removed */
1694  }
1695  else if (error == -1)
1696  goto err; /* purecov: inspected */
1697  }
1698  queue_replaced(&queue); /* Top element has been replaced */
1699  }
1700  }
1701  buffpek= (BUFFPEK*) queue_top(&queue);
1702  buffpek->base= sort_buffer;
1703  buffpek->max_keys= param->max_keys_per_buffer;
1704 
1705  /*
1706  As we know all entries in the buffer are unique, we only have to
1707  check if the first one is the same as the last one we wrote
1708  */
1709  if (cmp)
1710  {
1711  if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
1712  {
1713  buffpek->key+= rec_length; // Remove duplicate
1714  --buffpek->mem_count;
1715  }
1716  }
1717 
1718  do
1719  {
1720  if ((ha_rows) buffpek->mem_count > max_rows)
1721  { /* Don't write too many records */
1722  buffpek->mem_count= (uint) max_rows;
1723  buffpek->count= 0; /* Don't read more */
1724  }
1725  max_rows-= buffpek->mem_count;
1726  if (flag == 0)
1727  {
1728  if (my_b_write(to_file,(uchar*) buffpek->key,
1729  (rec_length*buffpek->mem_count)))
1730  {
1731  error= 1; goto err; /* purecov: inspected */
1732  }
1733  }
1734  else
1735  {
1736  register uchar *end;
1737  strpos= buffpek->key+offset;
1738  for (end= strpos+buffpek->mem_count*rec_length ;
1739  strpos != end ;
1740  strpos+= rec_length)
1741  {
1742  if (my_b_write(to_file, (uchar *) strpos, res_length))
1743  {
1744  error=1; goto err;
1745  }
1746  }
1747  }
1748  }
1749  while ((error=(int) read_to_buffer(from_file,buffpek, rec_length))
1750  != -1 && error != 0);
1751 
1752 end:
1753  lastbuff->count= min(org_max_rows-max_rows, param->max_rows);
1754  lastbuff->file_pos= to_start_filepos;
1755 err:
1756  delete_queue(&queue);
1757  DBUG_RETURN(error);
1758 } /* merge_buffers */
1759 
1760 
1761  /* Do a merge to output-file (save only positions) */
1762 
1763 static int merge_index(Sort_param *param, uchar *sort_buffer,
1764  BUFFPEK *buffpek, uint maxbuffer,
1765  IO_CACHE *tempfile, IO_CACHE *outfile)
1766 {
1767  DBUG_ENTER("merge_index");
1768  if (merge_buffers(param,tempfile,outfile,sort_buffer,buffpek,buffpek,
1769  buffpek+maxbuffer,1))
1770  DBUG_RETURN(1); /* purecov: inspected */
1771  DBUG_RETURN(0);
1772 } /* merge_index */
1773 
1774 
1775 static uint suffix_length(ulong string_length)
1776 {
1777  if (string_length < 256)
1778  return 1;
1779  if (string_length < 256L*256L)
1780  return 2;
1781  if (string_length < 256L*256L*256L)
1782  return 3;
1783  return 4; // Can't sort longer than 4G
1784 }
1785 
1786 
1787 
1806 uint
1807 sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
1808  bool *multi_byte_charset)
1809 {
1810  uint total_length= 0;
1811  const CHARSET_INFO *cs;
1812  *multi_byte_charset= false;
1813 
1814  for (; s_length-- ; sortorder++)
1815  {
1816  sortorder->need_strxnfrm= 0;
1817  sortorder->suffix_length= 0;
1818  if (sortorder->field)
1819  {
1820  cs= sortorder->field->sort_charset();
1821  sortorder->length= sortorder->field->sort_length();
1822 
1823  if (use_strnxfrm((cs=sortorder->field->sort_charset())))
1824  {
1825  sortorder->need_strxnfrm= 1;
1826  *multi_byte_charset= 1;
1827  sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1828  }
1829  if (sortorder->field->maybe_null())
1830  total_length++; // Place for NULL marker
1831 
1832  if (sortorder->field->result_type() == STRING_RESULT &&
1833  !sortorder->field->is_temporal())
1834  {
1835  set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1836  }
1837  }
1838  else
1839  {
1840  sortorder->result_type= sortorder->item->result_type();
1841  if (sortorder->item->is_temporal())
1842  sortorder->result_type= INT_RESULT;
1843  switch (sortorder->result_type) {
1844  case STRING_RESULT:
1845  sortorder->length= sortorder->item->max_length;
1846  set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1847  if (use_strnxfrm((cs=sortorder->item->collation.collation)))
1848  {
1849  sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1850  sortorder->need_strxnfrm= 1;
1851  *multi_byte_charset= 1;
1852  }
1853  else if (cs == &my_charset_bin)
1854  {
1855  /* Store length last to be able to sort blob/varbinary */
1856  sortorder->suffix_length= suffix_length(sortorder->length);
1857  sortorder->length+= sortorder->suffix_length;
1858  }
1859  break;
1860  case INT_RESULT:
1861 #if SIZEOF_LONG_LONG > 4
1862  sortorder->length=8; // Size of intern longlong
1863 #else
1864  sortorder->length=4;
1865 #endif
1866  break;
1867  case DECIMAL_RESULT:
1868  sortorder->length=
1869  my_decimal_get_binary_size(sortorder->item->max_length -
1870  (sortorder->item->decimals ? 1 : 0),
1871  sortorder->item->decimals);
1872  break;
1873  case REAL_RESULT:
1874  sortorder->length=sizeof(double);
1875  break;
1876  case ROW_RESULT:
1877  default:
1878  // This case should never be choosen
1879  DBUG_ASSERT(0);
1880  break;
1881  }
1882  if (sortorder->item->maybe_null)
1883  total_length++; // Place for NULL marker
1884  }
1885  total_length+= sortorder->length;
1886  }
1887  sortorder->field= NULL; // end marker
1888  DBUG_PRINT("info",("sort_length: %u", total_length));
1889  return total_length;
1890 }
1891 
1892 
1920 static SORT_ADDON_FIELD *
1921 get_addon_fields(ulong max_length_for_sort_data,
1922  Field **ptabfield, uint sortlength, uint *plength)
1923 {
1924  Field **pfield;
1925  Field *field;
1926  SORT_ADDON_FIELD *addonf;
1927  uint length= 0;
1928  uint fields= 0;
1929  uint null_fields= 0;
1930  MY_BITMAP *read_set= (*ptabfield)->table->read_set;
1931 
1932  /*
1933  If there is a reference to a field in the query add it
1934  to the the set of appended fields.
1935  Note for future refinement:
1936  This this a too strong condition.
1937  Actually we need only the fields referred in the
1938  result set. And for some of them it makes sense to use
1939  the values directly from sorted fields.
1940  */
1941  *plength= 0;
1942 
1943  for (pfield= ptabfield; (field= *pfield) ; pfield++)
1944  {
1945  if (!bitmap_is_set(read_set, field->field_index))
1946  continue;
1947  if (field->flags & BLOB_FLAG)
1948  return 0;
1949  length+= field->max_packed_col_length(field->pack_length());
1950  if (field->maybe_null())
1951  null_fields++;
1952  fields++;
1953  }
1954  if (!fields)
1955  return 0;
1956  length+= (null_fields+7)/8;
1957 
1958  if (length+sortlength > max_length_for_sort_data ||
1959  !(addonf= (SORT_ADDON_FIELD *) my_malloc(sizeof(SORT_ADDON_FIELD)*
1960  (fields+1), MYF(MY_WME))))
1961  return 0;
1962 
1963  *plength= length;
1964  length= (null_fields+7)/8;
1965  null_fields= 0;
1966  for (pfield= ptabfield; (field= *pfield) ; pfield++)
1967  {
1968  if (!bitmap_is_set(read_set, field->field_index))
1969  continue;
1970  addonf->field= field;
1971  addonf->offset= length;
1972  if (field->maybe_null())
1973  {
1974  addonf->null_offset= null_fields/8;
1975  addonf->null_bit= 1<<(null_fields & 7);
1976  null_fields++;
1977  }
1978  else
1979  {
1980  addonf->null_offset= 0;
1981  addonf->null_bit= 0;
1982  }
1983  addonf->length= field->max_packed_col_length(field->pack_length());
1984  length+= addonf->length;
1985  addonf++;
1986  }
1987  addonf->field= 0; // Put end marker
1988 
1989  DBUG_PRINT("info",("addon_length: %d",length));
1990  return (addonf-fields);
1991 }
1992 
1993 
2009 static void
2010 unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff)
2011 {
2012  Field *field;
2013  SORT_ADDON_FIELD *addonf= addon_field;
2014 
2015  for ( ; (field= addonf->field) ; addonf++)
2016  {
2017  if (addonf->null_bit && (addonf->null_bit & buff[addonf->null_offset]))
2018  {
2019  field->set_null();
2020  continue;
2021  }
2022  field->set_notnull();
2023  field->unpack(field->ptr, buff + addonf->offset);
2024  }
2025 }
2026 
2027 /*
2028 ** functions to change a double or float to a sortable string
2029 ** The following should work for IEEE
2030 */
2031 
2032 #define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)
2033 
2034 void change_double_for_sort(double nr,uchar *to)
2035 {
2036  uchar *tmp=(uchar*) to;
2037  if (nr == 0.0)
2038  { /* Change to zero string */
2039  tmp[0]=(uchar) 128;
2040  memset(tmp+1, 0, sizeof(nr)-1);
2041  }
2042  else
2043  {
2044 #ifdef WORDS_BIGENDIAN
2045  memcpy(tmp, &nr, sizeof(nr));
2046 #else
2047  {
2048  uchar *ptr= (uchar*) &nr;
2049 #if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
2050  tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
2051  tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
2052 #else
2053  tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
2054  tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
2055 #endif
2056  }
2057 #endif
2058  if (tmp[0] & 128) /* Negative */
2059  { /* make complement */
2060  uint i;
2061  for (i=0 ; i < sizeof(nr); i++)
2062  tmp[i]=tmp[i] ^ (uchar) 255;
2063  }
2064  else
2065  { /* Set high and move exponent one up */
2066  ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
2067  (ushort) 32768);
2068  exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
2069  tmp[0]= (uchar) (exp_part >> 8);
2070  tmp[1]= (uchar) exp_part;
2071  }
2072  }
2073 }