MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
filesort_compare-t.cc
1 /* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 // First include (the generated) my_config.h, to get correct platform defines.
17 #include "my_config.h"
18 #include <gtest/gtest.h>
19 
20 #include "filesort_utils.h"
21 
22 #include <algorithm>
23 #include <memory>
24 #include <vector>
25 
26 namespace filesort_compare_unittest {
27 
28 /*
29  Below are some performance microbenchmarks in order to compare our sorting
30  options:
31  my_qsort2 - requires no extra memory, uses insert sort on small ranges,
32  uses quicksort on larger ranges
33  radixsort - requires extra memory: array of n pointers,
34  seems to be quite fast on intel *when it is appliccable*:
35  if (size <= 20 && items >= 1000 && items < 100000)
36  std::sort - requires no extra memory,
37  typically implemented with introsort/insertion sort
38  std::stable_sort - requires extra memory: array of n pointers,
39  typically implemented with mergesort
40 
41  The record format for filesort is constructed in such a way that we can
42  compare records byte-by-byte, without knowing the data types.
43  Nullable fields (maybe_null()) are pre-pended with an extra byte.
44  If we are sorting in descending mode, all the bytes are simply flipped.
45 
46  This means that any variant of memcmp() can be used for comparing record.
47  Below we test different variants, including memcmp() itself.
48 */
49 
50 // A simple helper function to determine array size.
51 template <class T, int size>
52 int array_size(const T (&)[size])
53 {
54  return size;
55 }
56 
57 inline int bytes_to_int(const uchar *s)
58 {
59  int val;
60  longget(val, s);
61  return val ^ 0x80000000;
62 }
63 
64 inline void int_to_bytes(uchar *s, int val)
65 {
66  val= val ^ 0x80000000;
67  longstore(s, val);
68 }
69 
70 
71 TEST(BufferAlignmentTest, IntsToBytesToInt)
72 {
73  uchar buf[10];
74  memset(buf, 0, sizeof(buf));
75  for (int ix= 0; ix < 6; ++ix)
76  {
77  int test_data[]= { INT_MIN32, -42, -1, 0, 1, 42, INT_MAX32 };
78  for (int iy= 0; iy < array_size(test_data); ++iy)
79  {
80  int val= test_data[iy];
81  int_to_bytes(buf+ix, val);
82  EXPECT_EQ(val, bytes_to_int(buf+ix));
83  }
84  }
85 }
86 
87 
88 class FileSortCompareTest : public ::testing::Test
89 {
90 protected:
91  // Do each sort algorithm this many times. Increase value for benchmarking!
92  static const int num_iterations= 1;
93  // Number of records.
94  static const int num_records= 100 * 1000;
95  // Number of keys in each record.
96  static const int keys_per_record= 4;
97  // Size of each record.
98  static const int record_size= keys_per_record * sizeof(int);
99 
100  // Static buffer containing data to be sorted.
101  // (actually: we only sort the sort_keys below, data is stable).
102  static std::vector<int> test_data;
103 
104  static void SetUpTestCase()
105  {
106  test_data.reserve(num_records * keys_per_record);
107  union { int val; uchar buf[sizeof(int)]; } sort_str;
108 
109  for (int ix= 0; ix < num_records * keys_per_record; ++ix)
110  {
111  int val= ix / (10 * keys_per_record);
112  if (ix % 10 == 0) val= -val;
113  int_to_bytes(sort_str.buf, val);
114  test_data.push_back(sort_str.val);
115  }
116  // Comment away shuffling for testing partially pre-sorted data.
117  std::random_shuffle(test_data.begin(), test_data.end());
118  }
119 
120  static void TearDownTestCase()
121  {
122  // Delete the data now, rather than during exit().
123  std::vector<int>().swap(test_data);
124  }
125 
126  virtual void SetUp()
127  {
128  sort_keys= new uchar* [num_records];
129  for (int ix= 0; ix < num_records; ++ix)
130  sort_keys[ix]=
131  static_cast<uchar*>(static_cast<void*>(&test_data[keys_per_record*ix]));
132  }
133 
134  virtual void TearDown()
135  {
136  delete[] sort_keys;
137  }
138 
139  uchar **sort_keys;
140 };
141 std::vector<int> FileSortCompareTest::test_data;
142 
143 /*
144  Some different mem_compare functions.
145  The first one seems to win on all platforms, except sparc,
146  where the builtin memcmp() wins.
147  */
148 inline bool mem_compare_1(const uchar *s1, const uchar *s2, size_t len)
149 {
150  do {
151  if (*s1++ != *s2++)
152  return *--s1 < *--s2;
153  } while (--len != 0);
154  return false;
155 }
156 
157 inline bool mem_compare_2(const uchar *s1, const uchar *s2, size_t len)
158 {
159  int v= 0;
160  while (len-- > 0 && v == 0)
161  {
162  v= *(s1++) - *(s2++);
163  }
164  return v < 0;
165 }
166 
167 inline bool mem_compare_3(const uchar *s1, const uchar *s2, size_t len)
168 {
169  while (--len && (s1[0] == s2[0]))
170  {
171  ++s1; ++s2;
172  }
173  return s1[0] < s2[0];
174 }
175 
176 #if defined(__WIN__)
177 #pragma intrinsic(memcmp)
178 #endif
179 // For gcc, __builtin_memcmp is actually *slower* than the library call:
180 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
181 
182 
184  public std::binary_function<const uchar*, const uchar*, bool>
185 {
186 public:
187  Mem_compare_memcmp(size_t n) : m_size(n) {}
188  bool operator()(const uchar *s1, const uchar *s2)
189  {
190  return memcmp(s1, s2, m_size) < 0;
191  }
192  size_t m_size;
193 };
194 
195 
197  public std::binary_function<const uchar*, const uchar*, bool>
198 {
199 public:
200  Mem_compare_1(size_t n) : m_size(n) {}
201  bool operator()(const uchar *s1, const uchar *s2)
202  {
203  return mem_compare_1(s1, s2, m_size);
204  }
205  size_t m_size;
206 };
207 
208 
210  public std::binary_function<const uchar*, const uchar*, bool>
211 {
212 public:
213  Mem_compare_2(size_t n) : m_size(n) {}
214  bool operator()(const uchar *s1, const uchar *s2)
215  {
216  return mem_compare_2(s1, s2, m_size);
217  }
218  size_t m_size;
219 };
220 
221 
223  public std::binary_function<const uchar*, const uchar*, bool>
224 {
225 public:
226  Mem_compare_3(size_t n) : m_size(n) {}
227  bool operator()(const uchar *s1, const uchar *s2)
228  {
229  return mem_compare_3(s1, s2, m_size);
230  }
231  size_t m_size;
232 };
233 
234 
235 #define COMPARE(N) if (s1[N] != s2[N]) return s1[N] < s2[N]
236 
238  public std::binary_function<const uchar*, const uchar*, bool>
239 {
240 public:
241  Mem_compare_0(size_t n) : m_size(n) {}
242  bool operator()(const uchar *s1, const uchar *s2)
243  {
244  size_t len= m_size;
245  while(len > 0)
246  {
247  COMPARE(0);
248  COMPARE(1);
249  COMPARE(2);
250  COMPARE(3);
251  len-= 4;
252  s1 += 4;
253  s2 += 4;
254  }
255  return false;
256  }
257  size_t m_size;
258 };
259 
260 
261 // This one works for any number of keys.
262 // We treat the first key as int, the rest byte-by-byte.
264  public std::binary_function<const uchar*, const uchar*, bool>
265 {
266 public:
267  Mem_compare_int(size_t n) : m_size(n), rest(n - sizeof(int)) {}
268  bool operator()(const uchar *s1, const uchar *s2)
269  {
270  int int1= bytes_to_int(s1);
271  int int2= bytes_to_int(s2);
272  if (int1 == int2)
273  return mem_compare_1(s1 + rest, s2 + rest, rest);
274  return int1 < int2;
275  }
276 private:
277  size_t m_size;
278  const size_t rest;
279 };
280 
282  public std::binary_function<const uchar*, const uchar*, bool>
283 {
284 public:
285  Mem_compare_int_4(size_t) : keyno(1) {}
286  bool operator() (const uchar *s1, const uchar *s2)
287  {
288  int inta1= bytes_to_int(s1);
289  int intb1= bytes_to_int(s2);
290  if (keyno < 4 && inta1 == intb1)
291  {
292  ++keyno;
293  return operator()(s1 + sizeof(int), s2 + sizeof(int));
294  }
295  return inta1 < intb1;
296  }
297  int keyno;
298 };
299 
300 /*
301  Several sorting tests below, each one runs num_iterations.
302  For each iteration we take a copy of the key pointers, and sort the copy.
303  Most of the tests below are run with std::sort and std::stable_sort.
304  Stable sort seems to be faster for all test cases, on all platforms.
305  */
306 TEST_F(FileSortCompareTest, SetUpOnly)
307 {
308  for (int ix= 0; ix < num_iterations; ++ix)
309  {
310  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
311  }
312 }
313 
314 TEST_F(FileSortCompareTest, RadixSort)
315 {
316  for (int ix= 0; ix < num_iterations; ++ix)
317  {
318  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
319  std::pair<uchar**, ptrdiff_t> buffer=
320  std::get_temporary_buffer<uchar*>(num_records);
321  radixsort_for_str_ptr(&keys[0], num_records, record_size, buffer.first);
322  std::return_temporary_buffer(buffer.first);
323  }
324 }
325 
326 TEST_F(FileSortCompareTest, MyQsort)
327 {
328  size_t size= record_size;
329  for (int ix= 0; ix < num_iterations; ++ix)
330  {
331  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
332  my_qsort2((uchar*) &keys[0], num_records, sizeof(uchar*),
333  get_ptr_compare(record_size), &size);
334  }
335 }
336 
337 TEST_F(FileSortCompareTest, StdSortmemcmp)
338 {
339  for (int ix= 0; ix < num_iterations; ++ix)
340  {
341  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
342  std::sort(keys.begin(), keys.end(), Mem_compare_memcmp(record_size));
343  }
344 }
345 
346 TEST_F(FileSortCompareTest, StdStableSortmemcmp)
347 {
348  for (int ix= 0; ix < num_iterations; ++ix)
349  {
350  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
351  std::stable_sort(keys.begin(), keys.end(),
352  Mem_compare_memcmp(record_size));
353  }
354 }
355 
356 TEST_F(FileSortCompareTest, StdSortCompare1)
357 {
358  for (int ix= 0; ix < num_iterations; ++ix)
359  {
360  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
361  std::sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
362  }
363 }
364 
365 TEST_F(FileSortCompareTest, StdStableSortCompare1)
366 {
367  for (int ix= 0; ix < num_iterations; ++ix)
368  {
369  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
370  std::stable_sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
371  }
372 }
373 
374 TEST_F(FileSortCompareTest, StdSortCompare2)
375 {
376  for (int ix= 0; ix < num_iterations; ++ix)
377  {
378  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
379  std::sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
380  }
381 }
382 
383 TEST_F(FileSortCompareTest, StdStableSortCompare2)
384 {
385  for (int ix= 0; ix < num_iterations; ++ix)
386  {
387  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
388  std::stable_sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
389  }
390 }
391 
392 TEST_F(FileSortCompareTest, StdSortCompare3)
393 {
394  for (int ix= 0; ix < num_iterations; ++ix)
395  {
396  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
397  std::sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
398  }
399 }
400 
401 TEST_F(FileSortCompareTest, StdStableSortCompare3)
402 {
403  for (int ix= 0; ix < num_iterations; ++ix)
404  {
405  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
406  std::stable_sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
407  }
408 }
409 
410 TEST_F(FileSortCompareTest, StdSortCompare4)
411 {
412  for (int ix= 0; ix < num_iterations; ++ix)
413  {
414  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
415  std::sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
416  }
417 }
418 
419 TEST_F(FileSortCompareTest, StdStableSortCompare4)
420 {
421  for (int ix= 0; ix < num_iterations; ++ix)
422  {
423  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
424  std::stable_sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
425  }
426 }
427 
428 TEST_F(FileSortCompareTest, DISABLED_StdSortIntCompare)
429 {
430  for (int ix= 0; ix < num_iterations; ++ix)
431  {
432  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
433  std::sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
434  }
435 }
436 
437 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntCompare)
438 {
439  for (int ix= 0; ix < num_iterations; ++ix)
440  {
441  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
442  std::stable_sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
443  }
444 }
445 
446 TEST_F(FileSortCompareTest, DISABLED_StdSortIntIntIntInt)
447 {
448  for (int ix= 0; ix < num_iterations; ++ix)
449  {
450  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
451  std::sort(keys.begin(), keys.end(),
452  Mem_compare_int_4(record_size));
453  }
454 }
455 
456 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntIntIntInt)
457 {
458  for (int ix= 0; ix < num_iterations; ++ix)
459  {
460  std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
461  std::stable_sort(keys.begin(), keys.end(),
462  Mem_compare_int_4(record_size));
463  }
464 }
465 
466 } // namespace