MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
btr0btr.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2012, Facebook Inc.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************/
27 #include "btr0btr.h"
28 
29 #ifdef UNIV_NONINL
30 #include "btr0btr.ic"
31 #endif
32 
33 #include "fsp0fsp.h"
34 #include "page0page.h"
35 #include "page0zip.h"
36 
37 #ifndef UNIV_HOTBACKUP
38 #include "btr0cur.h"
39 #include "btr0sea.h"
40 #include "btr0pcur.h"
41 #include "rem0cmp.h"
42 #include "lock0lock.h"
43 #include "ibuf0ibuf.h"
44 #include "trx0trx.h"
45 #include "srv0mon.h"
46 
47 /**************************************************************/
51 UNIV_INTERN
52 ibool
54 /*====================*/
55  btr_cur_t* cursor,
56  ulint page_no,
57  buf_block_t** merge_block,
58  mtr_t* mtr);
60 #endif /* UNIV_HOTBACKUP */
61 
62 /**************************************************************/
64 UNIV_INTERN
65 void
67 /*==================*/
68  const buf_block_t* block,
69  const dict_index_t* index)
70 {
71  fprintf(stderr, "InnoDB: flag mismatch in space %u page %u"
72  " index %s of table %s\n",
73  (unsigned) buf_block_get_space(block),
74  (unsigned) buf_block_get_page_no(block),
75  index->name, index->table_name);
76  if (block->page.zip.data) {
77  buf_page_print(block->page.zip.data,
80  }
81  buf_page_print(buf_block_get_frame(block), 0, 0);
82 }
83 
84 #ifndef UNIV_HOTBACKUP
85 #ifdef UNIV_BLOB_DEBUG
86 # include "srv0srv.h"
87 # include "ut0rbt.h"
88 
90 static ibool btr_blob_dbg_msg;
91 
96 #define btr_blob_dbg_msg_issue(op, b, ctx) \
97  fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \
98  (b)->ref_page_no, (b)->ref_heap_no, \
99  (b)->ref_field_no, (b)->blob_page_no, ctx, \
100  (b)->owner, (b)->always_owner, (b)->del)
101 
106 UNIV_INTERN
107 void
108 btr_blob_dbg_rbt_insert(
109 /*====================*/
111  const btr_blob_dbg_t* b,
112  const char* ctx)
113 {
114  if (btr_blob_dbg_msg) {
115  btr_blob_dbg_msg_issue("insert", b, ctx);
116  }
117  mutex_enter(&index->blobs_mutex);
118  rbt_insert(index->blobs, b, b);
119  mutex_exit(&index->blobs_mutex);
120 }
121 
/**************************************************************//**
Remove a reference to an off-page column from the index->blobs map,
optionally logging the operation first. The entry must exist. */
UNIV_INTERN
void
btr_blob_dbg_rbt_delete(
/*====================*/
	dict_index_t*		index,	/*!< in/out: index tree whose
					index->blobs map is updated */
	const btr_blob_dbg_t*	b,	/*!< in: the reference to remove */
	const char*		ctx)	/*!< in: context (for logging) */
{
	/* Trace the operation before taking the mutex, matching
	btr_blob_dbg_rbt_insert(). */
	if (btr_blob_dbg_msg) {
		btr_blob_dbg_msg_issue("delete", b, ctx);
	}
	mutex_enter(&index->blobs_mutex);
	/* The reference must have been registered earlier. */
	ut_a(rbt_delete(index->blobs, b));
	mutex_exit(&index->blobs_mutex);
}
141 
142 /**************************************************************/
146 static
147 int
148 btr_blob_dbg_cmp(
149 /*=============*/
150  const void* a,
151  const void* b)
152 {
153  const btr_blob_dbg_t* aa = static_cast<const btr_blob_dbg_t*>(a);
154  const btr_blob_dbg_t* bb = static_cast<const btr_blob_dbg_t*>(b);
155 
156  ut_ad(aa != NULL);
157  ut_ad(bb != NULL);
158 
159  if (aa->ref_page_no != bb->ref_page_no) {
160  return(aa->ref_page_no < bb->ref_page_no ? -1 : 1);
161  }
162  if (aa->ref_heap_no != bb->ref_heap_no) {
163  return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1);
164  }
165  if (aa->ref_field_no != bb->ref_field_no) {
166  return(aa->ref_field_no < bb->ref_field_no ? -1 : 1);
167  }
168  return(0);
169 }
170 
171 /**************************************************************/
/**************************************************************//**
Register a single freshly-written BLOB reference (owned, not yet
delete-marked) in the index->blobs map. */
UNIV_INTERN
void
btr_blob_dbg_add_blob(
/*==================*/
	const rec_t*	rec,		/*!< in: clustered index record
					that holds the reference */
	ulint		field_no,	/*!< in: BLOB field number in rec */
	ulint		page_no,	/*!< in: start page of the BLOB */
	dict_index_t*	index,		/*!< in/out: index tree */
	const char*	ctx)		/*!< in: context (for logging) */
{
	btr_blob_dbg_t	b;
	const page_t*	page	= page_align(rec);

	ut_a(index->blobs);

	b.blob_page_no = page_no;
	b.ref_page_no = page_get_page_no(page);
	b.ref_heap_no = page_rec_get_heap_no(rec);
	b.ref_field_no = field_no;
	/* BLOB columns can only occur outside the unique key prefix. */
	ut_a(b.ref_field_no >= index->n_uniq);
	b.always_owner = b.owner = TRUE;
	b.del = FALSE;
	/* A record being written must not be delete-marked. */
	ut_a(!rec_get_deleted_flag(rec, page_is_comp(page)));
	btr_blob_dbg_rbt_insert(index, &b, ctx);
}
198 
199 /**************************************************************/
202 UNIV_INTERN
203 ulint
204 btr_blob_dbg_add_rec(
205 /*=================*/
206  const rec_t* rec,
207  dict_index_t* index,
208  const ulint* offsets,
209  const char* ctx)
210 {
211  ulint count = 0;
212  ulint i;
213  btr_blob_dbg_t b;
214  ibool del;
215 
216  ut_ad(rec_offs_validate(rec, index, offsets));
217 
218  if (!rec_offs_any_extern(offsets)) {
219  return(0);
220  }
221 
222  b.ref_page_no = page_get_page_no(page_align(rec));
223  b.ref_heap_no = page_rec_get_heap_no(rec);
224  del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0);
225 
226  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
227  if (rec_offs_nth_extern(offsets, i)) {
228  ulint len;
229  const byte* field_ref = rec_get_nth_field(
230  rec, offsets, i, &len);
231 
232  ut_a(len != UNIV_SQL_NULL);
234  field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
235 
236  if (!memcmp(field_ref, field_ref_zero,
237  BTR_EXTERN_FIELD_REF_SIZE)) {
238  /* the column has not been stored yet */
239  continue;
240  }
241 
242  b.ref_field_no = i;
243  b.blob_page_no = mach_read_from_4(
244  field_ref + BTR_EXTERN_PAGE_NO);
245  ut_a(b.ref_field_no >= index->n_uniq);
246  b.always_owner = b.owner
247  = !(field_ref[BTR_EXTERN_LEN]
249  b.del = del;
250 
251  btr_blob_dbg_rbt_insert(index, &b, ctx);
252  count++;
253  }
254  }
255 
256  return(count);
257 }
258 
259 /**************************************************************/
/**************************************************************//**
Print all registered BLOB references of an index to stderr.
Intended to be invoked from a debugger. */
UNIV_INTERN
void
btr_blob_dbg_print(
/*===============*/
	const dict_index_t*	index)	/*!< in: index whose index->blobs
					map is printed */
{
	const ib_rbt_node_t*	node;

	if (!index->blobs) {
		return;
	}

	/* We intentionally do not acquire index->blobs_mutex here.
	This function is to be called from a debugger, and the caller
	should make sure that the index->blobs_mutex is held. */

	for (node = rbt_first(index->blobs);
	     node != NULL; node = rbt_next(index->blobs, node)) {
		const btr_blob_dbg_t*	b
			= rbt_value(btr_blob_dbg_t, node);
		fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
			b->blob_page_no,
			b->owner ? "" : "(disowned)",
			b->always_owner ? "" : "(has disowned)",
			b->del ? "(deleted)" : "");
	}
}
291 
292 /**************************************************************/
/**************************************************************//**
Remove all externally stored (BLOB) column references of a record
from the index->blobs map.
@return	number of references removed */
UNIV_INTERN
ulint
btr_blob_dbg_remove_rec(
/*====================*/
	const rec_t*	rec,	/*!< in: record */
	dict_index_t*	index,	/*!< in/out: index tree */
	const ulint*	offsets,/*!< in: offsets returned by
				rec_get_offsets(rec, index) */
	const char*	ctx)	/*!< in: context (for logging) */
{
	ulint		i;
	ulint		count	= 0;
	btr_blob_dbg_t	b;

	ut_ad(rec_offs_validate(rec, index, offsets));

	if (!rec_offs_any_extern(offsets)) {
		return(0);
	}

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			ulint		len;
			const byte*	field_ref = rec_get_nth_field(
				rec, offsets, i, &len);

			ut_a(len != UNIV_SQL_NULL);
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer is stored at the end
			of the locally stored prefix. */
			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

			b.ref_field_no = i;
			b.blob_page_no = mach_read_from_4(
				field_ref + BTR_EXTERN_PAGE_NO);

			switch (b.blob_page_no) {
			case 0:
				/* The column has not been stored yet.
				The BLOB pointer must be all zero.
				There cannot be a BLOB starting at
				page 0, because page 0 is reserved for
				the tablespace header. */
				ut_a(!memcmp(field_ref, field_ref_zero,
					     BTR_EXTERN_FIELD_REF_SIZE));
				/* fall through */
			case FIL_NULL:
				/* the column has been freed already */
				continue;
			}

			btr_blob_dbg_rbt_delete(index, &b, ctx);
			count++;
		}
	}

	return(count);
}
353 
354 /**************************************************************/
/**************************************************************//**
Check that there are no references to (or from) the given page in
the index->blobs map, printing any offenders to stderr.
@return	TRUE if no orphan references were found */
UNIV_INTERN
ibool
btr_blob_dbg_is_empty(
/*==================*/
	dict_index_t*	index,		/*!< in: index tree */
	ulint		page_no)	/*!< in: page number to check */
{
	const ib_rbt_node_t*	node;
	ibool			success	= TRUE;

	if (!index->blobs) {
		return(success);
	}

	mutex_enter(&index->blobs_mutex);

	for (node = rbt_first(index->blobs);
	     node != NULL; node = rbt_next(index->blobs, node)) {
		const btr_blob_dbg_t*	b
			= rbt_value(btr_blob_dbg_t, node);

		/* Only entries that reference this page (from either
		end of the pointer) are of interest. */
		if (b->ref_page_no != page_no && b->blob_page_no != page_no) {
			continue;
		}

		fprintf(stderr,
			"InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
			b->owner ? "" : "(disowned)",
			b->always_owner ? "" : "(has disowned)",
			b->del ? "(deleted)" : "",
			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
			b->blob_page_no);

		/* A disowned, delete-marked BLOB residing on this
		page is tolerated; anything else is a failure. */
		if (b->blob_page_no != page_no || b->owner || !b->del) {
			success = FALSE;
		}
	}

	mutex_exit(&index->blobs_mutex);
	return(success);
}
399 
400 /**************************************************************/
/**************************************************************//**
Apply the given operation (add or remove of BLOB references) to all
records on a clustered index leaf page, starting from rec (or from
the first user record when rec is NULL).
@return	total count returned by the per-record operation */
UNIV_INTERN
ulint
btr_blob_dbg_op(
/*============*/
	const page_t*		page,	/*!< in: B-tree leaf page */
	const rec_t*		rec,	/*!< in: record to start from, or
					NULL to process the whole page */
	dict_index_t*		index,	/*!< in/out: index tree */
	const char*		ctx,	/*!< in: context (for logging) */
	const btr_blob_dbg_op_f	op)	/*!< in: operation to apply to
					each record */
{
	ulint		count	= 0;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	ut_a(!rec || page_align(rec) == page);

	/* Only clustered index leaf pages can contain BLOB pointers. */
	if (!index->blobs || !page_is_leaf(page)
	    || !dict_index_is_clust(index)) {
		return(0);
	}

	if (rec == NULL) {
		rec = page_get_infimum_rec(page);
	}

	do {
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
		count += op(rec, index, offsets, ctx);
		rec = page_rec_get_next_const(rec);
	} while (!page_rec_is_supremum(rec));

	/* rec_get_offsets() may have allocated a heap for long records. */
	if (heap) {
		mem_heap_free(heap);
	}

	return(count);
}
445 
446 /**************************************************************/
450 UNIV_INTERN
451 ulint
452 btr_blob_dbg_add(
453 /*=============*/
454  const page_t* page,
455  dict_index_t* index,
456  const char* ctx)
457 {
458  btr_blob_dbg_assert_empty(index, page_get_page_no(page));
459 
460  return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec));
461 }
462 
463 /**************************************************************/
468 UNIV_INTERN
469 ulint
470 btr_blob_dbg_remove(
471 /*================*/
472  const page_t* page,
473  dict_index_t* index,
474  const char* ctx)
475 {
476  ulint count;
477 
478  count = btr_blob_dbg_op(page, NULL, index, ctx,
479  btr_blob_dbg_remove_rec);
480 
481  /* Check that no references exist. */
482  btr_blob_dbg_assert_empty(index, page_get_page_no(page));
483 
484  return(count);
485 }
486 
487 /**************************************************************/
490 UNIV_INTERN
491 void
492 btr_blob_dbg_restore(
493 /*=================*/
494  const page_t* npage,
495  const page_t* page,
496  dict_index_t* index,
497  const char* ctx)
498 {
499  ulint removed;
500  ulint added;
501 
502  ut_a(page_get_page_no(npage) == page_get_page_no(page));
503  ut_a(page_get_space_id(npage) == page_get_space_id(page));
504 
505  removed = btr_blob_dbg_remove(npage, index, ctx);
506  added = btr_blob_dbg_add(page, index, ctx);
507  ut_a(added == removed);
508 }
509 
510 /**************************************************************/
/**************************************************************//**
Update the delete-marked flag of every registered BLOB reference
belonging to a clustered index record. Each reference must already
exist in the index->blobs map. */
UNIV_INTERN
void
btr_blob_dbg_set_deleted_flag(
/*==========================*/
	const rec_t*		rec,	/*!< in: clustered index record */
	dict_index_t*		index,	/*!< in/out: index tree */
	const ulint*		offsets,/*!< in: offsets returned by
					rec_get_offsets(rec, index) */
	ibool			del)	/*!< in: new delete-mark flag
					(FALSE=0 or TRUE=1) */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	btr_blob_dbg_t*		c;
	ulint			i;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(dict_index_is_clust(index));
	ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */

	if (!rec_offs_any_extern(offsets) || !index->blobs) {

		return;
	}

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			ulint		len;
			const byte*	field_ref = rec_get_nth_field(
				rec, offsets, i, &len);

			ut_a(len != UNIV_SQL_NULL);
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer is stored at the end
			of the locally stored prefix. */
			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

			b.ref_field_no = i;
			b.blob_page_no = mach_read_from_4(
				field_ref + BTR_EXTERN_PAGE_NO);

			switch (b.blob_page_no) {
			case 0:
				ut_a(memcmp(field_ref, field_ref_zero,
					    BTR_EXTERN_FIELD_REF_SIZE));
				/* page number 0 is for the
				page allocation bitmap */
				/* fall through: a stored BLOB cannot
				start at page 0, so this is fatal too */
			case FIL_NULL:
				/* the column has been freed already */
				ut_error;
			}

			mutex_enter(&index->blobs_mutex);
			node = rbt_lookup(index->blobs, &b);
			ut_a(node);

			c = rbt_value(btr_blob_dbg_t, node);
			/* The flag should be modified. */
			c->del = del;
			if (btr_blob_dbg_msg) {
				/* Copy the entry before releasing the
				mutex, so the message reads a stable
				snapshot. */
				b = *c;
				mutex_exit(&index->blobs_mutex);
				btr_blob_dbg_msg_issue("del_mk", &b, "");
			} else {
				mutex_exit(&index->blobs_mutex);
			}
		}
	}
}
580 
581 /**************************************************************/
/**************************************************************//**
Change the ownership flag of a registered BLOB reference, checking
that the flag in the record agrees with the caller's expectation. */
UNIV_INTERN
void
btr_blob_dbg_owner(
/*===============*/
	const rec_t*		rec,	/*!< in: record */
	dict_index_t*		index,	/*!< in/out: index tree */
	const ulint*		offsets,/*!< in: offsets returned by
					rec_get_offsets(rec, index) */
	ulint			i,	/*!< in: ith field in rec */
	ibool			own)	/*!< in: TRUE=owned,
					FALSE=disowned */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	const byte*		field_ref;
	ulint			len;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(rec_offs_nth_extern(offsets, i));

	field_ref = rec_get_nth_field(rec, offsets, i, &len);
	ut_a(len != UNIV_SQL_NULL);
	ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
	/* The BLOB pointer is stored at the end of the locally
	stored prefix. */
	field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);
	b.ref_field_no = i;
	/* The owner flag is stored inverted in the BLOB pointer. */
	b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
	b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);

	/* The record must already carry the ownership state the
	caller claims to be setting. */
	ut_a(b.owner == own);

	mutex_enter(&index->blobs_mutex);
	node = rbt_lookup(index->blobs, &b);
	/* row_ins_clust_index_entry_by_modify() invokes
	btr_cur_unmark_extern_fields() also for the newly inserted
	references, which are all zero bytes until the columns are stored.
	The node lookup must fail if and only if that is the case. */
	ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
	     == !node);

	if (node) {
		btr_blob_dbg_t*	c = rbt_value(btr_blob_dbg_t, node);
		/* Some code sets ownership from TRUE to TRUE.
		We do not allow changing ownership from FALSE to FALSE. */
		ut_a(own || c->owner);

		c->owner = own;
		if (!own) {
			c->always_owner = FALSE;
		}
	}

	mutex_exit(&index->blobs_mutex);
}
637 #endif /* UNIV_BLOB_DEBUG */
638 
639 /*
640 Latching strategy of the InnoDB B-tree
641 --------------------------------------
642 A tree latch protects all non-leaf nodes of the tree. Each node of a tree
643 also has a latch of its own.
644 
645 A B-tree operation normally first acquires an S-latch on the tree. It
646 searches down the tree and releases the tree latch when it has the
647 leaf node latch. To save CPU time we do not acquire any latch on
648 non-leaf nodes of the tree during a search, those pages are only bufferfixed.
649 
650 If an operation needs to restructure the tree, it acquires an X-latch on
651 the tree before searching to a leaf node. If it needs, for example, to
652 split a leaf,
653 (1) InnoDB decides the split point in the leaf,
654 (2) allocates a new page,
655 (3) inserts the appropriate node pointer to the first non-leaf level,
656 (4) releases the tree X-latch,
657 (5) and then moves records from the leaf to the new allocated page.
658 
659 Node pointers
660 -------------
661 Leaf pages of a B-tree contain the index records stored in the
662 tree. On levels n > 0 we store 'node pointers' to pages on level
663 n - 1. For each page there is exactly one node pointer stored:
664 thus the our tree is an ordinary B-tree, not a B-link tree.
665 
666 A node pointer contains a prefix P of an index record. The prefix
667 is long enough so that it determines an index record uniquely.
668 The file page number of the child page is added as the last
669 field. To the child page we can store node pointers or index records
670 which are >= P in the alphabetical order, but < P1 if there is
671 a next node pointer on the level, and P1 is its prefix.
672 
673 If a node pointer with a prefix P points to a non-leaf child,
674 then the leftmost record in the child must have the same
675 prefix P. If it points to a leaf node, the child is not required
676 to contain any record with a prefix equal to P. The leaf case
677 is decided this way to allow arbitrary deletions in a leaf node
678 without touching upper levels of the tree.
679 
680 We have predefined a special minimum record which we
681 define as the smallest record in any alphabetical order.
682 A minimum record is denoted by setting a bit in the record
683 header. A minimum record acts as the prefix of a node pointer
684 which points to a leftmost node on any level of the tree.
685 
686 File page allocation
687 --------------------
688 In the root node of a B-tree there are two file segment headers.
689 The leaf pages of a tree are allocated from one file segment, to
690 make them consecutive on disk if possible. From the other file segment
691 we allocate pages for the non-leaf levels of the tree.
692 */
693 
694 #ifdef UNIV_BTR_DEBUG
695 /**************************************************************/
698 static
699 ibool
700 btr_root_fseg_validate(
701 /*===================*/
702  const fseg_header_t* seg_header,
703  ulint space)
704 {
705  ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
706 
707  ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
708  ut_a(offset >= FIL_PAGE_DATA);
709  ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
710  return(TRUE);
711 }
712 #endif /* UNIV_BTR_DEBUG */
713 
714 /**************************************************************/
717 static
719 btr_root_block_get(
720 /*===============*/
721  const dict_index_t* index,
722  ulint mode,
724  mtr_t* mtr)
725 {
726  ulint space;
727  ulint zip_size;
728  ulint root_page_no;
730 
731  space = dict_index_get_space(index);
732  zip_size = dict_table_zip_size(index->table);
733  root_page_no = dict_index_get_page(index);
734 
735  block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
736  btr_assert_not_corrupted(block, index);
737 #ifdef UNIV_BTR_DEBUG
738  if (!dict_index_is_ibuf(index)) {
739  const page_t* root = buf_block_get_frame(block);
740 
741  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
742  + root, space));
743  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
744  + root, space));
745  }
746 #endif /* UNIV_BTR_DEBUG */
747 
748  return(block);
749 }
750 
751 /**************************************************************/
754 UNIV_INTERN
755 page_t*
757 /*=========*/
758  const dict_index_t* index,
759  mtr_t* mtr)
760 {
761  return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
762  mtr)));
763 }
764 
765 /**************************************************************/
770 UNIV_INTERN
771 ulint
773 /*===========*/
774  dict_index_t* index,
775  mtr_t* mtr)
776 {
777  ulint height;
778  buf_block_t* root_block;
779 
780  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
781  MTR_MEMO_S_LOCK)
782  || mtr_memo_contains(mtr, dict_index_get_lock(index),
783  MTR_MEMO_X_LOCK));
784 
785  /* S latches the page */
786  root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
787 
788  height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
789 
790  /* Release the S latch on the root page. */
791  mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
792 #ifdef UNIV_SYNC_DEBUG
793  sync_thread_reset_level(&root_block->lock);
794 #endif /* UNIV_SYNC_DEBUG */
795 
796  return(height);
797 }
798 
799 /**************************************************************/
/**************************************************************//**
Checks and adjusts one file segment header in a B-tree root page
when the tablespace is imported: the stored offset must be sane,
and the space id is rewritten to the importing tablespace.
@return	TRUE if the header was valid and adjusted, FALSE if corrupt */
static
bool
btr_root_fseg_adjust_on_import(
/*===========================*/
	fseg_header_t*	seg_header,	/*!< in/out: segment header */
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page,
					or NULL for uncompressed */
	ulint		space,		/*!< in: tablespace id */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint	offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);

	if (offset < FIL_PAGE_DATA
	    || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {

		return(FALSE);

	} else if (page_zip) {
		/* Write the space id to the uncompressed frame and
		log the change to the compressed page too. */
		mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
		page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
				      4, mtr);
	} else {
		mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
				 space, MLOG_4BYTES, mtr);
	}

	return(TRUE);
}
831 
832 /**************************************************************/
835 UNIV_INTERN
836 dberr_t
838 /*======================*/
839  const dict_index_t* index)
840 {
841  dberr_t err;
842  mtr_t mtr;
843  page_t* page;
845  page_zip_des_t* page_zip;
846  dict_table_t* table = index->table;
847  ulint space_id = dict_index_get_space(index);
848  ulint zip_size = dict_table_zip_size(table);
849  ulint root_page_no = dict_index_get_page(index);
850 
851  mtr_start(&mtr);
852 
853  mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
854 
855  DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
856  return(DB_CORRUPTION););
857 
858  block = btr_block_get(
859  space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
860 
861  page = buf_block_get_frame(block);
862  page_zip = buf_block_get_page_zip(block);
863 
864  /* Check that this is a B-tree page and both the PREV and NEXT
865  pointers are FIL_NULL, because the root page does not have any
866  siblings. */
867  if (fil_page_get_type(page) != FIL_PAGE_INDEX
868  || fil_page_get_prev(page) != FIL_NULL
869  || fil_page_get_next(page) != FIL_NULL) {
870 
871  err = DB_CORRUPTION;
872 
873  } else if (dict_index_is_clust(index)) {
874  bool page_is_compact_format;
875 
876  page_is_compact_format = page_is_comp(page) > 0;
877 
878  /* Check if the page format and table format agree. */
879  if (page_is_compact_format != dict_table_is_comp(table)) {
880  err = DB_CORRUPTION;
881  } else {
882 
883  /* Check that the table flags and the tablespace
884  flags match. */
885  ulint flags = fil_space_get_flags(table->space);
886 
887  if (flags
888  && flags != dict_tf_to_fsp_flags(table->flags)) {
889 
890  err = DB_CORRUPTION;
891  } else {
892  err = DB_SUCCESS;
893  }
894  }
895  } else {
896  err = DB_SUCCESS;
897  }
898 
899  /* Check and adjust the file segment headers, if all OK so far. */
900  if (err == DB_SUCCESS
901  && (!btr_root_fseg_adjust_on_import(
902  FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
903  + page, page_zip, space_id, &mtr)
904  || !btr_root_fseg_adjust_on_import(
905  FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
906  + page, page_zip, space_id, &mtr))) {
907 
908  err = DB_CORRUPTION;
909  }
910 
911  mtr_commit(&mtr);
912 
913  return(err);
914 }
915 
916 /*************************************************************/
920 UNIV_INTERN
921 rec_t*
923 /*==================*/
924  rec_t* rec,
925  mtr_t* mtr)
927 {
928  page_t* page;
929  page_t* prev_page;
930  ulint prev_page_no;
931 
932  if (!page_rec_is_infimum(rec)) {
933 
934  rec_t* prev_rec = page_rec_get_prev(rec);
935 
936  if (!page_rec_is_infimum(prev_rec)) {
937 
938  return(prev_rec);
939  }
940  }
941 
942  page = page_align(rec);
943  prev_page_no = btr_page_get_prev(page, mtr);
944 
945  if (prev_page_no != FIL_NULL) {
946 
947  ulint space;
948  ulint zip_size;
949  buf_block_t* prev_block;
950 
951  space = page_get_space_id(page);
952  zip_size = fil_space_get_zip_size(space);
953 
954  prev_block = buf_page_get_with_no_latch(space, zip_size,
955  prev_page_no, mtr);
956  prev_page = buf_block_get_frame(prev_block);
957  /* The caller must already have a latch to the brother */
958  ut_ad(mtr_memo_contains(mtr, prev_block,
959  MTR_MEMO_PAGE_S_FIX)
960  || mtr_memo_contains(mtr, prev_block,
961  MTR_MEMO_PAGE_X_FIX));
962 #ifdef UNIV_BTR_DEBUG
963  ut_a(page_is_comp(prev_page) == page_is_comp(page));
964  ut_a(btr_page_get_next(prev_page, mtr)
965  == page_get_page_no(page));
966 #endif /* UNIV_BTR_DEBUG */
967 
968  return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
969  }
970 
971  return(NULL);
972 }
973 
974 /*************************************************************/
978 UNIV_INTERN
979 rec_t*
981 /*==================*/
982  rec_t* rec,
983  mtr_t* mtr)
985 {
986  page_t* page;
987  page_t* next_page;
988  ulint next_page_no;
989 
990  if (!page_rec_is_supremum(rec)) {
991 
992  rec_t* next_rec = page_rec_get_next(rec);
993 
994  if (!page_rec_is_supremum(next_rec)) {
995 
996  return(next_rec);
997  }
998  }
999 
1000  page = page_align(rec);
1001  next_page_no = btr_page_get_next(page, mtr);
1002 
1003  if (next_page_no != FIL_NULL) {
1004  ulint space;
1005  ulint zip_size;
1006  buf_block_t* next_block;
1007 
1008  space = page_get_space_id(page);
1009  zip_size = fil_space_get_zip_size(space);
1010 
1011  next_block = buf_page_get_with_no_latch(space, zip_size,
1012  next_page_no, mtr);
1013  next_page = buf_block_get_frame(next_block);
1014  /* The caller must already have a latch to the brother */
1015  ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
1016  || mtr_memo_contains(mtr, next_block,
1017  MTR_MEMO_PAGE_X_FIX));
1018 #ifdef UNIV_BTR_DEBUG
1019  ut_a(page_is_comp(next_page) == page_is_comp(page));
1020  ut_a(btr_page_get_prev(next_page, mtr)
1021  == page_get_page_no(page));
1022 #endif /* UNIV_BTR_DEBUG */
1023 
1024  return(page_rec_get_next(page_get_infimum_rec(next_page)));
1025  }
1026 
1027  return(NULL);
1028 }
1029 
1030 /**************************************************************/
/**************************************************************//**
Creates a new index page (not the root) and initializes it: sets the
page format, tree level and index id. */
static
void
btr_page_create(
/*============*/
	buf_block_t*	block,	/*!< in/out: page to be created */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL
				for an uncompressed page */
	dict_index_t*	index,	/*!< in: index */
	ulint		level,	/*!< in: the B-tree level of the page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*		page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));

	if (page_zip) {
		page_create_zip(block, index, level, 0, mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
		/* Set the level of the new index page */
		btr_page_set_level(page, NULL, level, mtr);
	}

	/* Request the page to be checked at flush time. */
	block->check_index_page_at_flush = TRUE;

	btr_page_set_index_id(page, page_zip, index->id, mtr);
}
1060 
1061 /**************************************************************/
/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page
from the head of the free list stored in the ibuf tree root.
@return	new allocated block, x-latched */
static
buf_block_t*
btr_page_alloc_for_ibuf(
/*====================*/
	dict_index_t*	index,	/*!< in: index tree */
	mtr_t*		mtr)	/*!< in: mtr */
{
	fil_addr_t	node_addr;
	page_t*		root;
	page_t*		new_page;
	buf_block_t*	new_block;

	root = btr_root_get(index, mtr);

	node_addr = flst_get_first(root + PAGE_HEADER
				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
	/* The ibuf free list must never be empty here. */
	ut_a(node_addr.page != FIL_NULL);

	new_block = buf_page_get(dict_index_get_space(index),
				 dict_table_zip_size(index->table),
				 node_addr.page, RW_X_LATCH, mtr);
	new_page = buf_block_get_frame(new_block);
	buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW);

	/* Detach the allocated page from the free list. */
	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		    new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
		    mtr);
	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));

	return(new_block);
}
1097 
1098 /**************************************************************/
1105 static __attribute__((nonnull, warn_unused_result))
1106 buf_block_t*
1107 btr_page_alloc_low(
1108 /*===============*/
1109  dict_index_t* index,
1111  byte file_direction,
1113  ulint level,
1115  mtr_t* mtr,
1117  mtr_t* init_mtr)
1123 {
1124  fseg_header_t* seg_header;
1125  page_t* root;
1126 
1127  root = btr_root_get(index, mtr);
1128 
1129  if (level == 0) {
1130  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
1131  } else {
1132  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1133  }
1134 
1135  /* Parameter TRUE below states that the caller has made the
1136  reservation for free extents, and thus we know that a page can
1137  be allocated: */
1138 
1140  seg_header, hint_page_no, file_direction,
1141  TRUE, mtr, init_mtr));
1142 }
1143 
1144 /**************************************************************/
1151 UNIV_INTERN
1152 buf_block_t*
1154 /*===========*/
1155  dict_index_t* index,
1156  ulint hint_page_no,
1157  byte file_direction,
1159  ulint level,
1161  mtr_t* mtr,
1163  mtr_t* init_mtr)
1166 {
1167  buf_block_t* new_block;
1168 
1169  if (dict_index_is_ibuf(index)) {
1170 
1171  return(btr_page_alloc_for_ibuf(index, mtr));
1172  }
1173 
1174  new_block = btr_page_alloc_low(
1175  index, hint_page_no, file_direction, level, mtr, init_mtr);
1176 
1177  if (new_block) {
1178  buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
1179  }
1180 
1181  return(new_block);
1182 }
1183 
1184 /**************************************************************/
1187 UNIV_INTERN
1188 ulint
1190 /*=========*/
1191  dict_index_t* index,
1192  ulint flag,
1193  mtr_t* mtr)
1195 {
1196  fseg_header_t* seg_header;
1197  page_t* root;
1198  ulint n;
1199  ulint dummy;
1200 
1201  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1202  MTR_MEMO_S_LOCK));
1203 
1204  if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
1205  || *index->name == TEMP_INDEX_PREFIX) {
1206  return(ULINT_UNDEFINED);
1207  }
1208 
1209  root = btr_root_get(index, mtr);
1210 
1211  if (flag == BTR_N_LEAF_PAGES) {
1212  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
1213 
1214  fseg_n_reserved_pages(seg_header, &n, mtr);
1215 
1216  } else if (flag == BTR_TOTAL_SIZE) {
1217  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1218 
1219  n = fseg_n_reserved_pages(seg_header, &dummy, mtr);
1220 
1221  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
1222 
1223  n += fseg_n_reserved_pages(seg_header, &dummy, mtr);
1224  } else {
1225  ut_error;
1226  }
1227 
1228  return(n);
1229 }
1230 
1231 /**************************************************************/
/**************************************************************//**
Frees a page used in an ibuf tree by putting it at the head of the
free list stored in the ibuf tree root. */
static
void
btr_page_free_for_ibuf(
/*===================*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*		root;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	root = btr_root_get(index, mtr);

	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		       buf_block_get_frame(block)
		       + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);

	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));
}
1254 
1255 /**************************************************************/
1259 UNIV_INTERN
1260 void
1262 /*==============*/
1263  dict_index_t* index,
1264  buf_block_t* block,
1265  ulint level,
1266  mtr_t* mtr)
1267 {
1268  fseg_header_t* seg_header;
1269  page_t* root;
1270 
1271  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1272  /* The page gets invalid for optimistic searches: increment the frame
1273  modify clock */
1274 
1276  btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
1277 
1278  if (dict_index_is_ibuf(index)) {
1279 
1280  btr_page_free_for_ibuf(index, block, mtr);
1281 
1282  return;
1283  }
1284 
1285  root = btr_root_get(index, mtr);
1286 
1287  if (level == 0) {
1288  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
1289  } else {
1290  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1291  }
1292 
1293  fseg_free_page(seg_header,
1294  buf_block_get_space(block),
1295  buf_block_get_page_no(block), mtr);
1296 
1297  /* The page was marked free in the allocation bitmap, but it
1298  should remain buffer-fixed until mtr_commit(mtr) or until it
1299  is explicitly freed from the mini-transaction. */
1300  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1301  /* TODO: Discard any operations on the page from the redo log
1302  and remove the block from the flush list and the buffer pool.
1303  This would free up buffer pool earlier and reduce writes to
1304  both the tablespace and the redo log. */
1305 }
1306 
1307 /**************************************************************/
1310 UNIV_INTERN
1311 void
1313 /*==========*/
1314  dict_index_t* index,
1315  buf_block_t* block,
1316  mtr_t* mtr)
1317 {
1318  const page_t* page = buf_block_get_frame(block);
1319  ulint level = btr_page_get_level(page, mtr);
1320 
1322  btr_page_free_low(index, block, level, mtr);
1323 }
1324 
1325 /**************************************************************/
1327 UNIV_INLINE
1328 void
1330 /*===========================*/
1331  rec_t* rec,
1332  page_zip_des_t* page_zip,
1334  const ulint* offsets,
1335  ulint page_no,
1336  mtr_t* mtr)
1337 {
1338  byte* field;
1339  ulint len;
1340 
1341  ut_ad(rec_offs_validate(rec, NULL, offsets));
1342  ut_ad(!page_is_leaf(page_align(rec)));
1343  ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
1344 
1345  /* The child address is in the last field */
1346  field = rec_get_nth_field(rec, offsets,
1347  rec_offs_n_fields(offsets) - 1, &len);
1348 
1349  ut_ad(len == REC_NODE_PTR_SIZE);
1350 
1351  if (page_zip) {
1352  page_zip_write_node_ptr(page_zip, rec,
1353  rec_offs_data_size(offsets),
1354  page_no, mtr);
1355  } else {
1356  mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
1357  }
1358 }
1359 
1360 /************************************************************/
1363 static
1364 buf_block_t*
1365 btr_node_ptr_get_child(
1366 /*===================*/
1367  const rec_t* node_ptr,
1368  dict_index_t* index,
1369  const ulint* offsets,
1370  mtr_t* mtr)
1371 {
1372  ulint page_no;
1373  ulint space;
1374 
1375  ut_ad(rec_offs_validate(node_ptr, index, offsets));
1376  space = page_get_space_id(page_align(node_ptr));
1377  page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
1378 
1379  return(btr_block_get(space, dict_table_zip_size(index->table),
1380  page_no, RW_X_LATCH, index, mtr));
1381 }
1382 
1383 /************************************************************/
1387 static
1388 ulint*
1389 btr_page_get_father_node_ptr_func(
1390 /*==============================*/
1391  ulint* offsets,
1392  mem_heap_t* heap,
1393  btr_cur_t* cursor,
1396  const char* file,
1397  ulint line,
1398  mtr_t* mtr)
1399 {
1400  dtuple_t* tuple;
1401  rec_t* user_rec;
1402  rec_t* node_ptr;
1403  ulint level;
1404  ulint page_no;
1406 
1407  page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
1408  index = btr_cur_get_index(cursor);
1409 
1410  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1411  MTR_MEMO_X_LOCK));
1412 
1413  ut_ad(dict_index_get_page(index) != page_no);
1414 
1415  level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
1416 
1417  user_rec = btr_cur_get_rec(cursor);
1418  ut_a(page_rec_is_user_rec(user_rec));
1419  tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
1420 
1421  btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
1422  BTR_CONT_MODIFY_TREE, cursor, 0,
1423  file, line, mtr);
1424 
1425  node_ptr = btr_cur_get_rec(cursor);
1426  ut_ad(!page_rec_is_comp(node_ptr)
1427  || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
1428  offsets = rec_get_offsets(node_ptr, index, offsets,
1429  ULINT_UNDEFINED, &heap);
1430 
1431  if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
1432  rec_t* print_rec;
1433  fputs("InnoDB: Dump of the child page:\n", stderr);
1434  buf_page_print(page_align(user_rec), 0,
1436  fputs("InnoDB: Dump of the parent page:\n", stderr);
1437  buf_page_print(page_align(node_ptr), 0,
1439 
1440  fputs("InnoDB: Corruption of an index tree: table ", stderr);
1441  ut_print_name(stderr, NULL, TRUE, index->table_name);
1442  fputs(", index ", stderr);
1443  ut_print_name(stderr, NULL, FALSE, index->name);
1444  fprintf(stderr, ",\n"
1445  "InnoDB: father ptr page no %lu, child page no %lu\n",
1446  (ulong)
1447  btr_node_ptr_get_child_page_no(node_ptr, offsets),
1448  (ulong) page_no);
1449  print_rec = page_rec_get_next(
1450  page_get_infimum_rec(page_align(user_rec)));
1451  offsets = rec_get_offsets(print_rec, index,
1452  offsets, ULINT_UNDEFINED, &heap);
1453  page_rec_print(print_rec, offsets);
1454  offsets = rec_get_offsets(node_ptr, index, offsets,
1455  ULINT_UNDEFINED, &heap);
1456  page_rec_print(node_ptr, offsets);
1457 
1458  fputs("InnoDB: You should dump + drop + reimport the table"
1459  " to fix the\n"
1460  "InnoDB: corruption. If the crash happens at "
1461  "the database startup, see\n"
1462  "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
1463  "InnoDB: forcing recovery. "
1464  "Then dump + drop + reimport.\n", stderr);
1465 
1466  ut_error;
1467  }
1468 
1469  return(offsets);
1470 }
1471 
1472 #define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
1473  btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
1474 
1475 /************************************************************/
1479 static
1480 ulint*
1481 btr_page_get_father_block(
1482 /*======================*/
1483  ulint* offsets,
1484  mem_heap_t* heap,
1485  dict_index_t* index,
1486  buf_block_t* block,
1487  mtr_t* mtr,
1488  btr_cur_t* cursor)
1490 {
1491  rec_t* rec
1492  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1493  block)));
1494  btr_cur_position(index, rec, block, cursor);
1495  return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
1496 }
1497 
1498 /************************************************************/
1501 static
1502 void
1503 btr_page_get_father(
1504 /*================*/
1505  dict_index_t* index,
1506  buf_block_t* block,
1507  mtr_t* mtr,
1508  btr_cur_t* cursor)
1510 {
1511  mem_heap_t* heap;
1512  rec_t* rec
1513  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1514  block)));
1515  btr_cur_position(index, rec, block, cursor);
1516 
1517  heap = mem_heap_create(100);
1518  btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
1519  mem_heap_free(heap);
1520 }
1521 
1522 /************************************************************/
1525 UNIV_INTERN
1526 ulint
1528 /*=======*/
1529  ulint type,
1530  ulint space,
1531  ulint zip_size,
1533  index_id_t index_id,
1534  dict_index_t* index,
1535  mtr_t* mtr)
1536 {
1537  ulint page_no;
1538  buf_block_t* block;
1539  buf_frame_t* frame;
1540  page_t* page;
1541  page_zip_des_t* page_zip;
1542 
1543  /* Create the two new segments (one, in the case of an ibuf tree) for
1544  the index tree; the segment headers are put on the allocated root page
1545  (for an ibuf tree, not in the root, but on a separate ibuf header
1546  page) */
1547 
1548  if (type & DICT_IBUF) {
1549  /* Allocate first the ibuf header page */
1550  buf_block_t* ibuf_hdr_block = fseg_create(
1551  space, 0,
1552  IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
1553 
1554  buf_block_dbg_add_level(
1555  ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW);
1556 
1557  ut_ad(buf_block_get_page_no(ibuf_hdr_block)
1558  == IBUF_HEADER_PAGE_NO);
1559  /* Allocate then the next page to the segment: it will be the
1560  tree root page */
1561 
1562  block = fseg_alloc_free_page(
1563  buf_block_get_frame(ibuf_hdr_block)
1564  + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
1565  IBUF_TREE_ROOT_PAGE_NO,
1566  FSP_UP, mtr);
1567  ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO);
1568  } else {
1569 #ifdef UNIV_BLOB_DEBUG
1570  if ((type & DICT_CLUSTERED) && !index->blobs) {
1571  mutex_create(PFS_NOT_INSTRUMENTED,
1572  &index->blobs_mutex, SYNC_ANY_LATCH);
1573  index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
1574  btr_blob_dbg_cmp);
1575  }
1576 #endif /* UNIV_BLOB_DEBUG */
1577  block = fseg_create(space, 0,
1578  PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
1579  }
1580 
1581  if (block == NULL) {
1582 
1583  return(FIL_NULL);
1584  }
1585 
1586  page_no = buf_block_get_page_no(block);
1587  frame = buf_block_get_frame(block);
1588 
1589  if (type & DICT_IBUF) {
1590  /* It is an insert buffer tree: initialize the free list */
1591  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
1592 
1593  ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
1594 
1595  flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
1596  } else {
1597  /* It is a non-ibuf tree: create a file segment for leaf
1598  pages */
1599  buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1600 
1601  if (!fseg_create(space, page_no,
1602  PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
1603  /* Not enough space for new segment, free root
1604  segment before return. */
1605  btr_free_root(space, zip_size, page_no, mtr);
1606 
1607  return(FIL_NULL);
1608  }
1609 
1610  /* The fseg create acquires a second latch on the page,
1611  therefore we must declare it: */
1612  buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1613  }
1614 
1615  /* Create a new index page on the allocated segment page */
1616  page_zip = buf_block_get_page_zip(block);
1617 
1618  if (page_zip) {
1619  page = page_create_zip(block, index, 0, 0, mtr);
1620  } else {
1621  page = page_create(block, mtr,
1622  dict_table_is_comp(index->table));
1623  /* Set the level of the new index page */
1624  btr_page_set_level(page, NULL, 0, mtr);
1625  }
1626 
1627  block->check_index_page_at_flush = TRUE;
1628 
1629  /* Set the index id of the page */
1630  btr_page_set_index_id(page, page_zip, index_id, mtr);
1631 
1632  /* Set the next node and previous node fields */
1633  btr_page_set_next(page, page_zip, FIL_NULL, mtr);
1634  btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
1635 
1636  /* We reset the free bits for the page to allow creation of several
1637  trees in the same mtr, otherwise the latch on a bitmap page would
1638  prevent it because of the latching order */
1639 
1640  if (!(type & DICT_CLUSTERED)) {
1641  ibuf_reset_free_bits(block);
1642  }
1643 
1644  /* In the following assertion we test that two records of maximum
1645  allowed size fit on the root page: this fact is needed to ensure
1646  correctness of split algorithms */
1647 
1649 
1650  return(page_no);
1651 }
1652 
1653 /************************************************************/
1656 UNIV_INTERN
1657 void
1659 /*==================*/
1660  ulint space,
1661  ulint zip_size,
1663  ulint root_page_no)
1664 {
1665  ibool finished;
1666  page_t* root;
1667  mtr_t mtr;
1668 
1669 leaf_loop:
1670  mtr_start(&mtr);
1671 
1672  root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
1673  NULL, &mtr);
1674 #ifdef UNIV_BTR_DEBUG
1675  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
1676  + root, space));
1677  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
1678  + root, space));
1679 #endif /* UNIV_BTR_DEBUG */
1680 
1681  /* NOTE: page hash indexes are dropped when a page is freed inside
1682  fsp0fsp. */
1683 
1684  finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
1685  &mtr);
1686  mtr_commit(&mtr);
1687 
1688  if (!finished) {
1689 
1690  goto leaf_loop;
1691  }
1692 top_loop:
1693  mtr_start(&mtr);
1694 
1695  root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
1696  NULL, &mtr);
1697 #ifdef UNIV_BTR_DEBUG
1698  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
1699  + root, space));
1700 #endif /* UNIV_BTR_DEBUG */
1701 
1702  finished = fseg_free_step_not_header(
1703  root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
1704  mtr_commit(&mtr);
1705 
1706  if (!finished) {
1707 
1708  goto top_loop;
1709  }
1710 }
1711 
1712 /************************************************************/
1714 UNIV_INTERN
1715 void
1717 /*==========*/
1718  ulint space,
1719  ulint zip_size,
1721  ulint root_page_no,
1722  mtr_t* mtr)
1723 {
1724  buf_block_t* block;
1725  fseg_header_t* header;
1726 
1727  block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
1728  NULL, mtr);
1729 
1731 
1732  header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1733 #ifdef UNIV_BTR_DEBUG
1734  ut_a(btr_root_fseg_validate(header, space));
1735 #endif /* UNIV_BTR_DEBUG */
1736 
1737  while (!fseg_free_step(header, mtr)) {
1738  /* Free the entire segment in small steps. */
1739  }
1740 }
1741 #endif /* !UNIV_HOTBACKUP */
1742 
1743 /*************************************************************/
1754 UNIV_INTERN
1755 bool
1757 /*====================*/
1758  bool recovery,
1763  ulint z_level,
1765  page_cur_t* cursor,
1766  dict_index_t* index,
1767  mtr_t* mtr)
1768 {
1769  buf_block_t* block = page_cur_get_block(cursor);
1770 #ifndef UNIV_HOTBACKUP
1771  buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
1772 #endif /* !UNIV_HOTBACKUP */
1773  page_t* page = buf_block_get_frame(block);
1774  page_zip_des_t* page_zip = buf_block_get_page_zip(block);
1775  buf_block_t* temp_block;
1776  page_t* temp_page;
1777  ulint log_mode;
1778  ulint data_size1;
1779  ulint data_size2;
1780  ulint max_ins_size1;
1781  ulint max_ins_size2;
1782  bool success = false;
1783  ulint pos;
1784  bool log_compressed;
1785 
1786  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1787  btr_assert_not_corrupted(block, index);
1788 #ifdef UNIV_ZIP_DEBUG
1789  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
1790 #endif /* UNIV_ZIP_DEBUG */
1791  data_size1 = page_get_data_size(page);
1792  max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
1793 
1794  /* Turn logging off */
1795  log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
1796 
1797 #ifndef UNIV_HOTBACKUP
1798  temp_block = buf_block_alloc(buf_pool);
1799 #else /* !UNIV_HOTBACKUP */
1800  ut_ad(block == back_block1);
1801  temp_block = back_block2;
1802 #endif /* !UNIV_HOTBACKUP */
1803  temp_page = temp_block->frame;
1804 
1805  /* Copy the old page to temporary space */
1806  buf_frame_copy(temp_page, page);
1807 
1808 #ifndef UNIV_HOTBACKUP
1809  if (!recovery) {
1811  }
1812 
1813  block->check_index_page_at_flush = TRUE;
1814 #endif /* !UNIV_HOTBACKUP */
1815  btr_blob_dbg_remove(page, index, "btr_page_reorganize");
1816 
1817  /* Save the cursor position. */
1818  pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor));
1819 
1820  /* Recreate the page: note that global data on page (possible
1821  segment headers, next page-field, etc.) is preserved intact */
1822 
1823  page_create(block, mtr, dict_table_is_comp(index->table));
1824 
1825  /* Copy the records from the temporary space to the recreated page;
1826  do not copy the lock bits yet */
1827 
1828  page_copy_rec_list_end_no_locks(block, temp_block,
1829  page_get_infimum_rec(temp_page),
1830  index, mtr);
1831 
1832  if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
1833  /* Copy max trx id to recreated page */
1834  trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
1835  page_set_max_trx_id(block, NULL, max_trx_id, mtr);
1836  /* In crash recovery, dict_index_is_sec_or_ibuf() always
1837  holds, even for clustered indexes. max_trx_id is
1838  unused in clustered index pages. */
1839  ut_ad(max_trx_id != 0 || recovery);
1840  }
1841 
1842  /* If innodb_log_compressed_pages is ON, page reorganize should log the
1843  compressed page image.*/
1844  log_compressed = page_zip && page_zip_log_pages;
1845 
1846  if (log_compressed) {
1847  mtr_set_log_mode(mtr, log_mode);
1848  }
1849 
1850  if (page_zip
1851  && !page_zip_compress(page_zip, page, index, z_level, mtr)) {
1852 
1853  /* Restore the old page and exit. */
1854  btr_blob_dbg_restore(page, temp_page, index,
1855  "btr_page_reorganize_compress_fail");
1856 
1857 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1858  /* Check that the bytes that we skip are identical. */
1859  ut_a(!memcmp(page, temp_page, PAGE_HEADER));
1860  ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
1861  PAGE_HEADER + PAGE_N_RECS + temp_page,
1862  PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
1863  ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
1864  UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
1866 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1867 
1868  memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
1869  PAGE_N_RECS - PAGE_N_DIR_SLOTS);
1870  memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
1871  UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
1872 
1873 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1874  ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
1875 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1876 
1877  goto func_exit;
1878  }
1879 
1880 #ifndef UNIV_HOTBACKUP
1881  if (!recovery) {
1882  /* Update the record lock bitmaps */
1883  lock_move_reorganize_page(block, temp_block);
1884  }
1885 #endif /* !UNIV_HOTBACKUP */
1886 
1887  data_size2 = page_get_data_size(page);
1888  max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
1889 
1890  if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
1892  buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
1893 
1894  fprintf(stderr,
1895  "InnoDB: Error: page old data size %lu"
1896  " new data size %lu\n"
1897  "InnoDB: Error: page old max ins size %lu"
1898  " new max ins size %lu\n"
1899  "InnoDB: Submit a detailed bug report"
1900  " to http://bugs.mysql.com\n",
1901  (unsigned long) data_size1, (unsigned long) data_size2,
1902  (unsigned long) max_ins_size1,
1903  (unsigned long) max_ins_size2);
1904  ut_ad(0);
1905  } else {
1906  success = true;
1907  }
1908 
1909  /* Restore the cursor position. */
1910  if (pos > 0) {
1911  cursor->rec = page_rec_get_nth(page, pos);
1912  } else {
1913  ut_ad(cursor->rec == page_get_infimum_rec(page));
1914  }
1915 
1916 func_exit:
1917 #ifdef UNIV_ZIP_DEBUG
1918  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
1919 #endif /* UNIV_ZIP_DEBUG */
1920 #ifndef UNIV_HOTBACKUP
1921  buf_block_free(temp_block);
1922 #endif /* !UNIV_HOTBACKUP */
1923 
1924  /* Restore logging mode */
1925  mtr_set_log_mode(mtr, log_mode);
1926 
1927 #ifndef UNIV_HOTBACKUP
1928  if (success) {
1929  byte type;
1930  byte* log_ptr;
1931 
1932  /* Write the log record */
1933  if (page_zip) {
1934  ut_ad(page_is_comp(page));
1935  type = MLOG_ZIP_PAGE_REORGANIZE;
1936  } else if (page_is_comp(page)) {
1938  } else {
1939  type = MLOG_PAGE_REORGANIZE;
1940  }
1941 
1942  log_ptr = log_compressed
1943  ? NULL
1945  mtr, page, index, type,
1946  page_zip ? 1 : 0);
1947 
1948  /* For compressed pages write the compression level. */
1949  if (log_ptr && page_zip) {
1950  mach_write_to_1(log_ptr, z_level);
1951  mlog_close(mtr, log_ptr + 1);
1952  }
1953  }
1954 #endif /* !UNIV_HOTBACKUP */
1955 
1956  return(success);
1957 }
1958 
1959 /*************************************************************/
1970 static __attribute__((nonnull))
1971 bool
1972 btr_page_reorganize_block(
1973 /*======================*/
1974  bool recovery,
1979  ulint z_level,
1981  buf_block_t* block,
1982  dict_index_t* index,
1983  mtr_t* mtr)
1984 {
1985  page_cur_t cur;
1986  page_cur_set_before_first(block, &cur);
1987 
1988  return(btr_page_reorganize_low(recovery, z_level, &cur, index, mtr));
1989 }
1990 
1991 #ifndef UNIV_HOTBACKUP
1992 /*************************************************************/
2003 UNIV_INTERN
2004 bool
2006 /*================*/
2007  page_cur_t* cursor,
2008  dict_index_t* index,
2009  mtr_t* mtr)
2010 {
2011  return(btr_page_reorganize_low(false, page_zip_level,
2012  cursor, index, mtr));
2013 }
2014 #endif /* !UNIV_HOTBACKUP */
2015 
2016 /***********************************************************/
2019 UNIV_INTERN
2020 byte*
2022 /*======================*/
2023  byte* ptr,
2024  byte* end_ptr,
2025  dict_index_t* index,
2026  bool compressed,
2027  buf_block_t* block,
2028  mtr_t* mtr)
2029 {
2030  ulint level;
2031 
2032  ut_ad(ptr && end_ptr);
2033 
2034  /* If dealing with a compressed page the record has the
2035  compression level used during original compression written in
2036  one byte. Otherwise record is empty. */
2037  if (compressed) {
2038  if (ptr == end_ptr) {
2039  return(NULL);
2040  }
2041 
2042  level = mach_read_from_1(ptr);
2043 
2044  ut_a(level <= 9);
2045  ++ptr;
2046  } else {
2047  level = page_zip_level;
2048  }
2049 
2050  if (block != NULL) {
2051  btr_page_reorganize_block(true, level, block, index, mtr);
2052  }
2053 
2054  return(ptr);
2055 }
2056 
2057 #ifndef UNIV_HOTBACKUP
2058 /*************************************************************/
2060 static
2061 void
2062 btr_page_empty(
2063 /*===========*/
2064  buf_block_t* block,
2065  page_zip_des_t* page_zip,
2066  dict_index_t* index,
2067  ulint level,
2068  mtr_t* mtr)
2069 {
2070  page_t* page = buf_block_get_frame(block);
2071 
2072  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2073  ut_ad(page_zip == buf_block_get_page_zip(block));
2074 #ifdef UNIV_ZIP_DEBUG
2075  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
2076 #endif /* UNIV_ZIP_DEBUG */
2077 
2079  btr_blob_dbg_remove(page, index, "btr_page_empty");
2080 
2081  /* Recreate the page: note that global data on page (possible
2082  segment headers, next page-field, etc.) is preserved intact */
2083 
2084  if (page_zip) {
2085  page_create_zip(block, index, level, 0, mtr);
2086  } else {
2087  page_create(block, mtr, dict_table_is_comp(index->table));
2088  btr_page_set_level(page, NULL, level, mtr);
2089  }
2090 
2091  block->check_index_page_at_flush = TRUE;
2092 }
2093 
2094 /*************************************************************/
2101 UNIV_INTERN
2102 rec_t*
2104 /*======================*/
2105  ulint flags,
2106  btr_cur_t* cursor,
2110  ulint** offsets,
2111  mem_heap_t** heap,
2112  const dtuple_t* tuple,
2113  ulint n_ext,
2114  mtr_t* mtr)
2115 {
2117  page_t* root;
2118  page_t* new_page;
2119  ulint new_page_no;
2120  rec_t* rec;
2121  dtuple_t* node_ptr;
2122  ulint level;
2123  rec_t* node_ptr_rec;
2124  page_cur_t* page_cursor;
2125  page_zip_des_t* root_page_zip;
2126  page_zip_des_t* new_page_zip;
2127  buf_block_t* root_block;
2128  buf_block_t* new_block;
2129 
2130  root = btr_cur_get_page(cursor);
2131  root_block = btr_cur_get_block(cursor);
2132  root_page_zip = buf_block_get_page_zip(root_block);
2133  ut_ad(!page_is_empty(root));
2134  index = btr_cur_get_index(cursor);
2135 #ifdef UNIV_ZIP_DEBUG
2136  ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
2137 #endif /* UNIV_ZIP_DEBUG */
2138 #ifdef UNIV_BTR_DEBUG
2139  if (!dict_index_is_ibuf(index)) {
2140  ulint space = dict_index_get_space(index);
2141 
2142  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
2143  + root, space));
2144  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
2145  + root, space));
2146  }
2147 
2148  ut_a(dict_index_get_page(index) == page_get_page_no(root));
2149 #endif /* UNIV_BTR_DEBUG */
2150  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2151  MTR_MEMO_X_LOCK));
2152  ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
2153 
2154  /* Allocate a new page to the tree. Root splitting is done by first
2155  moving the root records to the new page, emptying the root, putting
2156  a node pointer to the new page, and then splitting the new page. */
2157 
2158  level = btr_page_get_level(root, mtr);
2159 
2160  new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
2161  new_page = buf_block_get_frame(new_block);
2162  new_page_zip = buf_block_get_page_zip(new_block);
2163  ut_a(!new_page_zip == !root_page_zip);
2164  ut_a(!new_page_zip
2165  || page_zip_get_size(new_page_zip)
2166  == page_zip_get_size(root_page_zip));
2167 
2168  btr_page_create(new_block, new_page_zip, index, level, mtr);
2169 
2170  /* Set the next node and previous node fields of new page */
2171  btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
2172  btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
2173 
2174  /* Copy the records from root to the new page one by one. */
2175 
2176  if (0
2177 #ifdef UNIV_ZIP_COPY
2178  || new_page_zip
2179 #endif /* UNIV_ZIP_COPY */
2180  || !page_copy_rec_list_end(new_block, root_block,
2181  page_get_infimum_rec(root),
2182  index, mtr)) {
2183  ut_a(new_page_zip);
2184 
2185  /* Copy the page byte for byte. */
2186  page_zip_copy_recs(new_page_zip, new_page,
2187  root_page_zip, root, index, mtr);
2188 
2189  /* Update the lock table and possible hash index. */
2190 
2191  lock_move_rec_list_end(new_block, root_block,
2192  page_get_infimum_rec(root));
2193 
2194  btr_search_move_or_delete_hash_entries(new_block, root_block,
2195  index);
2196  }
2197 
2198  /* If this is a pessimistic insert which is actually done to
2199  perform a pessimistic update then we have stored the lock
2200  information of the record to be inserted on the infimum of the
2201  root page: we cannot discard the lock structs on the root page */
2202 
2203  lock_update_root_raise(new_block, root_block);
2204 
2205  /* Create a memory heap where the node pointer is stored */
2206  if (!*heap) {
2207  *heap = mem_heap_create(1000);
2208  }
2209 
2210  rec = page_rec_get_next(page_get_infimum_rec(new_page));
2211  new_page_no = buf_block_get_page_no(new_block);
2212 
2213  /* Build the node pointer (= node key and page address) for the
2214  child */
2215 
2216  node_ptr = dict_index_build_node_ptr(
2217  index, rec, new_page_no, *heap, level);
2218  /* The node pointer must be marked as the predefined minimum record,
2219  as there is no lower alphabetical limit to records in the leftmost
2220  node of a level: */
2221  dtuple_set_info_bits(node_ptr,
2222  dtuple_get_info_bits(node_ptr)
2223  | REC_INFO_MIN_REC_FLAG);
2224 
2225  /* Rebuild the root page to get free space */
2226  btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
2227 
2228  /* Set the next node and previous node fields, although
2229  they should already have been set. The previous node field
2230  must be FIL_NULL if root_page_zip != NULL, because the
2231  REC_INFO_MIN_REC_FLAG (of the first user record) will be
2232  set if and only if btr_page_get_prev() == FIL_NULL. */
2233  btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
2234  btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
2235 
2236  page_cursor = btr_cur_get_page_cur(cursor);
2237 
2238  /* Insert node pointer to the root */
2239 
2240  page_cur_set_before_first(root_block, page_cursor);
2241 
2242  node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
2243  index, offsets, heap, 0, mtr);
2244 
2245  /* The root page should only contain the node pointer
2246  to new_page at this point. Thus, the data should fit. */
2247  ut_a(node_ptr_rec);
2248 
2249  /* We play safe and reset the free bits for the new page */
2250 
2251 #if 0
2252  fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
2253 #endif
2254 
2255  if (!dict_index_is_clust(index)) {
2256  ibuf_reset_free_bits(new_block);
2257  }
2258 
2259  /* Reposition the cursor to the child node */
2260  page_cur_search(new_block, index, tuple,
2261  PAGE_CUR_LE, page_cursor);
2262 
2263  /* Split the child and insert tuple */
2264  return(btr_page_split_and_insert(flags, cursor, offsets, heap,
2265  tuple, n_ext, mtr));
2266 }
2267 
2268 /*************************************************************/
2272 UNIV_INTERN
2273 ibool
2275 /*===========================*/
2276  btr_cur_t* cursor,
2277  rec_t** split_rec)
2281 {
2282  page_t* page;
2283  rec_t* insert_point;
2284  rec_t* infimum;
2285 
2286  page = btr_cur_get_page(cursor);
2287  insert_point = btr_cur_get_rec(cursor);
2288 
2289  if (page_header_get_ptr(page, PAGE_LAST_INSERT)
2290  == page_rec_get_next(insert_point)) {
2291 
2292  infimum = page_get_infimum_rec(page);
2293 
2294  /* If the convergence is in the middle of a page, include also
2295  the record immediately before the new insert to the upper
2296  page. Otherwise, we could repeatedly move from page to page
2297  lots of records smaller than the convergence point. */
2298 
2299  if (infimum != insert_point
2300  && page_rec_get_next(infimum) != insert_point) {
2301 
2302  *split_rec = insert_point;
2303  } else {
2304  *split_rec = page_rec_get_next(insert_point);
2305  }
2306 
2307  return(TRUE);
2308  }
2309 
2310  return(FALSE);
2311 }
2312 
2313 /*************************************************************/
2317 UNIV_INTERN
2318 ibool
2320 /*============================*/
2321  btr_cur_t* cursor,
2322  rec_t** split_rec)
2326 {
2327  page_t* page;
2328  rec_t* insert_point;
2329 
2330  page = btr_cur_get_page(cursor);
2331  insert_point = btr_cur_get_rec(cursor);
2332 
2333  /* We use eager heuristics: if the new insert would be right after
2334  the previous insert on the same page, we assume that there is a
2335  pattern of sequential inserts here. */
2336 
2337  if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {
2338 
2339  rec_t* next_rec;
2340 
2341  next_rec = page_rec_get_next(insert_point);
2342 
2343  if (page_rec_is_supremum(next_rec)) {
2344 split_at_new:
2345  /* Split at the new record to insert */
2346  *split_rec = NULL;
2347  } else {
2348  rec_t* next_next_rec = page_rec_get_next(next_rec);
2349  if (page_rec_is_supremum(next_next_rec)) {
2350 
2351  goto split_at_new;
2352  }
2353 
2354  /* If there are >= 2 user records up from the insert
2355  point, split all but 1 off. We want to keep one because
2356  then sequential inserts can use the adaptive hash
2357  index, as they can do the necessary checks of the right
2358  search position just by looking at the records on this
2359  page. */
2360 
2361  *split_rec = next_next_rec;
2362  }
2363 
2364  return(TRUE);
2365  }
2366 
2367  return(FALSE);
2368 }
2369 
2370 /*************************************************************/
/** Calculates a split record such that the tuple to be inserted will
certainly fit on its half-page after the split.  The page of the cursor
is scanned in key order, counting the tuple as occupying its insert
position, until the records included on the lower half reserve at least
half of the total space the page would need after the insert.
@return split record, or NULL when the tuple itself should be the
border record of the split (i.e. the first record on the upper
half-page). */
2376 static
2377 rec_t*
2378 btr_page_get_split_rec(
2379 /*===================*/
2380  btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
2381  const dtuple_t* tuple, /*!< in: tuple to insert */
2382  ulint n_ext) /*!< in: number of externally stored columns */
2383 {
2384  page_t* page;
2385  page_zip_des_t* page_zip;
2386  ulint insert_size;
2387  ulint free_space;
2388  ulint total_data;
2389  ulint total_n_recs;
2390  ulint total_space;
2391  ulint incl_data;
2392  rec_t* ins_rec;
2393  rec_t* rec;
2394  rec_t* next_rec;
2395  ulint n;
2396  mem_heap_t* heap;
2397  ulint* offsets;
2398 
2399  page = btr_cur_get_page(cursor);
2400 
2401  insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
2402  free_space = page_get_free_space_of_empty(page_is_comp(page));
2403 
2404  page_zip = btr_cur_get_page_zip(cursor);
2405  if (page_zip) {
2406  /* Estimate the free space of an empty compressed page. */
2407  ulint free_space_zip = page_zip_empty_size(
2408  cursor->index->n_fields,
2409  page_zip_get_size(page_zip));
2410 
 /* The more restrictive of the uncompressed and compressed
 capacities governs the split decision. */
2411  if (free_space > (ulint) free_space_zip) {
2412  free_space = (ulint) free_space_zip;
2413  }
2414  }
2415 
2416  /* free_space is now the free space of a created new page */
2417 
2418  total_data = page_get_data_size(page) + insert_size;
2419  total_n_recs = page_get_n_recs(page) + 1;
2420  ut_ad(total_n_recs >= 2);
2421  total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
2422 
2423  n = 0;
2424  incl_data = 0;
2425  ins_rec = btr_cur_get_rec(cursor);
2426  rec = page_get_infimum_rec(page);
2427 
2428  heap = NULL;
2429  offsets = NULL;
2430 
2431  /* We start to include records to the left half, and when the
2432  space reserved by them exceeds half of total_space, then if
2433  the included records fit on the left page, they will be put there
2434  if something was left over also for the right page,
2435  otherwise the last included record will be the first on the right
2436  half page */
2437 
2438  do {
2439  /* Decide the next record to include */
2440  if (rec == ins_rec) {
2441  rec = NULL; /* NULL denotes that tuple is
2442  now included */
2443  } else if (rec == NULL) {
 /* The tuple was the previous "record"; resume the
 scan after the record the cursor is positioned on. */
2444  rec = page_rec_get_next(ins_rec);
2445  } else {
2446  rec = page_rec_get_next(rec);
2447  }
2448 
2449  if (rec == NULL) {
2450  /* Include tuple */
2451  incl_data += insert_size;
2452  } else {
2453  offsets = rec_get_offsets(rec, cursor->index,
2454  offsets, ULINT_UNDEFINED,
2455  &heap);
2456  incl_data += rec_offs_size(offsets);
2457  }
2458 
2459  n++;
2460  } while (incl_data + page_dir_calc_reserved_space(n)
2461  < total_space / 2);
2462 
2463  if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
2464  /* The next record will be the first on
2465  the right half page if it is not the
2466  supremum record of page */
2467 
2468  if (rec == ins_rec) {
 /* The tuple itself becomes the border: signal this
 to the caller by returning NULL. */
2469  rec = NULL;
2470 
2471  goto func_exit;
2472  } else if (rec == NULL) {
2473  next_rec = page_rec_get_next(ins_rec);
2474  } else {
2475  next_rec = page_rec_get_next(rec);
2476  }
2477  ut_ad(next_rec);
2478  if (!page_rec_is_supremum(next_rec)) {
2479  rec = next_rec;
2480  }
2481  }
2482 
2483 func_exit:
 /* Free the heap that rec_get_offsets() may have allocated. */
2484  if (heap) {
2485  mem_heap_free(heap);
2486  }
2487  return(rec);
2488 }
2489 
2490 /*************************************************************/
2494 static __attribute__((nonnull(1,3,4,6), warn_unused_result))
2495 bool
2496 btr_page_insert_fits(
2497 /*=================*/
2498  btr_cur_t* cursor,
2500  const rec_t* split_rec,
2503  ulint** offsets,
2505  const dtuple_t* tuple,
2506  ulint n_ext,
2507  mem_heap_t** heap)
2508 {
2509  page_t* page;
2510  ulint insert_size;
2511  ulint free_space;
2512  ulint total_data;
2513  ulint total_n_recs;
2514  const rec_t* rec;
2515  const rec_t* end_rec;
2516 
2517  page = btr_cur_get_page(cursor);
2518 
2519  ut_ad(!split_rec
2520  || !page_is_comp(page) == !rec_offs_comp(*offsets));
2521  ut_ad(!split_rec
2522  || rec_offs_validate(split_rec, cursor->index, *offsets));
2523 
2524  insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
2525  free_space = page_get_free_space_of_empty(page_is_comp(page));
2526 
2527  /* free_space is now the free space of a created new page */
2528 
2529  total_data = page_get_data_size(page) + insert_size;
2530  total_n_recs = page_get_n_recs(page) + 1;
2531 
2532  /* We determine which records (from rec to end_rec, not including
2533  end_rec) will end up on the other half page from tuple when it is
2534  inserted. */
2535 
2536  if (split_rec == NULL) {
2537  rec = page_rec_get_next(page_get_infimum_rec(page));
2538  end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
2539 
2540  } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
2541 
2542  rec = page_rec_get_next(page_get_infimum_rec(page));
2543  end_rec = split_rec;
2544  } else {
2545  rec = split_rec;
2546  end_rec = page_get_supremum_rec(page);
2547  }
2548 
2549  if (total_data + page_dir_calc_reserved_space(total_n_recs)
2550  <= free_space) {
2551 
2552  /* Ok, there will be enough available space on the
2553  half page where the tuple is inserted */
2554 
2555  return(true);
2556  }
2557 
2558  while (rec != end_rec) {
2559  /* In this loop we calculate the amount of reserved
2560  space after rec is removed from page. */
2561 
2562  *offsets = rec_get_offsets(rec, cursor->index, *offsets,
2563  ULINT_UNDEFINED, heap);
2564 
2565  total_data -= rec_offs_size(*offsets);
2566  total_n_recs--;
2567 
2568  if (total_data + page_dir_calc_reserved_space(total_n_recs)
2569  <= free_space) {
2570 
2571  /* Ok, there will be enough available space on the
2572  half page where the tuple is inserted */
2573 
2574  return(true);
2575  }
2576 
2577  rec = page_rec_get_next_const(rec);
2578  }
2579 
2580  return(false);
2581 }
2582 
2583 /*******************************************************/
/** Inserts a data tuple on a non-leaf level of the tree.  Positions a
cursor with PAGE_CUR_LE at the given level, then tries an optimistic
insert and falls back to a pessimistic insert, which must succeed.
NOTE(review): the function-name line (doxygen line 2588) and the start
of the optimistic-insert call (lines 2613-2617) were lost during HTML
extraction; presumably btr_insert_on_non_leaf_level_func calling
btr_cur_optimistic_insert — confirm against the upstream source tree. */
2586 UNIV_INTERN
2587 void
2589 /*==============================*/
2590  ulint flags, /*!< in: undo/locking flags passed to the insert calls */
2591  dict_index_t* index, /*!< in: index tree */
2592  ulint level, /*!< in: level, must be > 0 */
2593  dtuple_t* tuple, /*!< in: the node-pointer record to insert */
2594  const char* file, /*!< in: file name of caller */
2595  ulint line, /*!< in: line number of caller */
2596  mtr_t* mtr) /*!< in: mini-transaction handle */
2597 {
2598  big_rec_t* dummy_big_rec;
2599  btr_cur_t cursor;
2600  dberr_t err;
2601  rec_t* rec;
2602  ulint* offsets = NULL;
2603  mem_heap_t* heap = NULL;
2604 
 /* Node pointers only exist above the leaf level. */
2605  ut_ad(level > 0);
2606 
2607  btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
2609  &cursor, 0, file, line, mtr);
2610 
2611  ut_ad(cursor.flag == BTR_CUR_BINARY);
2612 
 /* NOTE(review): call target lost in extraction (doxygen lines
 2613-2617); the result is assigned to err and checked below. */
2614  flags
2618  &cursor, &offsets, &heap,
2619  tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
2620 
2621  if (err == DB_FAIL) {
 /* Optimistic insert did not fit on the page: do a
 pessimistic insert, which may split the page. */
2622  err = btr_cur_pessimistic_insert(flags
2626  &cursor, &offsets, &heap,
2627  tuple, &rec,
2628  &dummy_big_rec, 0, NULL, mtr);
2629  ut_a(err == DB_SUCCESS);
2630  }
2631  mem_heap_free(heap);
2632 }
2633 
2634 /**************************************************************/
/** Attaches the halves of an index page on the appropriate level of the
index tree after a page split: updates the father node pointer(s),
inserts the node pointer for the upper half on the parent level (which
may recurse into a split there), and relinks the doubly-linked list of
pages on this level. */
2637 static __attribute__((nonnull))
2638 void
2639 btr_attach_half_pages(
2640 /*==================*/
2641  ulint flags, /*!< in: undo/locking flags for the node-pointer insert */
2643  dict_index_t* index, /*!< in: the index tree */
2644  buf_block_t* block, /*!< in/out: page to be split */
2645  const rec_t* split_rec, /*!< in: first record on the upper half-page */
2647  buf_block_t* new_block, /*!< in/out: the newly created half-page */
2648  ulint direction, /*!< in: FSP_UP or FSP_DOWN */
2649  mtr_t* mtr) /*!< in: mini-transaction handle */
2650 {
2651  ulint space;
2652  ulint zip_size;
2653  ulint prev_page_no;
2654  ulint next_page_no;
2655  ulint level;
2656  page_t* page = buf_block_get_frame(block);
2657  page_t* lower_page;
2658  page_t* upper_page;
2659  ulint lower_page_no;
2660  ulint upper_page_no;
2661  page_zip_des_t* lower_page_zip;
2662  page_zip_des_t* upper_page_zip;
2663  dtuple_t* node_ptr_upper;
2664  mem_heap_t* heap;
2665 
2666  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2667  ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
2668 
2669  /* Create a memory heap where the data tuple is stored */
2670  heap = mem_heap_create(1024);
2671 
2672  /* Based on split direction, decide upper and lower pages */
2673  if (direction == FSP_DOWN) {
2674 
2675  btr_cur_t cursor;
2676  ulint* offsets;
2677 
 /* Splitting downwards: the new page becomes the lower half,
 so the father's child pointer must be redirected to it. */
2678  lower_page = buf_block_get_frame(new_block);
2679  lower_page_no = buf_block_get_page_no(new_block);
2680  lower_page_zip = buf_block_get_page_zip(new_block);
2681  upper_page = buf_block_get_frame(block);
2682  upper_page_no = buf_block_get_page_no(block);
2683  upper_page_zip = buf_block_get_page_zip(block);
2684 
2685  /* Look up the index for the node pointer to page */
2686  offsets = btr_page_get_father_block(NULL, heap, index,
2687  block, mtr, &cursor);
2688 
2689  /* Replace the address of the old child node (= page) with the
2690  address of the new lower half */
2691 
 /* NOTE(review): call target lost in extraction (doxygen line
 2692); presumably btr_node_ptr_set_child_page_no — confirm
 against the upstream source tree. */
2693  btr_cur_get_rec(&cursor),
2694  btr_cur_get_page_zip(&cursor),
2695  offsets, lower_page_no, mtr);
2696  mem_heap_empty(heap);
2697  } else {
 /* Splitting upwards: the original page stays the lower half,
 so its father pointer is already correct. */
2698  lower_page = buf_block_get_frame(block);
2699  lower_page_no = buf_block_get_page_no(block);
2700  lower_page_zip = buf_block_get_page_zip(block);
2701  upper_page = buf_block_get_frame(new_block);
2702  upper_page_no = buf_block_get_page_no(new_block);
2703  upper_page_zip = buf_block_get_page_zip(new_block);
2704  }
2705 
2706  /* Get the level of the split pages */
2707  level = btr_page_get_level(buf_block_get_frame(block), mtr);
2708  ut_ad(level
2709  == btr_page_get_level(buf_block_get_frame(new_block), mtr));
2710 
2711  /* Build the node pointer (= node key and page address) for the upper
2712  half */
2713 
2714  node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
2715  upper_page_no, heap, level);
2716 
2717  /* Insert it next to the pointer to the lower half. Note that this
2718  may generate recursion leading to a split on the higher level. */
2719 
2720  btr_insert_on_non_leaf_level(flags, index, level + 1,
2721  node_ptr_upper, mtr);
2722 
2723  /* Free the memory heap */
2724  mem_heap_free(heap);
2725 
2726  /* Get the previous and next pages of page */
2727 
2728  prev_page_no = btr_page_get_prev(page, mtr);
2729  next_page_no = btr_page_get_next(page, mtr);
2730  space = buf_block_get_space(block);
2731  zip_size = buf_block_get_zip_size(block);
2732 
2733  /* Update page links of the level */
2734 
2735  if (prev_page_no != FIL_NULL) {
2736  buf_block_t* prev_block = btr_block_get(
2737  space, zip_size, prev_page_no, RW_X_LATCH, index, mtr);
2738 #ifdef UNIV_BTR_DEBUG
2739  ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
2740  ut_a(btr_page_get_next(prev_block->frame, mtr)
2741  == buf_block_get_page_no(block));
2742 #endif /* UNIV_BTR_DEBUG */
2743 
 /* The left neighbor now points forward to the lower half. */
2744  btr_page_set_next(buf_block_get_frame(prev_block),
2745  buf_block_get_page_zip(prev_block),
2746  lower_page_no, mtr);
2747  }
2748 
2749  if (next_page_no != FIL_NULL) {
2750  buf_block_t* next_block = btr_block_get(
2751  space, zip_size, next_page_no, RW_X_LATCH, index, mtr);
2752 #ifdef UNIV_BTR_DEBUG
2753  ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
2754  ut_a(btr_page_get_prev(next_block->frame, mtr)
2755  == page_get_page_no(page));
2756 #endif /* UNIV_BTR_DEBUG */
2757 
 /* The right neighbor now points back to the upper half. */
2758  btr_page_set_prev(buf_block_get_frame(next_block),
2759  buf_block_get_page_zip(next_block),
2760  upper_page_no, mtr);
2761  }
2762 
 /* Link the two halves to each other and to the old neighbors. */
2763  btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
2764  btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
2765 
2766  btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
2767  btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
2768 }
2769 
2770 /*************************************************************/
2773 static __attribute__((nonnull, warn_unused_result))
2774 bool
2775 btr_page_tuple_smaller(
2776 /*===================*/
2777  btr_cur_t* cursor,
2778  const dtuple_t* tuple,
2779  ulint** offsets,
2780  ulint n_uniq,
2782  mem_heap_t** heap)
2783 {
2784  buf_block_t* block;
2785  const rec_t* first_rec;
2786  page_cur_t pcur;
2787 
2788  /* Read the first user record in the page. */
2789  block = btr_cur_get_block(cursor);
2790  page_cur_set_before_first(block, &pcur);
2791  page_cur_move_to_next(&pcur);
2792  first_rec = page_cur_get_rec(&pcur);
2793 
2794  *offsets = rec_get_offsets(
2795  first_rec, cursor->index, *offsets,
2796  n_uniq, heap);
2797 
2798  return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
2799 }
2800 
2801 /*************************************************************/
/** Splits an index page in half and inserts the tuple.  The caller must
hold an x-latch on the index tree; the latch may be released inside
this function when the insert is known to fit on a leaf half-page.
Loops back (func_start) with a fresh split if the insert still does not
fit after the split and a page reorganization.
@return the inserted record
NOTE(review): the function-name line (doxygen line 2812) was lost
during HTML extraction; presumably btr_page_split_and_insert — confirm
against the upstream source tree. */
2810 UNIV_INTERN
2811 rec_t*
2813 /*======================*/
2814  ulint flags, /*!< in: undo/locking flags */
2815  btr_cur_t* cursor, /*!< in/out: cursor at which to insert; positioned on the split page */
2818  ulint** offsets, /*!< out: offsets on the inserted record */
2819  mem_heap_t** heap, /*!< in/out: memory heap for offsets; created here if NULL */
2820  const dtuple_t* tuple, /*!< in: tuple to insert */
2821  ulint n_ext, /*!< in: number of externally stored columns */
2822  mtr_t* mtr) /*!< in: mini-transaction handle */
2823 {
2824  buf_block_t* block;
2825  page_t* page;
2826  page_zip_des_t* page_zip;
2827  ulint page_no;
2828  byte direction;
2829  ulint hint_page_no;
2830  buf_block_t* new_block;
2831  page_t* new_page;
2832  page_zip_des_t* new_page_zip;
2833  rec_t* split_rec;
2834  buf_block_t* left_block;
2835  buf_block_t* right_block;
2836  buf_block_t* insert_block;
2837  page_cur_t* page_cursor;
2838  rec_t* first_rec;
2839  byte* buf = 0; /* remove warning */
2840  rec_t* move_limit;
2841  ibool insert_will_fit;
2842  ibool insert_left;
2843  ulint n_iterations = 0;
2844  rec_t* rec;
2845  ulint n_uniq;
2846 
2847  if (!*heap) {
2848  *heap = mem_heap_create(1024);
2849  }
2850  n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
2851 func_start:
2852  mem_heap_empty(*heap);
2853  *offsets = NULL;
2854 
2855  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
2856  MTR_MEMO_X_LOCK));
 /* NOTE(review): the start of this assertion (doxygen line 2857)
 was lost in extraction; confirm against the upstream source. */
2858  || (flags & BTR_CREATE_FLAG)
2859  || dict_index_is_clust(cursor->index));
2860 #ifdef UNIV_SYNC_DEBUG
2861  ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
2862 #endif /* UNIV_SYNC_DEBUG */
2863 
2864  block = btr_cur_get_block(cursor);
2865  page = buf_block_get_frame(block);
2866  page_zip = buf_block_get_page_zip(block);
2867 
2868  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2869  ut_ad(!page_is_empty(page));
2870 
2871  page_no = buf_block_get_page_no(block);
2872 
2873  /* 1. Decide the split record; split_rec == NULL means that the
2874  tuple to be inserted should be the first record on the upper
2875  half-page */
2876  insert_left = FALSE;
2877 
2878  if (n_iterations > 0) {
 /* A previous split already failed to make room: split at
 the computed half-way point instead of a heuristic. */
2879  direction = FSP_UP;
2880  hint_page_no = page_no + 1;
2881  split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
2882 
2883  if (split_rec == NULL) {
2884  insert_left = btr_page_tuple_smaller(
2885  cursor, tuple, offsets, n_uniq, heap);
2886  }
2887  } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
2888  direction = FSP_UP;
2889  hint_page_no = page_no + 1;
2890 
2891  } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
2892  direction = FSP_DOWN;
2893  hint_page_no = page_no - 1;
2894  ut_ad(split_rec);
2895  } else {
2896  direction = FSP_UP;
2897  hint_page_no = page_no + 1;
2898 
2899  /* If there is only one record in the index page, we
2900  can't split the node in the middle by default. We need
2901  to determine whether the new record will be inserted
2902  to the left or right. */
2903 
2904  if (page_get_n_recs(page) > 1) {
2905  split_rec = page_get_middle_rec(page);
2906  } else if (btr_page_tuple_smaller(cursor, tuple,
2907  offsets, n_uniq, heap)) {
2908  split_rec = page_rec_get_next(
2909  page_get_infimum_rec(page));
2910  } else {
2911  split_rec = NULL;
2912  }
2913  }
2914 
2915  /* 2. Allocate a new page to the index */
2916  new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
2917  btr_page_get_level(page, mtr), mtr, mtr);
2918  new_page = buf_block_get_frame(new_block);
2919  new_page_zip = buf_block_get_page_zip(new_block);
2920  btr_page_create(new_block, new_page_zip, cursor->index,
2921  btr_page_get_level(page, mtr), mtr);
2922 
2923  /* 3. Calculate the first record on the upper half-page, and the
2924  first record (move_limit) on original page which ends up on the
2925  upper half */
2926 
2927  if (split_rec) {
2928  first_rec = move_limit = split_rec;
2929 
2930  *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
2931  n_uniq, heap);
2932 
2933  insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
2934 
2935  if (!insert_left && new_page_zip && n_iterations > 0) {
2936  /* If a compressed page has already been split,
2937  avoid further splits by inserting the record
2938  to an empty page. */
2939  split_rec = NULL;
2940  goto insert_empty;
2941  }
2942  } else if (insert_left) {
2943  ut_a(n_iterations > 0);
2944  first_rec = page_rec_get_next(page_get_infimum_rec(page));
2945  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2946  } else {
2947 insert_empty:
2948  ut_ad(!split_rec);
2949  ut_ad(!insert_left);
 /* The tuple itself is the border record: convert it to
 physical record format in a temporary buffer. */
2950  buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index,
2951  tuple, n_ext));
2952 
2953  first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
2954  tuple, n_ext);
2955  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2956  }
2957 
2958  /* 4. Do first the modifications in the tree structure */
2959 
2960  btr_attach_half_pages(flags, cursor->index, block,
2961  first_rec, new_block, direction, mtr);
2962 
2963  /* If the split is made on the leaf level and the insert will fit
2964  on the appropriate half-page, we may release the tree x-latch.
2965  We can then move the records after releasing the tree latch,
2966  thus reducing the tree latch contention. */
2967 
2968  if (split_rec) {
2969  insert_will_fit = !new_page_zip
2970  && btr_page_insert_fits(cursor, split_rec,
2971  offsets, tuple, n_ext, heap);
2972  } else {
2973  if (!insert_left) {
 /* The converted record is no longer needed. */
2974  mem_free(buf);
2975  buf = NULL;
2976  }
2977 
2978  insert_will_fit = !new_page_zip
2979  && btr_page_insert_fits(cursor, NULL,
2980  offsets, tuple, n_ext, heap);
2981  }
2982 
2983  if (insert_will_fit && page_is_leaf(page)
2984  && !dict_index_is_online_ddl(cursor->index)) {
2985 
 /* NOTE(review): call target lost in extraction (doxygen line
 2986); presumably mtr_memo_release of the index tree lock —
 confirm against the upstream source tree. */
2987  MTR_MEMO_X_LOCK);
2988  }
2989 
2990  /* 5. Move then the records to the new page */
2991  if (direction == FSP_DOWN) {
2992  /* fputs("Split left\n", stderr); */
2993 
2994  if (0
2995 #ifdef UNIV_ZIP_COPY
2996  || page_zip
2997 #endif /* UNIV_ZIP_COPY */
2998  || !page_move_rec_list_start(new_block, block, move_limit,
2999  cursor->index, mtr)) {
3000  /* For some reason, compressing new_page failed,
3001  even though it should contain fewer records than
3002  the original page. Copy the page byte for byte
3003  and then delete the records from both pages
3004  as appropriate. Deleting will always succeed. */
3005  ut_a(new_page_zip);
3006 
3007  page_zip_copy_recs(new_page_zip, new_page,
3008  page_zip, page, cursor->index, mtr);
3009  page_delete_rec_list_end(move_limit - page + new_page,
3010  new_block, cursor->index,
3011  ULINT_UNDEFINED,
3012  ULINT_UNDEFINED, mtr);
3013 
3014  /* Update the lock table and possible hash index. */
3015 
 /* NOTE(review): call targets lost in extraction (doxygen
 lines 3016 and 3020); presumably lock_move_rec_list_start
 and btr_search_move_or_delete_hash_entries — confirm. */
3017  new_block, block, move_limit,
3018  new_page + PAGE_NEW_INFIMUM);
3019 
3021  new_block, block, cursor->index);
3022 
3023  /* Delete the records from the source page. */
3024 
3025  page_delete_rec_list_start(move_limit, block,
3026  cursor->index, mtr);
3027  }
3028 
3029  left_block = new_block;
3030  right_block = block;
3031 
3032  lock_update_split_left(right_block, left_block);
3033  } else {
3034  /* fputs("Split right\n", stderr); */
3035 
3036  if (0
3037 #ifdef UNIV_ZIP_COPY
3038  || page_zip
3039 #endif /* UNIV_ZIP_COPY */
3040  || !page_move_rec_list_end(new_block, block, move_limit,
3041  cursor->index, mtr)) {
3042  /* For some reason, compressing new_page failed,
3043  even though it should contain fewer records than
3044  the original page. Copy the page byte for byte
3045  and then delete the records from both pages
3046  as appropriate. Deleting will always succeed. */
3047  ut_a(new_page_zip);
3048 
3049  page_zip_copy_recs(new_page_zip, new_page,
3050  page_zip, page, cursor->index, mtr);
3051  page_delete_rec_list_start(move_limit - page
3052  + new_page, new_block,
3053  cursor->index, mtr);
3054 
3055  /* Update the lock table and possible hash index. */
3056 
3057  lock_move_rec_list_end(new_block, block, move_limit);
3058 
 /* NOTE(review): call target lost in extraction (doxygen
 line 3059); presumably
 btr_search_move_or_delete_hash_entries — confirm. */
3060  new_block, block, cursor->index);
3061 
3062  /* Delete the records from the source page. */
3063 
3064  page_delete_rec_list_end(move_limit, block,
3065  cursor->index,
3066  ULINT_UNDEFINED,
3067  ULINT_UNDEFINED, mtr);
3068  }
3069 
3070  left_block = block;
3071  right_block = new_block;
3072 
3073  lock_update_split_right(right_block, left_block);
3074  }
3075 
3076 #ifdef UNIV_ZIP_DEBUG
3077  if (page_zip) {
3078  ut_a(page_zip_validate(page_zip, page, cursor->index));
3079  ut_a(page_zip_validate(new_page_zip, new_page, cursor->index));
3080  }
3081 #endif /* UNIV_ZIP_DEBUG */
3082 
3083  /* At this point, split_rec, move_limit and first_rec may point
3084  to garbage on the old page. */
3085 
3086  /* 6. The split and the tree modification is now completed. Decide the
3087  page where the tuple should be inserted */
3088 
3089  if (insert_left) {
3090  insert_block = left_block;
3091  } else {
3092  insert_block = right_block;
3093  }
3094 
3095  /* 7. Reposition the cursor for insert and try insertion */
3096  page_cursor = btr_cur_get_page_cur(cursor);
3097 
3098  page_cur_search(insert_block, cursor->index, tuple,
3099  PAGE_CUR_LE, page_cursor);
3100 
3101  rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
3102  offsets, heap, n_ext, mtr);
3103 
3104 #ifdef UNIV_ZIP_DEBUG
3105  {
3106  page_t* insert_page
3107  = buf_block_get_frame(insert_block);
3108 
3109  page_zip_des_t* insert_page_zip
3110  = buf_block_get_page_zip(insert_block);
3111 
3112  ut_a(!insert_page_zip
3113  || page_zip_validate(insert_page_zip, insert_page,
3114  cursor->index));
3115  }
3116 #endif /* UNIV_ZIP_DEBUG */
3117 
3118  if (rec != NULL) {
3119 
3120  goto func_exit;
3121  }
3122 
3123  /* 8. If insert did not fit, try page reorganization.
3124  For compressed pages, page_cur_tuple_insert() will have
3125  attempted this already. */
3126 
3127  if (page_cur_get_page_zip(page_cursor)
3128  || !btr_page_reorganize(page_cursor, cursor->index, mtr)) {
3129 
3130  goto insert_failed;
3131  }
3132 
3133  rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
3134  offsets, heap, n_ext, mtr);
3135 
3136  if (rec == NULL) {
3137  /* The insert did not fit on the page: loop back to the
3138  start of the function for a new split */
3139 insert_failed:
3140  /* We play safe and reset the free bits */
3141  if (!dict_index_is_clust(cursor->index)) {
3142  ibuf_reset_free_bits(new_block);
3143  ibuf_reset_free_bits(block);
3144  }
3145 
3146  /* fprintf(stderr, "Split second round %lu\n",
3147  page_get_page_no(page)); */
3148  n_iterations++;
3149  ut_ad(n_iterations < 2
3150  || buf_block_get_page_zip(insert_block));
3151  ut_ad(!insert_will_fit);
3152 
3153  goto func_start;
3154  }
3155 
3156 func_exit:
3157  /* Insert fit on the page: update the free bits for the
3158  left and right pages in the same mtr */
3159 
3160  if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
 /* NOTE(review): call target lost in extraction (doxygen line
 3161); presumably ibuf_update_free_bits_for_two_pages_low —
 confirm against the upstream source tree. */
3162  buf_block_get_zip_size(left_block),
3163  left_block, right_block, mtr);
3164  }
3165 
3166 #if 0
3167  fprintf(stderr, "Split and insert done %lu %lu\n",
3168  buf_block_get_page_no(left_block),
3169  buf_block_get_page_no(right_block));
3170 #endif
3171  MONITOR_INC(MONITOR_INDEX_SPLIT);
3172 
3173  ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
3174  ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
3175 
3176  ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
3177  return(rec);
3178 }
3179 
3180 #ifdef UNIV_SYNC_DEBUG
3181 /*************************************************************/
/** Removes a page from the level list of pages; with UNIV_SYNC_DEBUG
the index is passed through to btr_block_get() for latch diagnostics. */
3188 # define btr_level_list_remove(space,zip_size,page,index,mtr) \
3189  btr_level_list_remove_func(space,zip_size,page,index,mtr)
3190 #else /* UNIV_SYNC_DEBUG */
3191 /*************************************************************/
/** Removes a page from the level list of pages; the index argument is
dropped in non-debug builds. */
3198 # define btr_level_list_remove(space,zip_size,page,index,mtr) \
3199  btr_level_list_remove_func(space,zip_size,page,mtr)
3200 #endif /* UNIV_SYNC_DEBUG */
3201 
3202 /*************************************************************/
/** Unlinks a page from the doubly-linked list of pages on its level:
makes the previous neighbor point to the next page and vice versa. */
3204 static __attribute__((nonnull))
3205 void
3206 btr_level_list_remove_func(
3207 /*=======================*/
3208  ulint space, /*!< in: space id of the page */
3209  ulint zip_size, /*!< in: compressed page size, or 0 */
3211  page_t* page, /*!< in/out: page to remove from its level list */
3212 #ifdef UNIV_SYNC_DEBUG
3213  const dict_index_t* index, /*!< in: index tree (debug only) */
3214 #endif /* UNIV_SYNC_DEBUG */
3215  mtr_t* mtr) /*!< in: mini-transaction handle */
3216 {
3217  ulint prev_page_no;
3218  ulint next_page_no;
3219 
3220  ut_ad(page && mtr);
3221  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
3222  ut_ad(space == page_get_space_id(page));
3223  /* Get the previous and next page numbers of page */
3224 
3225  prev_page_no = btr_page_get_prev(page, mtr);
3226  next_page_no = btr_page_get_next(page, mtr);
3227 
3228  /* Update page links of the level */
3229 
3230  if (prev_page_no != FIL_NULL) {
3231  buf_block_t* prev_block
3232  = btr_block_get(space, zip_size, prev_page_no,
3233  RW_X_LATCH, index, mtr);
3234  page_t* prev_page
3235  = buf_block_get_frame(prev_block);
3236 #ifdef UNIV_BTR_DEBUG
3237  ut_a(page_is_comp(prev_page) == page_is_comp(page));
3238  ut_a(btr_page_get_next(prev_page, mtr)
3239  == page_get_page_no(page));
3240 #endif /* UNIV_BTR_DEBUG */
3241 
 /* The left neighbor skips over the removed page. */
3242  btr_page_set_next(prev_page,
3243  buf_block_get_page_zip(prev_block),
3244  next_page_no, mtr);
3245  }
3246 
3247  if (next_page_no != FIL_NULL) {
3248  buf_block_t* next_block
3249  = btr_block_get(space, zip_size, next_page_no,
3250  RW_X_LATCH, index, mtr);
3251  page_t* next_page
3252  = buf_block_get_frame(next_block);
3253 #ifdef UNIV_BTR_DEBUG
3254  ut_a(page_is_comp(next_page) == page_is_comp(page));
3255  ut_a(btr_page_get_prev(next_page, mtr)
3256  == page_get_page_no(page));
3257 #endif /* UNIV_BTR_DEBUG */
3258 
 /* The right neighbor skips over the removed page. */
3259  btr_page_set_prev(next_page,
3260  buf_block_get_page_zip(next_block),
3261  prev_page_no, mtr);
3262  }
3263 }
3264 
3265 /****************************************************************/
/** Writes the redo log record for setting the minimum-record flag on a
record.  In UNIV_HOTBACKUP builds this is compiled out (see #else).
NOTE(review): the statement writing the 2-byte record offset (doxygen
line 3279) was lost during HTML extraction; presumably
mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES) — confirm
against the upstream source tree. */
3268 UNIV_INLINE
3269 void
3270 btr_set_min_rec_mark_log(
3271 /*=====================*/
3272  rec_t* rec, /*!< in: record whose page offset is logged */
3273  byte type, /*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
3274  mtr_t* mtr) /*!< in: mini-transaction handle */
3275 {
3276  mlog_write_initial_log_record(rec, type, mtr);
3277 
3278  /* Write rec offset as a 2-byte ulint */
3280 }
3281 #else /* !UNIV_HOTBACKUP */
3282 # define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
3283 #endif /* !UNIV_HOTBACKUP */
3284 
3285 /****************************************************************/
/** Parses a redo log record that sets the minimum-record flag: reads a
2-byte record offset from the log and, if a page is supplied (crash
recovery applying the log), re-applies the mark on that record.
@return end of the parsed log record, or NULL if the record is
truncated (fewer than 2 bytes remain)
NOTE(review): the function-name line (doxygen line 3291) was lost
during HTML extraction; presumably btr_parse_set_min_rec_mark —
confirm against the upstream source tree. */
3289 UNIV_INTERN
3290 byte*
3292 /*=======================*/
3293  byte* ptr, /*!< in: buffer */
3294  byte* end_ptr, /*!< in: buffer end */
3295  ulint comp, /*!< in: nonzero=compact record format */
3296  page_t* page, /*!< in: page to apply to, or NULL to only parse */
3297  mtr_t* mtr) /*!< in: mini-transaction handle */
3298 {
3299  rec_t* rec;
3300 
3301  if (end_ptr < ptr + 2) {
3302 
 /* Incomplete log record: signal the caller to retry
 with more data. */
3303  return(NULL);
3304  }
3305 
3306  if (page) {
 /* The record format logged must match the page format. */
3307  ut_a(!page_is_comp(page) == !comp);
3308 
3309  rec = page + mach_read_from_2(ptr);
3310 
3311  btr_set_min_rec_mark(rec, mtr);
3312  }
3313 
3314  return(ptr + 2);
3315 }
3316 
3317 /****************************************************************/
/** Sets the REC_INFO_MIN_REC_FLAG info bit on a record and writes the
corresponding redo log record (compact or old-style variant).
NOTE(review): the function-name line (doxygen line 3321) was lost
during HTML extraction; presumably btr_set_min_rec_mark — confirm
against the upstream source tree. */
3319 UNIV_INTERN
3320 void
3322 /*=================*/
3323  rec_t* rec, /*!< in/out: record to mark */
3324  mtr_t* mtr) /*!< in: mini-transaction handle */
3325 {
3326  ulint info_bits;
3327 
3328  if (page_rec_is_comp(rec)) {
 /* Compact record format: separate info-bit accessors
 and log record type. */
3329  info_bits = rec_get_info_bits(rec, TRUE);
3330 
3331  rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
3332 
3333  btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
3334  } else {
3335  info_bits = rec_get_info_bits(rec, FALSE);
3336 
3337  rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
3338 
3339  btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
3340  }
3341 }
3342 
3343 #ifndef UNIV_HOTBACKUP
3344 /*************************************************************/
/** Deletes the node pointer that points to the given page on the
father page, and compresses the father page if the deletion did not
already do so.
NOTE(review): the function-name line (doxygen line 3348) was lost
during HTML extraction; presumably btr_node_ptr_delete — confirm
against the upstream source tree. */
3346 UNIV_INTERN
3347 void
3349 /*================*/
3350  dict_index_t* index, /*!< in: index tree */
3351  buf_block_t* block, /*!< in: page whose node pointer is deleted */
3352  mtr_t* mtr) /*!< in: mini-transaction handle */
3353 {
3354  btr_cur_t cursor;
3355  ibool compressed;
3356  dberr_t err;
3357 
3358  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3359 
3360  /* Delete node pointer on father page */
3361  btr_page_get_father(index, block, mtr, &cursor);
3362 
3363  compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
3364  BTR_CREATE_FLAG, RB_NONE, mtr);
3365  ut_a(err == DB_SUCCESS);
3366 
3367  if (!compressed) {
 /* The delete itself did not compress the father page;
 try compressing it now if that is useful. */
3368  btr_cur_compress_if_useful(&cursor, FALSE, mtr);
3369  }
3370 }
3371 
3372 /*************************************************************/
/** Lifts the records of a page that is the only one on its level to
its father page, freeing the page afterwards.  If the father is itself
the only page on its level (and not the root), the father is lifted
first; all ancestor levels are then renumbered downwards by one.
@return the block where the records were lifted to (the original block
when the father was lifted instead) */
3376 static
3377 buf_block_t*
3378 btr_lift_page_up(
3379 /*=============*/
3380  dict_index_t* index, /*!< in: index tree */
3381  buf_block_t* block, /*!< in: page that is the only one on its level */
3385  mtr_t* mtr) /*!< in: mini-transaction handle */
3386 {
3387  buf_block_t* father_block;
3388  page_t* father_page;
3389  ulint page_level;
3390  page_zip_des_t* father_page_zip;
3391  page_t* page = buf_block_get_frame(block);
3392  ulint root_page_no;
3393  buf_block_t* blocks[BTR_MAX_LEVELS];
3394  ulint n_blocks;
3395  ulint i;
3396  bool lift_father_up;
3397  buf_block_t* block_orig = block;
3398 
 /* The page must be alone on its level: no neighbors. */
3399  ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
3400  ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
3401  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3402 
3403  page_level = btr_page_get_level(page, mtr);
3404  root_page_no = dict_index_get_page(index);
3405 
3406  {
3407  btr_cur_t cursor;
3408  ulint* offsets = NULL;
3409  mem_heap_t* heap = mem_heap_create(
3410  sizeof(*offsets)
3411  * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
3412  buf_block_t* b;
3413 
3414  offsets = btr_page_get_father_block(offsets, heap, index,
3415  block, mtr, &cursor);
3416  father_block = btr_cur_get_block(&cursor);
3417  father_page_zip = buf_block_get_page_zip(father_block);
3418  father_page = buf_block_get_frame(father_block);
3419 
3420  n_blocks = 0;
3421 
3422  /* Store all ancestor pages so we can reset their
3423  levels later on. We have to do all the searches on
3424  the tree now because later on, after we've replaced
3425  the first level, the tree is in an inconsistent state
3426  and can not be searched. */
3427  for (b = father_block;
3428  buf_block_get_page_no(b) != root_page_no; ) {
3429  ut_a(n_blocks < BTR_MAX_LEVELS);
3430 
3431  offsets = btr_page_get_father_block(offsets, heap,
3432  index, b,
3433  mtr, &cursor);
3434 
3435  blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
3436  }
3437 
3438  lift_father_up = (n_blocks && page_level == 0);
3439  if (lift_father_up) {
3440  /* The father page also should be the only on its level (not
3441  root). We should lift up the father page at first.
3442  Because the leaf page should be lifted up only for root page.
3443  The freeing page is based on page_level (==0 or !=0)
3444  to choose segment. If the page_level is changed ==0 from !=0,
3445  later freeing of the page doesn't find the page allocation
3446  to be freed.*/
3447 
3448  block = father_block;
3449  page = buf_block_get_frame(block);
3450  page_level = btr_page_get_level(page, mtr);
3451 
3452  ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
3453  ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
3454  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3455 
3456  father_block = blocks[0];
3457  father_page_zip = buf_block_get_page_zip(father_block);
3458  father_page = buf_block_get_frame(father_block);
3459  }
3460 
3461  mem_heap_free(heap);
3462  }
3463 
 /* NOTE(review): a statement was lost in extraction here (doxygen
 line 3464); presumably btr_search_drop_page_hash_index(block) —
 confirm against the upstream source tree. */
3465 
3466  /* Make the father empty */
3467  btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
3468  page_level++;
3469 
3470  /* Copy the records to the father page one by one. */
3471  if (0
3472 #ifdef UNIV_ZIP_COPY
3473  || father_page_zip
3474 #endif /* UNIV_ZIP_COPY */
3475  || !page_copy_rec_list_end(father_block, block,
3476  page_get_infimum_rec(page),
3477  index, mtr)) {
3478  const page_zip_des_t* page_zip
3479  = buf_block_get_page_zip(block);
3480  ut_a(father_page_zip);
3481  ut_a(page_zip);
3482 
3483  /* Copy the page byte for byte. */
3484  page_zip_copy_recs(father_page_zip, father_page,
3485  page_zip, page, index, mtr);
3486 
3487  /* Update the lock table and possible hash index. */
3488 
3489  lock_move_rec_list_end(father_block, block,
3490  page_get_infimum_rec(page));
3491 
3492  btr_search_move_or_delete_hash_entries(father_block, block,
3493  index);
3494  }
3495 
3496  btr_blob_dbg_remove(page, index, "btr_lift_page_up");
3497  lock_update_copy_and_discard(father_block, block);
3498 
3499  /* Go upward to root page, decrementing levels by one. */
3500  for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
3501  page_t* page = buf_block_get_frame(blocks[i]);
3502  page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
3503 
3504  ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
3505 
3506  btr_page_set_level(page, page_zip, page_level, mtr);
3507 #ifdef UNIV_ZIP_DEBUG
3508  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
3509 #endif /* UNIV_ZIP_DEBUG */
3510  }
3511 
3512  /* Free the file page */
3513  btr_page_free(index, block, mtr);
3514 
3515  /* We play it safe and reset the free bits for the father */
3516  if (!dict_index_is_clust(index)) {
3517  ibuf_reset_free_bits(father_block);
3518  }
3519  ut_ad(page_validate(father_page, index));
3520  ut_ad(btr_check_node_ptr(index, father_block, mtr));
3521 
3522  return(lift_father_up ? block_orig : father_block);
3523 }
3524 
3525 /*************************************************************/
/*
Tries to merge the page on which 'cursor' stands with a sibling
page, to the left or to the right.  If this page is the only one
on its level, the records are instead lifted to the father page
(btr_lift_page_up()).  On success the page is freed and TRUE is
returned; FALSE means no merge was possible (the sibling would
not fit, or a compressed-page copy failed).

Parameters (comment lines 3540-3544 were elided by the doc
generator):
  cursor  in/out: cursor on the page to merge; when 'adjust' is
          TRUE the cursor is re-positioned on the corresponding
          record of the merge page before returning
  adjust  in: TRUE if the cursor position should be adjusted
          even when the merge succeeds
  mtr     in/out: mini-transaction covering the whole operation

NOTE(review): this is a doc-generator extract; several original
source lines were elided and are flagged inline below — confirm
each against upstream btr0btr.cc before compiling. */
3535 UNIV_INTERN
3536 ibool
3537 btr_compress(
3538 /*=========*/
3539  btr_cur_t* cursor,
3543  ibool adjust,
3545  mtr_t* mtr)
3546 {
 /* NOTE(review): original line 3547, the declaration
 "dict_index_t* index;", was elided by the extractor; 'index'
 is assigned from the cursor below — TODO confirm upstream. */
3548  ulint space;
3549  ulint zip_size;
3550  ulint left_page_no;
3551  ulint right_page_no;
3552  buf_block_t* merge_block;
3553  page_t* merge_page = NULL;
3554  page_zip_des_t* merge_page_zip;
3555  ibool is_left;
3556  buf_block_t* block;
3557  page_t* page;
3558  btr_cur_t father_cursor;
3559  mem_heap_t* heap;
3560  ulint* offsets;
3561  ulint nth_rec = 0; /* remove bogus warning */
3562  DBUG_ENTER("btr_compress");
3563 
3564  block = btr_cur_get_block(cursor);
3565  page = btr_cur_get_page(cursor);
3566  index = btr_cur_get_index(cursor);
3567 
3568  btr_assert_not_corrupted(block, index);
3569 
 /* Caller must hold the index X-lock and have this page
 X-fixed in the mini-transaction. */
3570  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3571  MTR_MEMO_X_LOCK));
3572  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3573  space = dict_index_get_space(index);
3574  zip_size = dict_table_zip_size(index->table);
3575 
3576  left_page_no = btr_page_get_prev(page, mtr);
3577  right_page_no = btr_page_get_next(page, mtr);
3578 
3579 #ifdef UNIV_DEBUG
 /* On a non-leaf level, the leftmost page's first user record
 must carry the predefined minimum-record flag. */
3580  if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
3581  ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
3582  page_rec_get_next(page_get_infimum_rec(page)),
3583  page_is_comp(page)));
3584  }
3585 #endif /* UNIV_DEBUG */
3586 
3587  heap = mem_heap_create(100);
3588  offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
3589  &father_cursor);
3590 
3591  if (adjust) {
 /* Remember the cursor's ordinal record number so it can be
 re-positioned on the merge page at func_exit. */
3592  nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
3593  ut_ad(nth_rec > 0);
3594  }
3595 
3596  if (left_page_no == FIL_NULL && right_page_no == FIL_NULL) {
3597  /* The page is the only one on the level, lift the records
3598  to the father */
3599 
3600  merge_block = btr_lift_page_up(index, block, mtr);
3601  goto func_exit;
3602  }
3603 
3604  /* Decide the page to which we try to merge and which will inherit
3605  the locks */
3606 
3607  is_left = btr_can_merge_with_page(cursor, left_page_no,
3608  &merge_block, mtr);
3609 
3610  DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;);
3611 
3612  if(!is_left
3613  && !btr_can_merge_with_page(cursor, right_page_no, &merge_block,
3614  mtr)) {
3615  goto err_exit;
3616  }
3617 
3618  merge_page = buf_block_get_frame(merge_block);
3619 
3620 #ifdef UNIV_BTR_DEBUG
 /* Sanity: the chosen sibling must actually link back to us. */
3621  if (is_left) {
3622  ut_a(btr_page_get_next(merge_page, mtr)
3623  == buf_block_get_page_no(block));
3624  } else {
3625  ut_a(btr_page_get_prev(merge_page, mtr)
3626  == buf_block_get_page_no(block));
3627  }
3628 #endif /* UNIV_BTR_DEBUG */
3629 
3630  ut_ad(page_validate(merge_page, index));
3631 
3632  merge_page_zip = buf_block_get_page_zip(merge_block);
3633 #ifdef UNIV_ZIP_DEBUG
3634  if (merge_page_zip) {
3635  const page_zip_des_t* page_zip
3636  = buf_block_get_page_zip(block);
3637  ut_a(page_zip);
3638  ut_a(page_zip_validate(merge_page_zip, merge_page, index));
3639  ut_a(page_zip_validate(page_zip, page, index));
3640  }
3641 #endif /* UNIV_ZIP_DEBUG */
3642 
3643  /* Move records to the merge page */
3644  if (is_left) {
3645  rec_t* orig_pred = page_copy_rec_list_start(
3646  merge_block, block, page_get_supremum_rec(page),
3647  index, mtr);
3648 
3649  if (!orig_pred) {
3650  goto err_exit;
3651  }
3652 
 /* NOTE(review): original line 3653 was elided by the
 extractor — presumably
 "btr_search_drop_page_hash_index(block);" (a hyperlinked
 call); confirm against upstream btr0btr.cc. */
3654 
3655  /* Remove the page from the level list */
3656  btr_level_list_remove(space, zip_size, page, index, mtr);
3657 
3658  btr_node_ptr_delete(index, block, mtr);
3659  lock_update_merge_left(merge_block, orig_pred, block);
3660 
3661  if (adjust) {
 /* The cursor's record now sits after the records that
 were already on the left page. */
3662  nth_rec += page_rec_get_n_recs_before(orig_pred);
3663  }
3664  } else {
3665  rec_t* orig_succ;
3666  ibool compressed;
3667  dberr_t err;
3668  btr_cur_t cursor2;
3669  /* father cursor pointing to node ptr
3670  of the right sibling */
3671 #ifdef UNIV_BTR_DEBUG
3672  byte fil_page_prev[4];
3673 #endif /* UNIV_BTR_DEBUG */
3674 
3675  btr_page_get_father(index, merge_block, mtr, &cursor2);
3676 
3677  if (merge_page_zip && left_page_no == FIL_NULL) {
3678 
3679  /* The function page_zip_compress(), which will be
3680  invoked by page_copy_rec_list_end() below,
3681  requires that FIL_PAGE_PREV be FIL_NULL.
3682  Clear the field, but prepare to restore it. */
3683 #ifdef UNIV_BTR_DEBUG
3684  memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
3685 #endif /* UNIV_BTR_DEBUG */
3686 #if FIL_NULL != 0xffffffff
3687 # error "FIL_NULL != 0xffffffff"
3688 #endif
3689  memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
3690  }
3691 
3692  orig_succ = page_copy_rec_list_end(merge_block, block,
3693  page_get_infimum_rec(page),
3694  cursor->index, mtr);
3695 
3696  if (!orig_succ) {
 /* Copy can only fail when recompression of the zipped
 merge page fails. */
3697  ut_a(merge_page_zip);
3698 #ifdef UNIV_BTR_DEBUG
3699  if (left_page_no == FIL_NULL) {
3700  /* FIL_PAGE_PREV was restored from
3701  merge_page_zip. */
3702  ut_a(!memcmp(fil_page_prev,
3703  merge_page + FIL_PAGE_PREV, 4));
3704  }
3705 #endif /* UNIV_BTR_DEBUG */
3706  goto err_exit;
3707  }
3708 
 /* NOTE(review): original line 3709 was elided by the
 extractor — presumably
 "btr_search_drop_page_hash_index(block);"; confirm against
 upstream btr0btr.cc. */
3710 
3711 #ifdef UNIV_BTR_DEBUG
3712  if (merge_page_zip && left_page_no == FIL_NULL) {
3713 
3714  /* Restore FIL_PAGE_PREV in order to avoid an assertion
3715  failure in btr_level_list_remove(), which will set
3716  the field again to FIL_NULL. Even though this makes
3717  merge_page and merge_page_zip inconsistent for a
3718  split second, it is harmless, because the pages
3719  are X-latched. */
3720  memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
3721  }
3722 #endif /* UNIV_BTR_DEBUG */
3723 
3724  /* Remove the page from the level list */
3725  btr_level_list_remove(space, zip_size, page, index, mtr);
3726 
3727  /* Replace the address of the old child node (= page) with the
3728  address of the merge page to the right */
 /* NOTE(review): original line 3729 — the elided call name —
 is "btr_node_ptr_set_child_page_no(" in upstream btr0btr.cc;
 confirm before use. */
3730  btr_cur_get_rec(&father_cursor),
3731  btr_cur_get_page_zip(&father_cursor),
3732  offsets, right_page_no, mtr);
3733 
 /* Delete the node pointer of the right sibling; its page
 now carries the merged records. */
3734  compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2,
3735  BTR_CREATE_FLAG,
3736  RB_NONE, mtr);
3737  ut_a(err == DB_SUCCESS);
3738 
3739  if (!compressed) {
3740  btr_cur_compress_if_useful(&cursor2, FALSE, mtr);
3741  }
3742 
3743  lock_update_merge_right(merge_block, orig_succ, block);
3744  }
3745 
3746  btr_blob_dbg_remove(page, index, "btr_compress");
3747 
3748  if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
3749  /* Update the free bits of the B-tree page in the
3750  insert buffer bitmap. This has to be done in a
3751  separate mini-transaction that is committed before the
3752  main mini-transaction. We cannot update the insert
3753  buffer bitmap in this mini-transaction, because
3754  btr_compress() can be invoked recursively without
3755  committing the mini-transaction in between. Since
3756  insert buffer bitmap pages have a lower rank than
3757  B-tree pages, we must not access other pages in the
3758  same mini-transaction after accessing an insert buffer
3759  bitmap page. */
3760 
3761  /* The free bits in the insert buffer bitmap must
3762  never exceed the free space on a page. It is safe to
3763  decrement or reset the bits in the bitmap in a
3764  mini-transaction that is committed before the
3765  mini-transaction that affects the free space. */
3766 
3767  /* It is unsafe to increment the bits in a separately
3768  committed mini-transaction, because in crash recovery,
3769  the free bits could momentarily be set too high. */
3770 
3771  if (zip_size) {
3772  /* Because the free bits may be incremented
3773  and we cannot update the insert buffer bitmap
3774  in the same mini-transaction, the only safe
3775  thing we can do here is the pessimistic
3776  approach: reset the free bits. */
3777  ibuf_reset_free_bits(merge_block);
3778  } else {
3779  /* On uncompressed pages, the free bits will
3780  never increase here. Thus, it is safe to
3781  write the bits accurately in a separate
3782  mini-transaction. */
3783  ibuf_update_free_bits_if_full(merge_block,
3784  UNIV_PAGE_SIZE,
3785  ULINT_UNDEFINED);
3786  }
3787  }
3788 
3789  ut_ad(page_validate(merge_page, index));
3790 #ifdef UNIV_ZIP_DEBUG
3791  ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
3792  index));
3793 #endif /* UNIV_ZIP_DEBUG */
3794 
3795  /* Free the file page */
3796  btr_page_free(index, block, mtr);
3797 
3798  ut_ad(btr_check_node_ptr(index, merge_block, mtr));
3799 func_exit:
3800  mem_heap_free(heap);
3801 
3802  if (adjust) {
3803  ut_ad(nth_rec > 0);
 /* NOTE(review): original line 3804 — the elided call name —
 is "btr_cur_position(" in upstream btr0btr.cc: re-position
 the caller's cursor on the nth record of the merge page. */
3805  index,
3806  page_rec_get_nth(merge_block->frame, nth_rec),
3807  merge_block, cursor);
3808  }
3809  DBUG_RETURN(TRUE);
3810 
3811 err_exit:
3812  /* We play it safe and reset the free bits. */
3813  if (zip_size
3814  && merge_page
3815  && page_is_leaf(merge_page)
3816  && !dict_index_is_clust(index)) {
3817  ibuf_reset_free_bits(merge_block);
3818  }
3819 
3820  mem_heap_free(heap);
3821  DBUG_RETURN(FALSE);
3822 }
3823 
3824 /*************************************************************/
/*
Discards a page that is the only one on its level.  Walks
upward from 'block' to the root, freeing each single-record
page, then empties the root so it becomes the (leaf) root of an
empty tree.  The leaf page's PAGE_MAX_TRX_ID is preserved on the
root for secondary indexes.

Parameters:
  index  in: index tree
  block  in: page to discard (the only one on its level)
  mtr    in/out: mini-transaction */
3829 static
3830 void
3831 btr_discard_only_page_on_level(
3832 /*===========================*/
3833  dict_index_t* index,
3834  buf_block_t* block,
3835  mtr_t* mtr)
3836 {
3837  ulint page_level = 0;
3838  trx_id_t max_trx_id;
3839 
3840  /* Save the PAGE_MAX_TRX_ID from the leaf page. */
3841  max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
3842 
3843  while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
3844  btr_cur_t cursor;
3845  buf_block_t* father;
3846  const page_t* page = buf_block_get_frame(block);
3847 
 /* Each discarded page must hold exactly one record (the
 node pointer chain down to the leaf) and have no siblings. */
3848  ut_a(page_get_n_recs(page) == 1);
3849  ut_a(page_level == btr_page_get_level(page, mtr));
3850  ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
3851  ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
3852 
3853  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 /* NOTE(review): original line 3854 was elided by the
 extractor — presumably
 "btr_search_drop_page_hash_index(block);"; confirm against
 upstream btr0btr.cc. */
3855 
3856  btr_page_get_father(index, block, mtr, &cursor);
3857  father = btr_cur_get_block(&cursor);
3858 
3859  lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
3860 
3861  /* Free the file page */
3862  btr_page_free(index, block, mtr);
3863 
3864  block = father;
3865  page_level++;
3866  }
3867 
3868  /* block is the root page, which must be empty, except
3869  for the node pointer to the (now discarded) block(s). */
3870 
3871 #ifdef UNIV_BTR_DEBUG
3872  if (!dict_index_is_ibuf(index)) {
3873  const page_t* root = buf_block_get_frame(block);
3874  const ulint space = dict_index_get_space(index);
3875  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
3876  + root, space));
3877  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
3878  + root, space));
3879  }
3880 #endif /* UNIV_BTR_DEBUG */
3881 
 /* Re-create the root as an empty leaf page (level 0). */
3882  btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
3883  ut_ad(page_is_leaf(buf_block_get_frame(block)));
3884 
3885  if (!dict_index_is_clust(index)) {
3886  /* We play it safe and reset the free bits for the root */
3887  ibuf_reset_free_bits(block);
3888 
 /* Carry the saved PAGE_MAX_TRX_ID over to the new root so
 secondary-index visibility checks stay correct. */
3889  ut_a(max_trx_id);
3890  page_set_max_trx_id(block,
3891  buf_block_get_page_zip(block),
3892  max_trx_id, mtr);
3893  }
3894 }
3895 
3896 /*************************************************************/
/*
Discards the page the cursor is on, moving its locks to a
sibling page.  If the page has no siblings, the whole level is
collapsed via btr_discard_only_page_on_level().  The file page
is freed at the end.

NOTE(review): the function-name line (original line 3902,
"btr_discard_page(") and the parameter comment (line 3905) were
elided by the doc generator; the name is confirmed by the debug
string at line 3998 below. */
3900 UNIV_INTERN
3901 void
3903 /*=============*/
3904  btr_cur_t* cursor,
3906  mtr_t* mtr)
3907 {
 /* NOTE(review): original line 3908 — the declaration
 "dict_index_t* index;" — was elided; 'index' is assigned from
 the cursor below. Confirm against upstream btr0btr.cc. */
3909  ulint space;
3910  ulint zip_size;
3911  ulint left_page_no;
3912  ulint right_page_no;
3913  buf_block_t* merge_block;
3914  page_t* merge_page;
3915  buf_block_t* block;
3916  page_t* page;
3917  rec_t* node_ptr;
3918 
3919  block = btr_cur_get_block(cursor);
3920  index = btr_cur_get_index(cursor);
3921 
 /* NOTE(review): original line 3922 was elided — presumably an
 assertion that this is not the root page; confirm upstream. */
3923  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3924  MTR_MEMO_X_LOCK));
3925  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3926  space = dict_index_get_space(index);
3927  zip_size = dict_table_zip_size(index->table);
3928 
3929  /* Decide the page which will inherit the locks */
3930 
3931  left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
3932  right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
3933 
3934  if (left_page_no != FIL_NULL) {
 /* Prefer the left sibling as the lock inheritor. */
3935  merge_block = btr_block_get(space, zip_size, left_page_no,
3936  RW_X_LATCH, index, mtr);
3937  merge_page = buf_block_get_frame(merge_block);
3938 #ifdef UNIV_BTR_DEBUG
3939  ut_a(btr_page_get_next(merge_page, mtr)
3940  == buf_block_get_page_no(block));
3941 #endif /* UNIV_BTR_DEBUG */
3942  } else if (right_page_no != FIL_NULL) {
3943  merge_block = btr_block_get(space, zip_size, right_page_no,
3944  RW_X_LATCH, index, mtr);
3945  merge_page = buf_block_get_frame(merge_block);
3946 #ifdef UNIV_BTR_DEBUG
3947  ut_a(btr_page_get_prev(merge_page, mtr)
3948  == buf_block_get_page_no(block));
3949 #endif /* UNIV_BTR_DEBUG */
3950  } else {
 /* No siblings: the page is alone on its level. */
3951  btr_discard_only_page_on_level(index, block, mtr);
3952 
3953  return;
3954  }
3955 
3956  page = buf_block_get_frame(block);
3957  ut_a(page_is_comp(merge_page) == page_is_comp(page));
 /* NOTE(review): original line 3958 was elided — presumably
 "btr_search_drop_page_hash_index(block);"; confirm upstream. */
3959 
3960  if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
3961 
3962  /* We have to mark the leftmost node pointer on the right
3963  side page as the predefined minimum record */
3964  node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
3965 
3966  ut_ad(page_rec_is_user_rec(node_ptr));
3967 
3968  /* This will make page_zip_validate() fail on merge_page
3969  until btr_level_list_remove() completes. This is harmless,
3970  because everything will take place within a single
3971  mini-transaction and because writing to the redo log
3972  is an atomic operation (performed by mtr_commit()). */
3973  btr_set_min_rec_mark(node_ptr, mtr);
3974  }
3975 
 /* Delete the node pointer in the father page pointing to us. */
3976  btr_node_ptr_delete(index, block, mtr);
3977 
3978  /* Remove the page from the level list */
3979  btr_level_list_remove(space, zip_size, page, index, mtr);
3980 #ifdef UNIV_ZIP_DEBUG
3981  {
3982  page_zip_des_t* merge_page_zip
3983  = buf_block_get_page_zip(merge_block);
3984  ut_a(!merge_page_zip
3985  || page_zip_validate(merge_page_zip, merge_page, index));
3986  }
3987 #endif /* UNIV_ZIP_DEBUG */
3988 
3989  if (left_page_no != FIL_NULL) {
 /* Locks of the discarded page attach after the left
 sibling's supremum ... */
3990  lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
3991  block);
3992  } else {
 /* ... or before the right sibling's first record. */
3993  lock_update_discard(merge_block,
3994  lock_get_min_heap_no(merge_block),
3995  block);
3996  }
3997 
3998  btr_blob_dbg_remove(page, index, "btr_discard_page");
3999 
4000  /* Free the file page */
4001  btr_page_free(index, block, mtr);
4002 
4003  ut_ad(btr_check_node_ptr(index, merge_block, mtr));
4004 }
4005 
4006 #ifdef UNIV_BTR_PRINT
4007 /*************************************************************/
4009 UNIV_INTERN
4010 void
4011 btr_print_size(
4012 /*===========*/
4013  dict_index_t* index)
4014 {
4015  page_t* root;
4016  fseg_header_t* seg;
4017  mtr_t mtr;
4018 
4019  if (dict_index_is_ibuf(index)) {
4020  fputs("Sorry, cannot print info of an ibuf tree:"
4021  " use ibuf functions\n", stderr);
4022 
4023  return;
4024  }
4025 
4026  mtr_start(&mtr);
4027 
4028  root = btr_root_get(index, &mtr);
4029 
4030  seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
4031 
4032  fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
4033  fseg_print(seg, &mtr);
4034 
4035  if (!dict_index_is_univ(index)) {
4036 
4037  seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
4038 
4039  fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
4040  fseg_print(seg, &mtr);
4041  }
4042 
4043  mtr_commit(&mtr);
4044 }
4045 
4046 /************************************************************/
/*
Recursively prints this index page and, on non-leaf levels, up
to 'width' child pages from the start and the end of the page.

Parameters (the comment line 4055 for 'width' was elided):
  index    in: index tree
  block    in: page to print (must be X-fixed in 'mtr')
  width    in: print this many node pointers from each end
  heap     in/out: heap for rec_get_offsets()
  offsets  in/out: offsets array, reused across calls
  mtr      in: mini-transaction holding the page fix */
4048 static
4049 void
4050 btr_print_recursive(
4051 /*================*/
4052  dict_index_t* index,
4053  buf_block_t* block,
4054  ulint width,
4056  mem_heap_t** heap,
4057  ulint** offsets,
4058  mtr_t* mtr)
4059 {
4060  const page_t* page = buf_block_get_frame(block);
 /* NOTE(review): original line 4061 — the declaration
 "page_cur_t cursor;" — was elided by the extractor; 'cursor'
 below is otherwise undeclared. Confirm against upstream. */
4062  ulint n_recs;
4063  ulint i = 0;
4064  mtr_t mtr2;
4065 
4066  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4067  fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
4068  (ulong) btr_page_get_level(page, mtr),
4069  (ulong) buf_block_get_page_no(block));
4070 
4071  page_print(block, index, width, width);
4072 
4073  n_recs = page_get_n_recs(page);
4074 
4075  page_cur_set_before_first(block, &cursor);
4076  page_cur_move_to_next(&cursor);
4077 
4078  while (!page_cur_is_after_last(&cursor)) {
4079 
4080  if (page_is_leaf(page)) {
4081 
4082  /* If this is the leaf level, do nothing */
4083 
4084  } else if ((i <= width) || (i >= n_recs - width)) {
4085 
4086  const rec_t* node_ptr;
4087 
 /* Each child page is visited in its own
 mini-transaction so latches are released promptly. */
4088  mtr_start(&mtr2);
4089 
4090  node_ptr = page_cur_get_rec(&cursor);
4091 
4092  *offsets = rec_get_offsets(node_ptr, index, *offsets,
4093  ULINT_UNDEFINED, heap);
4094  btr_print_recursive(index,
4095  btr_node_ptr_get_child(node_ptr,
4096  index,
4097  *offsets,
4098  &mtr2),
4099  width, heap, offsets, &mtr2);
4100  mtr_commit(&mtr2);
4101  }
4102 
4103  page_cur_move_to_next(&cursor);
4104  i++;
4105  }
4106 }
4107 
4108 /**************************************************************/
4110 UNIV_INTERN
4111 void
4112 btr_print_index(
4113 /*============*/
4114  dict_index_t* index,
4115  ulint width)
4117 {
4118  mtr_t mtr;
4119  buf_block_t* root;
4120  mem_heap_t* heap = NULL;
4121  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4122  ulint* offsets = offsets_;
4123  rec_offs_init(offsets_);
4124 
4125  fputs("--------------------------\n"
4126  "INDEX TREE PRINT\n", stderr);
4127 
4128  mtr_start(&mtr);
4129 
4130  root = btr_root_block_get(index, RW_X_LATCH, &mtr);
4131 
4132  btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
4133  if (heap) {
4134  mem_heap_free(heap);
4135  }
4136 
4137  mtr_commit(&mtr);
4138 
4139  btr_validate_index(index, 0);
4140 }
4141 #endif /* UNIV_BTR_PRINT */
4142 
4143 #ifdef UNIV_DEBUG
4144 /************************************************************/
4147 UNIV_INTERN
4148 ibool
4149 btr_check_node_ptr(
4150 /*===============*/
4151  dict_index_t* index,
4152  buf_block_t* block,
4153  mtr_t* mtr)
4154 {
4155  mem_heap_t* heap;
4156  dtuple_t* tuple;
4157  ulint* offsets;
4158  btr_cur_t cursor;
4159  page_t* page = buf_block_get_frame(block);
4160 
4161  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4162  if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
4163 
4164  return(TRUE);
4165  }
4166 
4167  heap = mem_heap_create(256);
4168  offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
4169  &cursor);
4170 
4171  if (page_is_leaf(page)) {
4172 
4173  goto func_exit;
4174  }
4175 
4176  tuple = dict_index_build_node_ptr(
4177  index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
4178  btr_page_get_level(page, mtr));
4179 
4180  ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
4181 func_exit:
4182  mem_heap_free(heap);
4183 
4184  return(TRUE);
4185 }
4186 #endif /* UNIV_DEBUG */
4187 
4188 /************************************************************/
4190 static
4191 void
4192 btr_index_rec_validate_report(
4193 /*==========================*/
4194  const page_t* page,
4195  const rec_t* rec,
4196  const dict_index_t* index)
4197 {
4198  fputs("InnoDB: Record in ", stderr);
4199  dict_index_name_print(stderr, NULL, index);
4200  fprintf(stderr, ", page %lu, at offset %lu\n",
4201  page_get_page_no(page), (ulint) page_offset(rec));
4202 }
4203 
4204 /************************************************************/
/*
Checks the size and number of fields in a record against the
index definition.  Returns TRUE if the record is consistent;
on failure reports the problem to stderr and, if
'dump_on_error' is set, dumps the page and the record.

NOTE(review): the function-name line (original line 4210,
"btr_index_rec_validate(") was elided by the doc generator, as
were two buf_page_print() fragments flagged inline below. */
4208 UNIV_INTERN
4209 ibool
4211 /*===================*/
4212  const rec_t* rec,
4213  const dict_index_t* index,
4214  ibool dump_on_error)
4217 {
4218  ulint len;
4219  ulint n;
4220  ulint i;
4221  const page_t* page;
4222  mem_heap_t* heap = NULL;
4223  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4224  ulint* offsets = offsets_;
4225  rec_offs_init(offsets_);
4226 
4227  page = page_align(rec);
4228 
4229  if (dict_index_is_univ(index)) {
4230  /* The insert buffer index tree can contain records from any
4231  other index: we cannot check the number of fields or
4232  their length */
4233 
4234  return(TRUE);
4235  }
4236 
 /* The page's compact-format flag must match the table's. */
4237  if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) {
4238  btr_index_rec_validate_report(page, rec, index);
4239  fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
4240  (ulong) !!page_is_comp(page),
4241  (ulong) dict_table_is_comp(index->table));
4242 
4243  return(FALSE);
4244  }
4245 
4246  n = dict_index_get_n_fields(index);
4247 
 /* Old-style (redundant) records store their field count
 explicitly; verify it against the dictionary. */
4248  if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) {
4249  btr_index_rec_validate_report(page, rec, index);
4250  fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
4251  (ulong) rec_get_n_fields_old(rec), (ulong) n);
4252 
4253  if (dump_on_error) {
 /* NOTE(review): original line 4254 was elided —
 presumably "buf_page_print(page, 0,
 BUF_PAGE_PRINT_NO_CRASH);"; confirm upstream. */
4255 
4256  fputs("InnoDB: corrupt record ", stderr);
4257  rec_print_old(stderr, rec);
4258  putc('\n', stderr);
4259  }
4260  return(FALSE);
4261  }
4262 
4263  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
4264 
4265  for (i = 0; i < n; i++) {
4266  ulint fixed_size = dict_col_get_fixed_size(
4267  dict_index_get_nth_col(index, i), page_is_comp(page));
4268 
4269  rec_get_nth_field_offs(offsets, i, &len);
4270 
4271  /* Note that if fixed_size != 0, it equals the
4272  length of a fixed-size column in the clustered index.
4273  A prefix index of the column is of fixed, but different
4274  length. When fixed_size == 0, prefix_len is the maximum
4275  length of the prefix index column. */
4276 
4277  if ((dict_index_get_nth_field(index, i)->prefix_len == 0
4278  && len != UNIV_SQL_NULL && fixed_size
4279  && len != fixed_size)
4280  || (dict_index_get_nth_field(index, i)->prefix_len > 0
4281  && len != UNIV_SQL_NULL
4282  && len
4283  > dict_index_get_nth_field(index, i)->prefix_len)) {
4284 
4285  btr_index_rec_validate_report(page, rec, index);
4286  fprintf(stderr,
4287  "InnoDB: field %lu len is %lu,"
4288  " should be %lu\n",
4289  (ulong) i, (ulong) len, (ulong) fixed_size);
4290 
4291  if (dump_on_error) {
4292  buf_page_print(page, 0,
 /* NOTE(review): original line 4293 was elided —
 presumably "BUF_PAGE_PRINT_NO_CRASH);"; confirm
 against upstream btr0btr.cc. */
4294 
4295  fputs("InnoDB: corrupt record ", stderr);
4296  rec_print_new(stderr, rec, offsets);
4297  putc('\n', stderr);
4298  }
4299  if (heap) {
4300  mem_heap_free(heap);
4301  }
4302  return(FALSE);
4303  }
4304  }
4305 
4306  if (heap) {
4307  mem_heap_free(heap);
4308  }
4309  return(TRUE);
4310 }
4311 
4312 /************************************************************/
/*
Checks the size and number of fields in every user record on
this index page.  Returns TRUE if all records validate; FALSE
(with a report already printed) as soon as one fails.

NOTE(review): several DBUG_EXECUTE_IF fragments below lost
lines to the doc generator (original lines 4333, 4336, 4357,
4359, 4361); the visible remnants are kept byte-identical and
flagged inline — confirm against upstream btr0btr.cc. */
4316 static
4317 ibool
4318 btr_index_page_validate(
4319 /*====================*/
4320  buf_block_t* block,
4321  dict_index_t* index)
4322 {
4323  page_cur_t cur;
4324  ibool ret = TRUE;
4325 #ifndef DBUG_OFF
 /* 1-based ordinal of the current record, used only by the
 "check_table_rec_next" debug checks. */
4326  ulint nth = 1;
4327 #endif /* !DBUG_OFF */
4328 
4329  page_cur_set_before_first(block, &cur);
4330 
4331  /* Directory slot 0 should only contain the infimum record. */
 /* NOTE(review): lines 4333 and 4336 of this DBUG block were
 elided (the ut_a(page_rec_get_nth_const( ... and
 ut_a(page_dir_slot_get_n_owned( ... call heads). */
4332  DBUG_EXECUTE_IF("check_table_rec_next",
4334  page_cur_get_page(&cur), 0)
4335  == cur.rec);
4337  page_dir_get_nth_slot(
4338  page_cur_get_page(&cur), 0))
4339  == 1;);
4340 
4341  page_cur_move_to_next(&cur);
4342 
4343  for (;;) {
4344  if (page_cur_is_after_last(&cur)) {
4345 
4346  break;
4347  }
4348 
4349  if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
4350 
4351  return(FALSE);
4352  }
4353 
4354  /* Verify that page_rec_get_nth_const() is correctly
4355  retrieving each record. */
 /* NOTE(review): lines 4357, 4359 and 4361 of this DBUG
 block were elided by the extractor. */
4356  DBUG_EXECUTE_IF("check_table_rec_next",
4358  page_cur_get_page(&cur),
4360  cur.rec)));
4362  cur.rec)););
4363 
4364  page_cur_move_to_next(&cur);
4365  }
4366 
4367  return(ret);
4368 }
4369 
4370 /************************************************************/
4372 static
4373 void
4374 btr_validate_report1(
4375 /*=================*/
4376  dict_index_t* index,
4377  ulint level,
4378  const buf_block_t* block)
4379 {
4380  fprintf(stderr, "InnoDB: Error in page %lu of ",
4381  buf_block_get_page_no(block));
4382  dict_index_name_print(stderr, NULL, index);
4383  if (level) {
4384  fprintf(stderr, ", index tree level %lu", level);
4385  }
4386  putc('\n', stderr);
4387 }
4388 
4389 /************************************************************/
4391 static
4392 void
4393 btr_validate_report2(
4394 /*=================*/
4395  const dict_index_t* index,
4396  ulint level,
4397  const buf_block_t* block1,
4398  const buf_block_t* block2)
4399 {
4400  fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
4401  buf_block_get_page_no(block1),
4402  buf_block_get_page_no(block2));
4403  dict_index_name_print(stderr, NULL, index);
4404  if (level) {
4405  fprintf(stderr, ", index tree level %lu", level);
4406  }
4407  putc('\n', stderr);
4408 }
4409 
4410 /************************************************************/
4413 static
4414 bool
4415 btr_validate_level(
4416 /*===============*/
4417  dict_index_t* index,
4418  const trx_t* trx,
4419  ulint level)
4420 {
4421  ulint space;
4422  ulint space_flags;
4423  ulint zip_size;
4424  buf_block_t* block;
4425  page_t* page;
4426  buf_block_t* right_block = 0; /* remove warning */
4427  page_t* right_page = 0; /* remove warning */
4428  page_t* father_page;
4429  btr_cur_t node_cur;
4430  btr_cur_t right_node_cur;
4431  rec_t* rec;
4432  ulint right_page_no;
4433  ulint left_page_no;
4435  dtuple_t* node_ptr_tuple;
4436  bool ret = true;
4437  mtr_t mtr;
4438  mem_heap_t* heap = mem_heap_create(256);
4439  fseg_header_t* seg;
4440  ulint* offsets = NULL;
4441  ulint* offsets2= NULL;
4442 #ifdef UNIV_ZIP_DEBUG
4443  page_zip_des_t* page_zip;
4444 #endif /* UNIV_ZIP_DEBUG */
4445 
4446  mtr_start(&mtr);
4447 
4448  mtr_x_lock(dict_index_get_lock(index), &mtr);
4449 
4450  block = btr_root_block_get(index, RW_X_LATCH, &mtr);
4451  page = buf_block_get_frame(block);
4452  seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
4453 
4454  space = dict_index_get_space(index);
4455  zip_size = dict_table_zip_size(index->table);
4456 
4457  fil_space_get_latch(space, &space_flags);
4458 
4459  if (zip_size != dict_tf_get_zip_size(space_flags)) {
4460 
4461  ib_logf(IB_LOG_LEVEL_WARN,
4462  "Flags mismatch: table=%lu, tablespace=%lu",
4463  (ulint) index->table->flags, (ulint) space_flags);
4464 
4465  mtr_commit(&mtr);
4466 
4467  return(false);
4468  }
4469 
4470  while (level != btr_page_get_level(page, &mtr)) {
4471  const rec_t* node_ptr;
4472 
4473  if (fseg_page_is_free(seg,
4474  block->page.space, block->page.offset)) {
4475 
4476  btr_validate_report1(index, level, block);
4477 
4478  ib_logf(IB_LOG_LEVEL_WARN, "page is free");
4479 
4480  ret = false;
4481  }
4482 
4483  ut_a(space == buf_block_get_space(block));
4484  ut_a(space == page_get_space_id(page));
4485 #ifdef UNIV_ZIP_DEBUG
4486  page_zip = buf_block_get_page_zip(block);
4487  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
4488 #endif /* UNIV_ZIP_DEBUG */
4489  ut_a(!page_is_leaf(page));
4490 
4491  page_cur_set_before_first(block, &cursor);
4492  page_cur_move_to_next(&cursor);
4493 
4494  node_ptr = page_cur_get_rec(&cursor);
4495  offsets = rec_get_offsets(node_ptr, index, offsets,
4496  ULINT_UNDEFINED, &heap);
4497  block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
4498  page = buf_block_get_frame(block);
4499  }
4500 
4501  /* Now we are on the desired level. Loop through the pages on that
4502  level. */
4503 
4504  if (level == 0) {
4505  /* Leaf pages are managed in their own file segment. */
4506  seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
4507  }
4508 
4509 loop:
4510  mem_heap_empty(heap);
4511  offsets = offsets2 = NULL;
4512  mtr_x_lock(dict_index_get_lock(index), &mtr);
4513 
4514 #ifdef UNIV_ZIP_DEBUG
4515  page_zip = buf_block_get_page_zip(block);
4516  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
4517 #endif /* UNIV_ZIP_DEBUG */
4518 
4519  ut_a(block->page.space == space);
4520 
4521  if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
4522 
4523  btr_validate_report1(index, level, block);
4524 
4525  ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
4526  ret = false;
4527 
4528  } else if (btr_page_get_index_id(page) != index->id) {
4529 
4530  ib_logf(IB_LOG_LEVEL_ERROR,
4531  "Page index id " IB_ID_FMT " != data dictionary "
4532  "index id " IB_ID_FMT,
4533  btr_page_get_index_id(page), index->id);
4534 
4535  ret = false;
4536 
4537  } else if (!page_validate(page, index)) {
4538 
4539  btr_validate_report1(index, level, block);
4540  ret = false;
4541 
4542  } else if (level == 0 && !btr_index_page_validate(block, index)) {
4543 
4544  /* We are on level 0. Check that the records have the right
4545  number of fields, and field lengths are right. */
4546 
4547  ret = false;
4548  }
4549 
4550  ut_a(btr_page_get_level(page, &mtr) == level);
4551 
4552  right_page_no = btr_page_get_next(page, &mtr);
4553  left_page_no = btr_page_get_prev(page, &mtr);
4554 
4555  ut_a(!page_is_empty(page)
4556  || (level == 0
4557  && page_get_page_no(page) == dict_index_get_page(index)));
4558 
4559  if (right_page_no != FIL_NULL) {
4560  const rec_t* right_rec;
4561  right_block = btr_block_get(space, zip_size, right_page_no,
4562  RW_X_LATCH, index, &mtr);
4563  right_page = buf_block_get_frame(right_block);
4564  if (btr_page_get_prev(right_page, &mtr)
4565  != page_get_page_no(page)) {
4566 
4567  btr_validate_report2(index, level, block, right_block);
4568  fputs("InnoDB: broken FIL_PAGE_NEXT"
4569  " or FIL_PAGE_PREV links\n", stderr);
4571  buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
4572 
4573  ret = false;
4574  }
4575 
4576  if (page_is_comp(right_page) != page_is_comp(page)) {
4577  btr_validate_report2(index, level, block, right_block);
4578  fputs("InnoDB: 'compact' flag mismatch\n", stderr);
4580  buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
4581 
4582  ret = false;
4583 
4584  goto node_ptr_fails;
4585  }
4586 
4587  rec = page_rec_get_prev(page_get_supremum_rec(page));
4588  right_rec = page_rec_get_next(page_get_infimum_rec(
4589  right_page));
4590  offsets = rec_get_offsets(rec, index,
4591  offsets, ULINT_UNDEFINED, &heap);
4592  offsets2 = rec_get_offsets(right_rec, index,
4593  offsets2, ULINT_UNDEFINED, &heap);
4594  if (cmp_rec_rec(rec, right_rec, offsets, offsets2,
4595  index) >= 0) {
4596 
4597  btr_validate_report2(index, level, block, right_block);
4598 
4599  fputs("InnoDB: records in wrong order"
4600  " on adjacent pages\n", stderr);
4601 
4603  buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
4604 
4605  fputs("InnoDB: record ", stderr);
4606  rec = page_rec_get_prev(page_get_supremum_rec(page));
4607  rec_print(stderr, rec, index);
4608  putc('\n', stderr);
4609  fputs("InnoDB: record ", stderr);
4610  rec = page_rec_get_next(
4611  page_get_infimum_rec(right_page));
4612  rec_print(stderr, rec, index);
4613  putc('\n', stderr);
4614 
4615  ret = false;
4616  }
4617  }
4618 
4619  if (level > 0 && left_page_no == FIL_NULL) {
4620  ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
4621  page_rec_get_next(page_get_infimum_rec(page)),
4622  page_is_comp(page)));
4623  }
4624 
4625  if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
4626 
4627  /* Check father node pointers */
4628 
4629  rec_t* node_ptr;
4630 
4631  offsets = btr_page_get_father_block(offsets, heap, index,
4632  block, &mtr, &node_cur);
4633  father_page = btr_cur_get_page(&node_cur);
4634  node_ptr = btr_cur_get_rec(&node_cur);
4635 
4637  index, page_rec_get_prev(page_get_supremum_rec(page)),
4638  block, &node_cur);
4639  offsets = btr_page_get_father_node_ptr(offsets, heap,
4640  &node_cur, &mtr);
4641 
4642  if (node_ptr != btr_cur_get_rec(&node_cur)
4643  || btr_node_ptr_get_child_page_no(node_ptr, offsets)
4644  != buf_block_get_page_no(block)) {
4645 
4646  btr_validate_report1(index, level, block);
4647 
4648  fputs("InnoDB: node pointer to the page is wrong\n",
4649  stderr);
4650 
4651  buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH);
4653 
4654  fputs("InnoDB: node ptr ", stderr);
4655  rec_print(stderr, node_ptr, index);
4656 
4657  rec = btr_cur_get_rec(&node_cur);
4658  fprintf(stderr, "\n"
4659  "InnoDB: node ptr child page n:o %lu\n",
4661  rec, offsets));
4662 
4663  fputs("InnoDB: record on page ", stderr);
4664  rec_print_new(stderr, rec, offsets);
4665  putc('\n', stderr);
4666  ret = false;
4667 
4668  goto node_ptr_fails;
4669  }
4670 
4671  if (!page_is_leaf(page)) {
4672  node_ptr_tuple = dict_index_build_node_ptr(
4673  index,
4674  page_rec_get_next(page_get_infimum_rec(page)),
4675  0, heap, btr_page_get_level(page, &mtr));
4676 
4677  if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
4678  offsets)) {
4679  const rec_t* first_rec = page_rec_get_next(
4680  page_get_infimum_rec(page));
4681 
4682  btr_validate_report1(index, level, block);
4683 
4684  buf_page_print(father_page, 0,
4686  buf_page_print(page, 0,
4688 
4689  fputs("InnoDB: Error: node ptrs differ"
4690  " on levels > 0\n"
4691  "InnoDB: node ptr ", stderr);
4692  rec_print_new(stderr, node_ptr, offsets);
4693  fputs("InnoDB: first rec ", stderr);
4694  rec_print(stderr, first_rec, index);
4695  putc('\n', stderr);
4696  ret = false;
4697 
4698  goto node_ptr_fails;
4699  }
4700  }
4701 
4702  if (left_page_no == FIL_NULL) {
4703  ut_a(node_ptr == page_rec_get_next(
4704  page_get_infimum_rec(father_page)));
4705  ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
4706  }
4707 
4708  if (right_page_no == FIL_NULL) {
4709  ut_a(node_ptr == page_rec_get_prev(
4710  page_get_supremum_rec(father_page)));
4711  ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
4712  } else {
4713  const rec_t* right_node_ptr
4714  = page_rec_get_next(node_ptr);
4715 
4716  offsets = btr_page_get_father_block(
4717  offsets, heap, index, right_block,
4718  &mtr, &right_node_cur);
4719  if (right_node_ptr
4720  != page_get_supremum_rec(father_page)) {
4721 
4722  if (btr_cur_get_rec(&right_node_cur)
4723  != right_node_ptr) {
4724  ret = false;
4725  fputs("InnoDB: node pointer to"
4726  " the right page is wrong\n",
4727  stderr);
4728 
4729  btr_validate_report1(index, level,
4730  block);
4731 
4733  father_page, 0,
4736  page, 0,
4739  right_page, 0,
4741  }
4742  } else {
4743  page_t* right_father_page
4744  = btr_cur_get_page(&right_node_cur);
4745 
4746  if (btr_cur_get_rec(&right_node_cur)
4747  != page_rec_get_next(
4748  page_get_infimum_rec(
4749  right_father_page))) {
4750  ret = false;
4751  fputs("InnoDB: node pointer 2 to"
4752  " the right page is wrong\n",
4753  stderr);
4754 
4755  btr_validate_report1(index, level,
4756  block);
4757 
4759  father_page, 0,
4762  right_father_page, 0,
4765  page, 0,
4768  right_page, 0,
4770  }
4771 
4772  if (page_get_page_no(right_father_page)
4773  != btr_page_get_next(father_page, &mtr)) {
4774 
4775  ret = false;
4776  fputs("InnoDB: node pointer 3 to"
4777  " the right page is wrong\n",
4778  stderr);
4779 
4780  btr_validate_report1(index, level,
4781  block);
4782 
4784  father_page, 0,
4787  right_father_page, 0,
4790  page, 0,
4793  right_page, 0,
4795  }
4796  }
4797  }
4798  }
4799 
4800 node_ptr_fails:
4801  /* Commit the mini-transaction to release the latch on 'page'.
4802  Re-acquire the latch on right_page, which will become 'page'
4803  on the next loop. The page has already been checked. */
4804  mtr_commit(&mtr);
4805 
4806  if (trx_is_interrupted(trx)) {
4807  /* On interrupt, return the current status. */
4808  } else if (right_page_no != FIL_NULL) {
4809 
4810  mtr_start(&mtr);
4811 
4812  block = btr_block_get(
4813  space, zip_size, right_page_no,
4814  RW_X_LATCH, index, &mtr);
4815 
4816  page = buf_block_get_frame(block);
4817 
4818  goto loop;
4819  }
4820 
4821  mem_heap_free(heap);
4822 
4823  return(ret);
4824 }
4825 
4826 /**************************************************************/
4829 UNIV_INTERN
4830 bool
4832 /*===============*/
4833  dict_index_t* index,
4834  const trx_t* trx)
4835 {
4836  /* Full Text index are implemented by auxiliary tables,
4837  not the B-tree */
4838  if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
4839  return(true);
4840  }
4841 
4842  mtr_t mtr;
4843 
4844  mtr_start(&mtr);
4845 
4846  mtr_x_lock(dict_index_get_lock(index), &mtr);
4847 
4848  bool ok = true;
4849  page_t* root = btr_root_get(index, &mtr);
4850  ulint n = btr_page_get_level(root, &mtr);
4851 
4852  for (ulint i = 0; i <= n; ++i) {
4853 
4854  if (!btr_validate_level(index, trx, n - i)) {
4855  ok = false;
4856  break;
4857  }
4858  }
4859 
4860  mtr_commit(&mtr);
4861 
4862  return(ok);
4863 }
4864 
4865 /**************************************************************/
4869 UNIV_INTERN
4870 ibool
4872 /*====================*/
4873  btr_cur_t* cursor,
4874  ulint page_no,
4875  buf_block_t** merge_block,
4876  mtr_t* mtr)
4877 {
4879  page_t* page;
4880  ulint space;
4881  ulint zip_size;
4882  ulint n_recs;
4883  ulint data_size;
4884  ulint max_ins_size_reorg;
4885  ulint max_ins_size;
4886  buf_block_t* mblock;
4887  page_t* mpage;
4888  DBUG_ENTER("btr_can_merge_with_page");
4889 
4890  if (page_no == FIL_NULL) {
4891  goto error;
4892  }
4893 
4894  index = btr_cur_get_index(cursor);
4895  page = btr_cur_get_page(cursor);
4896  space = dict_index_get_space(index);
4897  zip_size = dict_table_zip_size(index->table);
4898 
4899  mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
4900  mtr);
4901  mpage = buf_block_get_frame(mblock);
4902 
4903  n_recs = page_get_n_recs(page);
4904  data_size = page_get_data_size(page);
4905 
4906  max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
4907  mpage, n_recs);
4908 
4909  if (data_size > max_ins_size_reorg) {
4910  goto error;
4911  }
4912 
4913  /* If compression padding tells us that merging will result in
4914  too packed up page i.e.: which is likely to cause compression
4915  failure then don't merge the pages. */
4916  if (zip_size && page_is_leaf(mpage)
4917  && (page_get_data_size(mpage) + data_size
4919 
4920  goto error;
4921  }
4922 
4923 
4924  max_ins_size = page_get_max_insert_size(mpage, n_recs);
4925 
4926  if (data_size > max_ins_size) {
4927 
4928  /* We have to reorganize mpage */
4929 
4930  if (!btr_page_reorganize_block(
4931  false, page_zip_level, mblock, index, mtr)) {
4932 
4933  goto error;
4934  }
4935 
4936  max_ins_size = page_get_max_insert_size(mpage, n_recs);
4937 
4938  ut_ad(page_validate(mpage, index));
4939  ut_ad(max_ins_size == max_ins_size_reorg);
4940 
4941  if (data_size > max_ins_size) {
4942 
4943  /* Add fault tolerance, though this should
4944  never happen */
4945 
4946  goto error;
4947  }
4948  }
4949 
4950  *merge_block = mblock;
4951  DBUG_RETURN(TRUE);
4952 
4953 error:
4954  *merge_block = NULL;
4955  DBUG_RETURN(FALSE);
4956 }
4957 
4958 #endif /* !UNIV_HOTBACKUP */