MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ibuf0ibuf.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "ibuf0ibuf.h"
27 
28 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
29 UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
30 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
31 
33 #define IBUF_BITS_PER_PAGE 4
34 #if IBUF_BITS_PER_PAGE % 2
35 # error "IBUF_BITS_PER_PAGE must be an even number!"
36 #endif
37 
38 #define IBUF_BITMAP PAGE_DATA
39 
40 #ifdef UNIV_NONINL
41 #include "ibuf0ibuf.ic"
42 #endif
43 
44 #ifndef UNIV_HOTBACKUP
45 
46 #include "buf0buf.h"
47 #include "buf0rea.h"
48 #include "fsp0fsp.h"
49 #include "trx0sys.h"
50 #include "fil0fil.h"
51 #include "rem0rec.h"
52 #include "btr0cur.h"
53 #include "btr0pcur.h"
54 #include "btr0btr.h"
55 #include "row0upd.h"
56 #include "sync0sync.h"
57 #include "dict0boot.h"
58 #include "fut0lst.h"
59 #include "lock0lock.h"
60 #include "log0recv.h"
61 #include "que0que.h"
62 #include "srv0start.h" /* srv_shutdown_state */
63 #include "ha_prototypes.h"
64 #include "rem0cmp.h"
65 
66 /* STRUCTURE OF AN INSERT BUFFER RECORD
67 
68 In versions < 4.1.x:
69 
70 1. The first field is the page number.
71 2. The second field is an array which stores type info for each subsequent
72  field. We store the information which affects the ordering of records, and
73  also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
74  is 10 bytes.
75 3. Next we have the fields of the actual index record.
76 
77 In versions >= 4.1.x:
78 
79 Note that contrary to what we planned in the 1990's, there will only be one
80 insert buffer tree, and that is in the system tablespace of InnoDB.
81 
82 1. The first field is the space id.
83 2. The second field is a one-byte marker (0) which differentiates records from
84  the < 4.1.x storage format.
85 3. The third field is the page number.
86 4. The fourth field contains the type info, where we have also added 2 bytes to
87  store the charset. In the compressed table format of 5.0.x we must add more
88  information here so that we can build a dummy 'index' struct which 5.0.x
89  can use in the binary search on the index page in the ibuf merge phase.
90 5. The rest of the fields contain the fields of the actual index record.
91 
92 In versions >= 5.0.3:
93 
94 The first byte of the fourth field is an additional marker (0) if the record
95 is in the compact format. The presence of this marker can be detected by
96 looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
97 
98 The high-order bit of the character set field in the type info is the
99 "nullable" flag for the field.
100 
101 In versions >= 5.5:
102 
103 The optional marker byte at the start of the fourth field is replaced by
104 mandatory 3 fields, totaling 4 bytes:
105 
106  1. 2 bytes: Counter field, used to sort records within a (space id, page
107  no) in the order they were added. This is needed so that for example the
108  sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
109  correctly.
110 
111  2. 1 byte: Operation type (see ibuf_op_t).
112 
113  3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
114 
115 To ensure older records, which do not have counters to enforce correct
116 sorting, are merged before any new records, ibuf_insert checks if we're
117 trying to insert to a position that contains old-style records, and if so,
118 refuses the insert. Thus, ibuf pages are gradually converted to the new
119 format as their corresponding buffer pool pages are read into memory.
120 */
121 
122 
123 /* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
124 
125 If an OS thread performs any operation that brings in disk pages from
126 non-system tablespaces into the buffer pool, or creates such a page there,
127 then the operation may have as a side effect an insert buffer index tree
128 compression. Thus, the tree latch of the insert buffer tree may be acquired
129 in the x-mode, and also the file space latch of the system tablespace may
130 be acquired in the x-mode.
131 
132 Also, an insert to an index in a non-system tablespace can have the same
133 effect. How do we know this cannot lead to a deadlock of OS threads? There
134 is a problem with the i/o-handler threads: they break the latching order
135 because they own x-latches to pages which are on a lower level than the
136 insert buffer tree latch, its page latches, and the tablespace latch an
137 insert buffer operation can reserve.
138 
139 The solution is the following: Let all the tree and page latches connected
140 with the insert buffer be later in the latching order than the fsp latch and
141 fsp page latches.
142 
143 Insert buffer pages must be such that the insert buffer is never invoked
144 when these pages are accessed as this would result in a recursion violating
145 the latching order. We let a special i/o-handler thread take care of i/o to
146 the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
147 pages and the first inode page, which contains the inode of the ibuf tree: let
148 us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
149 access both non-ibuf and ibuf pages.
150 
151 Then an i/o-handler for the insert buffer never needs to access recursively the
152 insert buffer tree and thus obeys the latching order. On the other hand, other
153 i/o-handlers for other tablespaces may require access to the insert buffer,
154 but because all kinds of latches they need to access there are later in the
155 latching order, no violation of the latching order occurs in this case,
156 either.
157 
158 A problem is how to grow and contract an insert buffer tree. As it is later
159 in the latching order than the fsp management, we have to reserve the fsp
160 latch first, before adding or removing pages from the insert buffer tree.
161 We let the insert buffer tree have its own file space management: a free
162 list of pages linked to the tree root. To prevent recursive using of the
163 insert buffer when adding pages to the tree, we must first load these pages
164 to memory, obtaining a latch on them, and only after that add them to the
165 free list of the insert buffer tree. More difficult is removing of pages
166 from the free list. If there is an excess of pages in the free list of the
167 ibuf tree, they might be needed if some thread reserves the fsp latch,
168 intending to allocate more file space. So we do the following: if a thread
169 reserves the fsp latch, we check the writer count field of the latch. If
170 this field has value 1, it means that the thread did not own the latch
171 before entering the fsp system, and the mtr of the thread contains no
172 modifications to the fsp pages. Now we are free to reserve the ibuf latch,
173 and check if there is an excess of pages in the free list. We can then, in a
174 separate mini-transaction, take them out of the free list and free them to
175 the fsp system.
176 
177 To avoid deadlocks in the ibuf system, we divide file pages into three levels:
178 
179 (1) non-ibuf pages,
180 (2) ibuf tree pages and the pages in the ibuf tree free list, and
181 (3) ibuf bitmap pages.
182 
183 No OS thread is allowed to access higher level pages if it has latches to
184 lower level pages; even if the thread owns a B-tree latch it must not access
185 the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
186 is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
187 exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
188 level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
189 it uses synchronous aio, it can access any pages, as long as it obeys the
190 access order rules. */
191 
193 #define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
194 
196 UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
197 
198 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
199 
200 UNIV_INTERN uint ibuf_debug;
201 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
202 
204 UNIV_INTERN ibuf_t* ibuf = NULL;
205 
206 #ifdef UNIV_PFS_MUTEX
207 UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
208 UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
209 UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
210 #endif /* UNIV_PFS_MUTEX */
211 
212 #ifdef UNIV_IBUF_COUNT_DEBUG
213 
214 #define IBUF_COUNT_N_SPACES 4
215 
216 #define IBUF_COUNT_N_PAGES 130000
217 
219 static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
220 
221 /******************************************************************/
223 UNIV_INLINE
224 void
225 ibuf_count_check(
226 /*=============*/
227  ulint space_id,
228  ulint page_no)
229 {
230  if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
231  return;
232  }
233 
234  fprintf(stderr,
235  "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
236  "InnoDB: and breaks crash recovery.\n"
237  "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
238  "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
239  (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
240  (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
241  ut_error;
242 }
243 #endif
244 
246 /* @{ */
247 #define IBUF_BITMAP_FREE 0
249 #define IBUF_BITMAP_BUFFERED 2
251 #define IBUF_BITMAP_IBUF 3
255 /* @} */
256 
257 #define IBUF_REC_FIELD_SPACE 0
259 #define IBUF_REC_FIELD_MARKER 1
261 #define IBUF_REC_FIELD_PAGE 2
263 #define IBUF_REC_FIELD_METADATA 3 /* the metadata field */
264 #define IBUF_REC_FIELD_USER 4 /* first user field */
265 
266 /* Various constants for checking the type of an ibuf record and extracting
267 data from it. For details, see the description of the record format at the
268 top of this file. */
269 
273 /* @{ */
274 #define IBUF_REC_INFO_SIZE 4
276 #if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
277 # error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
278 #endif
279 
280 /* Offsets for the fields at the beginning of the fourth field */
281 #define IBUF_REC_OFFSET_COUNTER 0
282 #define IBUF_REC_OFFSET_TYPE 2
283 #define IBUF_REC_OFFSET_FLAGS 3
285 /* Record flag masks */
286 #define IBUF_REC_COMPACT 0x1
293 static ib_mutex_t ibuf_pessimistic_insert_mutex;
294 
296 static ib_mutex_t ibuf_mutex;
297 
299 static ib_mutex_t ibuf_bitmap_mutex;
300 
302 #define IBUF_MERGE_AREA 8UL
303 
307 #define IBUF_MERGE_THRESHOLD 4
308 
311 #define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
312 
316 #define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
317 
321 #define IBUF_CONTRACT_ON_INSERT_SYNC 5
322 
326 #define IBUF_CONTRACT_DO_NOT_INSERT 10
327 
328 /* TODO: how to cope with drop table if there are records in the insert
329 buffer for the indexes of the table? Is there actually any problem,
330 because ibuf merge is done to a page when it is read in, and it is
331 still physically like the index page even if the index would have been
332 dropped! So, there seems to be no problem. */
333 
334 /******************************************************************/
337 UNIV_INLINE
338 void
340 /*=======*/
341  mtr_t* mtr)
342 {
343  ut_ad(!mtr->inside_ibuf);
344  mtr->inside_ibuf = TRUE;
345 }
346 
347 /******************************************************************/
350 UNIV_INLINE
351 void
353 /*======*/
354  mtr_t* mtr)
355 {
356  ut_ad(mtr->inside_ibuf);
357  mtr->inside_ibuf = FALSE;
358 }
359 
360 /**************************************************************/
363 UNIV_INLINE
364 void
366 /*=============================*/
367  btr_pcur_t* pcur,
368  mtr_t* mtr)
369 {
370  ut_d(ibuf_exit(mtr));
371  btr_pcur_commit_specify_mtr(pcur, mtr);
372 }
373 
374 /******************************************************************/
377 static
378 page_t*
379 ibuf_header_page_get(
380 /*=================*/
381  mtr_t* mtr)
382 {
384 
385  ut_ad(!ibuf_inside(mtr));
386 
387  block = buf_page_get(
388  IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
389  buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
390 
391  return(buf_block_get_frame(block));
392 }
393 
394 /******************************************************************/
397 static
398 page_t*
399 ibuf_tree_root_get(
400 /*===============*/
401  mtr_t* mtr)
402 {
404  page_t* root;
405 
406  ut_ad(ibuf_inside(mtr));
407  ut_ad(mutex_own(&ibuf_mutex));
408 
409  mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
410 
411  block = buf_page_get(
412  IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
413 
414  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
415 
416  root = buf_block_get_frame(block);
417 
418  ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
419  ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
420  ut_ad(ibuf->empty == page_is_empty(root));
421 
422  return(root);
423 }
424 
425 #ifdef UNIV_IBUF_COUNT_DEBUG
426 /******************************************************************/
430 UNIV_INTERN
431 ulint
432 ibuf_count_get(
433 /*===========*/
434  ulint space,
435  ulint page_no)
436 {
437  ibuf_count_check(space, page_no);
438 
439  return(ibuf_counts[space][page_no]);
440 }
441 
442 /******************************************************************/
444 static
445 void
446 ibuf_count_set(
447 /*===========*/
448  ulint space,
449  ulint page_no,
450  ulint val)
451 {
452  ibuf_count_check(space, page_no);
453  ut_a(val < UNIV_PAGE_SIZE);
454 
455  ibuf_counts[space][page_no] = val;
456 }
457 #endif
458 
459 /******************************************************************/
461 UNIV_INTERN
462 void
464 /*============*/
465 {
466  mutex_free(&ibuf_pessimistic_insert_mutex);
467  memset(&ibuf_pessimistic_insert_mutex,
468  0x0, sizeof(ibuf_pessimistic_insert_mutex));
469 
470  mutex_free(&ibuf_mutex);
471  memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
472 
473  mutex_free(&ibuf_bitmap_mutex);
474  memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
475 
476  mem_free(ibuf);
477  ibuf = NULL;
478 }
479 
480 /******************************************************************/
483 static
484 void
485 ibuf_size_update(
486 /*=============*/
487  const page_t* root,
488  mtr_t* mtr)
489 {
490  ut_ad(mutex_own(&ibuf_mutex));
491 
492  ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
493  + PAGE_BTR_IBUF_FREE_LIST, mtr);
494 
495  ibuf->height = 1 + btr_page_get_level(root, mtr);
496 
497  /* the '1 +' is the ibuf header page */
498  ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
499 }
500 
501 /******************************************************************/
504 UNIV_INTERN
505 void
507 /*=======================*/
508 {
509  page_t* root;
510  mtr_t mtr;
512  mem_heap_t* heap;
514  ulint n_used;
515  page_t* header_page;
516  dberr_t error;
517 
518  ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
519 
520  /* At startup we intialize ibuf to have a maximum of
521  CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
522  buffer pool size. Once ibuf struct is initialized this
523  value is updated with the user supplied size by calling
524  ibuf_max_size_update(). */
525  ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
527 
528  mutex_create(ibuf_pessimistic_insert_mutex_key,
529  &ibuf_pessimistic_insert_mutex,
530  SYNC_IBUF_PESS_INSERT_MUTEX);
531 
532  mutex_create(ibuf_mutex_key,
533  &ibuf_mutex, SYNC_IBUF_MUTEX);
534 
535  mutex_create(ibuf_bitmap_mutex_key,
536  &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
537 
538  mtr_start(&mtr);
539 
540  mutex_enter(&ibuf_mutex);
541 
542  mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
543 
544  header_page = ibuf_header_page_get(&mtr);
545 
546  fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
547  &n_used, &mtr);
548  ibuf_enter(&mtr);
549 
550  ut_ad(n_used >= 2);
551 
552  ibuf->seg_size = n_used;
553 
554  {
556 
557  block = buf_page_get(
558  IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
559  RW_X_LATCH, &mtr);
560  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
561 
562  root = buf_block_get_frame(block);
563  }
564 
565  ibuf_size_update(root, &mtr);
566  mutex_exit(&ibuf_mutex);
567 
568  ibuf->empty = page_is_empty(root);
569  ibuf_mtr_commit(&mtr);
570 
571  heap = mem_heap_create(450);
572 
573  /* Use old-style record format for the insert buffer. */
574  table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0);
575 
576  dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
577 
578  table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
579 
580  dict_table_add_to_cache(table, FALSE, heap);
581  mem_heap_free(heap);
582 
583  index = dict_mem_index_create(
584  IBUF_TABLE_NAME, "CLUST_IND",
585  IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
586 
587  dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
588 
589  index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
590 
591  error = dict_index_add_to_cache(table, index,
592  FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
593  ut_a(error == DB_SUCCESS);
594 
595  ibuf->index = dict_table_get_first_index(table);
596 }
597 
598 /*********************************************************************/
600 UNIV_INTERN
601 void
603 /*=================*/
604  ulint new_val)
606 {
607  ulint new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
608  * new_val) / 100;
609  mutex_enter(&ibuf_mutex);
610  ibuf->max_size = new_size;
611  mutex_exit(&ibuf_mutex);
612 }
613 
614 
615 #endif /* !UNIV_HOTBACKUP */
616 /*********************************************************************/
618 UNIV_INTERN
619 void
621 /*==================*/
622  buf_block_t* block,
623  mtr_t* mtr)
624 {
625  page_t* page;
626  ulint byte_offset;
627  ulint zip_size = buf_block_get_zip_size(block);
628 
629  ut_a(ut_is_2pow(zip_size));
630 
631  page = buf_block_get_frame(block);
633 
634  /* Write all zeros to the bitmap */
635 
636  if (!zip_size) {
637  byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
639  } else {
640  byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
641  }
642 
643  memset(page + IBUF_BITMAP, 0, byte_offset);
644 
645  /* The remaining area (up to the page trailer) is uninitialized. */
646 
647 #ifndef UNIV_HOTBACKUP
649 #endif /* !UNIV_HOTBACKUP */
650 }
651 
652 /*********************************************************************/
655 UNIV_INTERN
656 byte*
658 /*===================*/
659  byte* ptr,
660  byte* end_ptr __attribute__((unused)),
661  buf_block_t* block,
662  mtr_t* mtr)
663 {
664  ut_ad(ptr && end_ptr);
665 
666  if (block) {
667  ibuf_bitmap_page_init(block, mtr);
668  }
669 
670  return(ptr);
671 }
672 #ifndef UNIV_HOTBACKUP
673 # ifdef UNIV_DEBUG
674 
681 # define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
682  ibuf_bitmap_page_get_bits_low(page, offset, zs, \
683  MTR_MEMO_PAGE_X_FIX, mtr, bit)
684 # else /* UNIV_DEBUG */
685 
692 # define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
693  ibuf_bitmap_page_get_bits_low(page, offset, zs, bit)
694 # endif /* UNIV_DEBUG */
695 
696 /********************************************************************/
699 UNIV_INLINE
700 ulint
702 /*==========================*/
703  const page_t* page,
704  ulint page_no,
705  ulint zip_size,
707 #ifdef UNIV_DEBUG
708  ulint latch_type,
711  mtr_t* mtr,
713 #endif /* UNIV_DEBUG */
714  ulint bit)
716 {
717  ulint byte_offset;
718  ulint bit_offset;
719  ulint map_byte;
720  ulint value;
721 
722  ut_ad(bit < IBUF_BITS_PER_PAGE);
723 #if IBUF_BITS_PER_PAGE % 2
724 # error "IBUF_BITS_PER_PAGE % 2 != 0"
725 #endif
726  ut_ad(ut_is_2pow(zip_size));
727  ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
728 
729  if (!zip_size) {
730  bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
731  + bit;
732  } else {
733  bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
734  + bit;
735  }
736 
737  byte_offset = bit_offset / 8;
738  bit_offset = bit_offset % 8;
739 
740  ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
741 
742  map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
743 
744  value = ut_bit_get_nth(map_byte, bit_offset);
745 
746  if (bit == IBUF_BITMAP_FREE) {
747  ut_ad(bit_offset + 1 < 8);
748 
749  value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
750  }
751 
752  return(value);
753 }
754 
755 /********************************************************************/
757 static
758 void
759 ibuf_bitmap_page_set_bits(
760 /*======================*/
761  page_t* page,
762  ulint page_no,
763  ulint zip_size,
765  ulint bit,
766  ulint val,
767  mtr_t* mtr)
768 {
769  ulint byte_offset;
770  ulint bit_offset;
771  ulint map_byte;
772 
773  ut_ad(bit < IBUF_BITS_PER_PAGE);
774 #if IBUF_BITS_PER_PAGE % 2
775 # error "IBUF_BITS_PER_PAGE % 2 != 0"
776 #endif
777  ut_ad(ut_is_2pow(zip_size));
778  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
779 #ifdef UNIV_IBUF_COUNT_DEBUG
780  ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
781  || (0 == ibuf_count_get(page_get_space_id(page),
782  page_no)));
783 #endif
784  if (!zip_size) {
785  bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
786  + bit;
787  } else {
788  bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
789  + bit;
790  }
791 
792  byte_offset = bit_offset / 8;
793  bit_offset = bit_offset % 8;
794 
795  ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
796 
797  map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
798 
799  if (bit == IBUF_BITMAP_FREE) {
800  ut_ad(bit_offset + 1 < 8);
801  ut_ad(val <= 3);
802 
803  map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
804  map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
805  } else {
806  ut_ad(val <= 1);
807  map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
808  }
809 
810  mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
811  MLOG_1BYTE, mtr);
812 }
813 
814 /********************************************************************/
817 UNIV_INLINE
818 ulint
820 /*=====================*/
821  ulint zip_size,
823  ulint page_no)
824 {
825  ut_ad(ut_is_2pow(zip_size));
826 
827  if (!zip_size) {
828  return(FSP_IBUF_BITMAP_OFFSET
829  + (page_no & ~(UNIV_PAGE_SIZE - 1)));
830  } else {
831  return(FSP_IBUF_BITMAP_OFFSET
832  + (page_no & ~(zip_size - 1)));
833  }
834 }
835 
836 /********************************************************************/
842 static
843 page_t*
844 ibuf_bitmap_get_map_page_func(
845 /*==========================*/
846  ulint space,
847  ulint page_no,
848  ulint zip_size,
850  const char* file,
851  ulint line,
852  mtr_t* mtr)
853 {
855 
856  block = buf_page_get_gen(space, zip_size,
857  ibuf_bitmap_page_no_calc(zip_size, page_no),
858  RW_X_LATCH, NULL, BUF_GET,
859  file, line, mtr);
860  buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
861 
862  return(buf_block_get_frame(block));
863 }
864 
865 /********************************************************************/
875 #define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
876  ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
877  __FILE__, __LINE__, mtr)
878 
879 /************************************************************************/
884 UNIV_INLINE
885 void
887 /*===================*/
888  ulint zip_size,
890  const buf_block_t* block,
893  ulint val,
894  mtr_t* mtr)
895 {
896  page_t* bitmap_page;
897  ulint space;
898  ulint page_no;
899 
900  if (!page_is_leaf(buf_block_get_frame(block))) {
901 
902  return;
903  }
904 
905  space = buf_block_get_space(block);
906  page_no = buf_block_get_page_no(block);
907  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
908 #ifdef UNIV_IBUF_DEBUG
909 # if 0
910  fprintf(stderr,
911  "Setting space %lu page %lu free bits to %lu should be %lu\n",
912  space, page_no, val,
913  ibuf_index_page_calc_free(zip_size, block));
914 # endif
915 
916  ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
917 #endif /* UNIV_IBUF_DEBUG */
918  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
919  IBUF_BITMAP_FREE, val, mtr);
920 }
921 
922 /************************************************************************/
927 UNIV_INTERN
928 void
930 /*====================*/
931  buf_block_t* block,
933 #ifdef UNIV_IBUF_DEBUG
934  ulint max_val,
937 #endif /* UNIV_IBUF_DEBUG */
938  ulint val)
939 {
940  mtr_t mtr;
941  page_t* page;
942  page_t* bitmap_page;
943  ulint space;
944  ulint page_no;
945  ulint zip_size;
946 
947  page = buf_block_get_frame(block);
948 
949  if (!page_is_leaf(page)) {
950 
951  return;
952  }
953 
954  mtr_start(&mtr);
955 
956  space = buf_block_get_space(block);
957  page_no = buf_block_get_page_no(block);
958  zip_size = buf_block_get_zip_size(block);
959  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
960 
961 #ifdef UNIV_IBUF_DEBUG
962  if (max_val != ULINT_UNDEFINED) {
963  ulint old_val;
964 
965  old_val = ibuf_bitmap_page_get_bits(
966  bitmap_page, page_no, zip_size,
967  IBUF_BITMAP_FREE, &mtr);
968 # if 0
969  if (old_val != max_val) {
970  fprintf(stderr,
971  "Ibuf: page %lu old val %lu max val %lu\n",
972  page_get_page_no(page),
973  old_val, max_val);
974  }
975 # endif
976 
977  ut_a(old_val <= max_val);
978  }
979 # if 0
980  fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
981  page_get_page_no(page), val,
982  ibuf_index_page_calc_free(zip_size, block));
983 # endif
984 
985  ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
986 #endif /* UNIV_IBUF_DEBUG */
987  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
988  IBUF_BITMAP_FREE, val, &mtr);
989  mtr_commit(&mtr);
990 }
991 
992 /************************************************************************/
1001 UNIV_INTERN
1002 void
1004 /*=================*/
1005  buf_block_t* block)
1008 {
1009  ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
1010 }
1011 
1012 /**********************************************************************/
1020 UNIV_INTERN
1021 void
1023 /*======================*/
1024  const buf_block_t* block,
1025  ulint max_ins_size,
1030  mtr_t* mtr)
1031 {
1032  ulint before;
1033  ulint after;
1034 
1035  ut_a(!buf_block_get_page_zip(block));
1036 
1037  before = ibuf_index_page_calc_free_bits(0, max_ins_size);
1038 
1039  after = ibuf_index_page_calc_free(0, block);
1040 
1041  /* This approach cannot be used on compressed pages, since the
1042  computed value of "before" often does not match the current
1043  state of the bitmap. This is because the free space may
1044  increase or decrease when a compressed page is reorganized. */
1045  if (before != after) {
1046  ibuf_set_free_bits_low(0, block, after, mtr);
1047  }
1048 }
1049 
1050 /**********************************************************************/
1058 UNIV_INTERN
1059 void
1061 /*======================*/
1062  buf_block_t* block,
1063  mtr_t* mtr)
1064 {
1065  page_t* bitmap_page;
1066  ulint space;
1067  ulint page_no;
1068  ulint zip_size;
1069  ulint after;
1070 
1071  space = buf_block_get_space(block);
1072  page_no = buf_block_get_page_no(block);
1073  zip_size = buf_block_get_zip_size(block);
1074 
1075  ut_a(page_is_leaf(buf_block_get_frame(block)));
1076  ut_a(zip_size);
1077 
1078  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
1079 
1080  after = ibuf_index_page_calc_free_zip(zip_size, block);
1081 
1082  if (after == 0) {
1083  /* We move the page to the front of the buffer pool LRU list:
1084  the purpose of this is to prevent those pages to which we
1085  cannot make inserts using the insert buffer from slipping
1086  out of the buffer pool */
1087 
1088  buf_page_make_young(&block->page);
1089  }
1090 
1091  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
1092  IBUF_BITMAP_FREE, after, mtr);
1093 }
1094 
1095 /**********************************************************************/
1102 UNIV_INTERN
1103 void
1105 /*====================================*/
1106  ulint zip_size,
1108  buf_block_t* block1,
1109  buf_block_t* block2,
1110  mtr_t* mtr)
1111 {
1112  ulint state;
1113 
1114  /* As we have to x-latch two random bitmap pages, we have to acquire
1115  the bitmap mutex to prevent a deadlock with a similar operation
1116  performed by another OS thread. */
1117 
1118  mutex_enter(&ibuf_bitmap_mutex);
1119 
1120  state = ibuf_index_page_calc_free(zip_size, block1);
1121 
1122  ibuf_set_free_bits_low(zip_size, block1, state, mtr);
1123 
1124  state = ibuf_index_page_calc_free(zip_size, block2);
1125 
1126  ibuf_set_free_bits_low(zip_size, block2, state, mtr);
1127 
1128  mutex_exit(&ibuf_bitmap_mutex);
1129 }
1130 
1131 /**********************************************************************/
1134 UNIV_INLINE
1135 ibool
1137 /*=================*/
1138  ulint space,
1139  ulint zip_size,
1141  ulint page_no)
1142 {
1143  return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
1144  || ibuf_bitmap_page(zip_size, page_no));
1145 }
1146 
1147 /***********************************************************************/
1151 UNIV_INTERN
1152 ibool
1154 /*==========*/
1155  ulint space,
1156  ulint zip_size,
1157  ulint page_no,
1158 #ifdef UNIV_DEBUG
1159  ibool x_latch,
1161 #endif /* UNIV_DEBUG */
1162  const char* file,
1163  ulint line,
1164  mtr_t* mtr)
1169 {
1170  ibool ret;
1171  mtr_t local_mtr;
1172  page_t* bitmap_page;
1173 
1175  ut_ad(x_latch || mtr == NULL);
1176 
1177  if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
1178 
1179  return(TRUE);
1180  } else if (space != IBUF_SPACE_ID) {
1181 
1182  return(FALSE);
1183  }
1184 
1185  ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
1186 
1187 #ifdef UNIV_DEBUG
1188  if (!x_latch) {
1189  mtr_start(&local_mtr);
1190 
1191  /* Get the bitmap page without a page latch, so that
1192  we will not be violating the latching order when
1193  another bitmap page has already been latched by this
1194  thread. The page will be buffer-fixed, and thus it
1195  cannot be removed or relocated while we are looking at
1196  it. The contents of the page could change, but the
1197  IBUF_BITMAP_IBUF bit that we are interested in should
1198  not be modified by any other thread. Nobody should be
1199  calling ibuf_add_free_page() or ibuf_remove_free_page()
1200  while the page is linked to the insert buffer b-tree. */
1201 
1202  bitmap_page = buf_block_get_frame(
1204  space, zip_size,
1205  ibuf_bitmap_page_no_calc(zip_size, page_no),
1206  RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
1207  file, line, &local_mtr));
1208 
1210  bitmap_page, page_no, zip_size,
1211  MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
1212 
1213  mtr_commit(&local_mtr);
1214  return(ret);
1215  }
1216 #endif /* UNIV_DEBUG */
1217 
1218  if (mtr == NULL) {
1219  mtr = &local_mtr;
1220  mtr_start(mtr);
1221  }
1222 
1223  bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size,
1224  file, line, mtr);
1225 
1226  ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
1227  IBUF_BITMAP_IBUF, mtr);
1228 
1229  if (mtr == &local_mtr) {
1230  mtr_commit(mtr);
1231  }
1232 
1233  return(ret);
1234 }
1235 
/* In debug builds the accessor also receives the mini-transaction so
that page latching can be asserted; in release builds the mtr argument
is compiled out. */
#ifdef UNIV_DEBUG
# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(mtr,rec)
#else /* UNIV_DEBUG */
# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(rec)
#endif /* UNIV_DEBUG */
1241 
1242 /********************************************************************/
1245 static
1246 ulint
1247 ibuf_rec_get_page_no_func(
1248 /*======================*/
1249 #ifdef UNIV_DEBUG
1250  mtr_t* mtr,
1251 #endif /* UNIV_DEBUG */
1252  const rec_t* rec)
1253 {
1254  const byte* field;
1255  ulint len;
1256 
1257  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
1258  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
1259  ut_ad(ibuf_inside(mtr));
1260  ut_ad(rec_get_n_fields_old(rec) > 2);
1261 
1262  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
1263 
1264  ut_a(len == 1);
1265 
1266  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
1267 
1268  ut_a(len == 4);
1269 
1270  return(mach_read_from_4(field));
1271 }
1272 
/* Debug builds pass the mtr through for latching assertions; release
builds drop it. */
#ifdef UNIV_DEBUG
# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(mtr,rec)
#else /* UNIV_DEBUG */
# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(rec)
#endif /* UNIV_DEBUG */
1278 
1279 /********************************************************************/
1283 static
1284 ulint
1285 ibuf_rec_get_space_func(
1286 /*====================*/
1287 #ifdef UNIV_DEBUG
1288  mtr_t* mtr,
1289 #endif /* UNIV_DEBUG */
1290  const rec_t* rec)
1291 {
1292  const byte* field;
1293  ulint len;
1294 
1295  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
1296  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
1297  ut_ad(ibuf_inside(mtr));
1298  ut_ad(rec_get_n_fields_old(rec) > 2);
1299 
1300  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
1301 
1302  ut_a(len == 1);
1303 
1304  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
1305 
1306  ut_a(len == 4);
1307 
1308  return(mach_read_from_4(field));
1309 }
1310 
/* Debug/release dispatch for the metadata parser; the mtr is only used
for latching assertions. */
#ifdef UNIV_DEBUG
# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
	ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter)
#else /* UNIV_DEBUG */
# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
	ibuf_rec_get_info_func(rec,op,comp,info_len,counter)
#endif
1318 /****************************************************************/
1320 static
1321 void
1322 ibuf_rec_get_info_func(
1323 /*===================*/
1324 #ifdef UNIV_DEBUG
1325  mtr_t* mtr,
1326 #endif /* UNIV_DEBUG */
1327  const rec_t* rec,
1328  ibuf_op_t* op,
1329  ibool* comp,
1330  ulint* info_len,
1333  ulint* counter)
1334 {
1335  const byte* types;
1336  ulint fields;
1337  ulint len;
1338 
1339  /* Local variables to shadow arguments. */
1340  ibuf_op_t op_local;
1341  ibool comp_local;
1342  ulint info_len_local;
1343  ulint counter_local;
1344 
1345  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
1346  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
1347  ut_ad(ibuf_inside(mtr));
1348  fields = rec_get_n_fields_old(rec);
1349  ut_a(fields > IBUF_REC_FIELD_USER);
1350 
1351  types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
1352 
1353  info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1354 
1355  switch (info_len_local) {
1356  case 0:
1357  case 1:
1358  op_local = IBUF_OP_INSERT;
1359  comp_local = info_len_local;
1360  ut_ad(!counter);
1361  counter_local = ULINT_UNDEFINED;
1362  break;
1363 
1364  case IBUF_REC_INFO_SIZE:
1365  op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
1366  comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
1367  counter_local = mach_read_from_2(
1368  types + IBUF_REC_OFFSET_COUNTER);
1369  break;
1370 
1371  default:
1372  ut_error;
1373  }
1374 
1375  ut_a(op_local < IBUF_OP_COUNT);
1376  ut_a((len - info_len_local) ==
1377  (fields - IBUF_REC_FIELD_USER)
1378  * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1379 
1380  if (op) {
1381  *op = op_local;
1382  }
1383 
1384  if (comp) {
1385  *comp = comp_local;
1386  }
1387 
1388  if (info_len) {
1389  *info_len = info_len_local;
1390  }
1391 
1392  if (counter) {
1393  *counter = counter_local;
1394  }
1395 }
1396 
/* Debug/release dispatch for the operation-type accessor. */
#ifdef UNIV_DEBUG
# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(mtr,rec)
#else /* UNIV_DEBUG */
# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(rec)
#endif
1402 
1403 /****************************************************************/
1406 static
1407 ibuf_op_t
1408 ibuf_rec_get_op_type_func(
1409 /*======================*/
1410 #ifdef UNIV_DEBUG
1411  mtr_t* mtr,
1412 #endif /* UNIV_DEBUG */
1413  const rec_t* rec)
1414 {
1415  ulint len;
1416 
1417  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
1418  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
1419  ut_ad(ibuf_inside(mtr));
1420  ut_ad(rec_get_n_fields_old(rec) > 2);
1421 
1422  (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
1423 
1424  if (len > 1) {
1425  /* This is a < 4.1.x format record */
1426 
1427  return(IBUF_OP_INSERT);
1428  } else {
1429  ibuf_op_t op;
1430 
1431  ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL);
1432 
1433  return(op);
1434  }
1435 }
1436 
1437 /****************************************************************/
1442 UNIV_INTERN
1443 ulint
1445 /*=================*/
1446  const rec_t* rec)
1447 {
1448  const byte* ptr;
1449  ulint len;
1450 
1451  if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) {
1452 
1453  return(ULINT_UNDEFINED);
1454  }
1455 
1456  ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
1457 
1458  if (len >= 2) {
1459 
1460  return(mach_read_from_2(ptr));
1461  } else {
1462 
1463  return(ULINT_UNDEFINED);
1464  }
1465 }
1466 
1467 /****************************************************************/
1470 static
1471 void
1472 ibuf_add_ops(
1473 /*=========*/
1474  ulint* arr,
1475  const ulint* ops)
1477 {
1478  ulint i;
1479 
1480 #ifndef HAVE_ATOMIC_BUILTINS
1481  ut_ad(mutex_own(&ibuf_mutex));
1482 #endif /* !HAVE_ATOMIC_BUILTINS */
1483 
1484  for (i = 0; i < IBUF_OP_COUNT; i++) {
1485 #ifdef HAVE_ATOMIC_BUILTINS
1486  os_atomic_increment_ulint(&arr[i], ops[i]);
1487 #else /* HAVE_ATOMIC_BUILTINS */
1488  arr[i] += ops[i];
1489 #endif /* HAVE_ATOMIC_BUILTINS */
1490  }
1491 }
1492 
1493 /****************************************************************/
1495 static
1496 void
1497 ibuf_print_ops(
1498 /*===========*/
1499  const ulint* ops,
1500  FILE* file)
1501 {
1502  static const char* op_names[] = {
1503  "insert",
1504  "delete mark",
1505  "delete"
1506  };
1507  ulint i;
1508 
1509  ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
1510 
1511  for (i = 0; i < IBUF_OP_COUNT; i++) {
1512  fprintf(file, "%s %lu%s", op_names[i],
1513  (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
1514  }
1515 
1516  putc('\n', file);
1517 }
1518 
1519 /********************************************************************/
1522 static
1523 dict_index_t*
1524 ibuf_dummy_index_create(
1525 /*====================*/
1526  ulint n,
1527  ibool comp)
1528 {
1531 
1532  table = dict_mem_table_create("IBUF_DUMMY",
1533  DICT_HDR_SPACE, n,
1534  comp ? DICT_TF_COMPACT : 0, 0);
1535 
1536  index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
1537  DICT_HDR_SPACE, 0, n);
1538 
1539  index->table = table;
1540 
1541  /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1542  index->cached = TRUE;
1543 
1544  return(index);
1545 }
1546 /********************************************************************/
1548 static
1549 void
1550 ibuf_dummy_index_add_col(
1551 /*=====================*/
1552  dict_index_t* index,
1553  const dtype_t* type,
1554  ulint len)
1555 {
1556  ulint i = index->table->n_def;
1557  dict_mem_table_add_col(index->table, NULL, NULL,
1558  dtype_get_mtype(type),
1559  dtype_get_prtype(type),
1560  dtype_get_len(type));
1561  dict_index_add_col(index, index->table,
1562  dict_table_get_nth_col(index->table, i), len);
1563 }
1564 /********************************************************************/
1566 static
1567 void
1568 ibuf_dummy_index_free(
1569 /*==================*/
1570  dict_index_t* index)
1571 {
1572  dict_table_t* table = index->table;
1573 
1574  dict_mem_index_free(index);
1575  dict_mem_table_free(table);
1576 }
1577 
/* Debug/release dispatch for building an index entry from an ibuf
record; the mtr is only used for latching assertions. */
#ifdef UNIV_DEBUG
# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
	ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
#else /* UNIV_DEBUG */
# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
	ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex)
#endif
1585 
1586 /*********************************************************************/
1602 static
1603 dtuple_t*
1604 ibuf_build_entry_from_ibuf_rec_func(
1605 /*================================*/
1606 #ifdef UNIV_DEBUG
1607  mtr_t* mtr,
1608 #endif /* UNIV_DEBUG */
1609  const rec_t* ibuf_rec,
1610  mem_heap_t* heap,
1611  dict_index_t** pindex)
1613 {
1614  dtuple_t* tuple;
1615  dfield_t* field;
1616  ulint n_fields;
1617  const byte* types;
1618  const byte* data;
1619  ulint len;
1620  ulint info_len;
1621  ulint i;
1622  ulint comp;
1624 
1625  ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
1626  || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
1627  ut_ad(ibuf_inside(mtr));
1628 
1629  data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
1630 
1631  ut_a(len == 1);
1632  ut_a(*data == 0);
1633  ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
1634 
1635  n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER;
1636 
1637  tuple = dtuple_create(heap, n_fields);
1638 
1639  types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
1640 
1641  ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL);
1642 
1643  index = ibuf_dummy_index_create(n_fields, comp);
1644 
1645  len -= info_len;
1646  types += info_len;
1647 
1648  ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1649 
1650  for (i = 0; i < n_fields; i++) {
1651  field = dtuple_get_nth_field(tuple, i);
1652 
1653  data = rec_get_nth_field_old(
1654  ibuf_rec, i + IBUF_REC_FIELD_USER, &len);
1655 
1656  dfield_set_data(field, data, len);
1657 
1659  dfield_get_type(field),
1660  types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1661 
1662  ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
1663  }
1664 
1665  /* Prevent an ut_ad() failure in page_zip_write_rec() by
1666  adding system columns to the dummy table pointed to by the
1667  dummy secondary index. The insert buffer is only used for
1668  secondary indexes, whose records never contain any system
1669  columns, such as DB_TRX_ID. */
1671 
1672  *pindex = index;
1673 
1674  return(tuple);
1675 }
1676 
1677 /******************************************************************/
1680 UNIV_INLINE
1681 ulint
1683 /*==============*/
1684  const rec_t* rec,
1685  const byte* types,
1686  ulint n_fields,
1687  ulint comp)
1689 {
1690  ulint i;
1691  ulint field_offset;
1692  ulint types_offset;
1693  ulint size = 0;
1694 
1695  field_offset = IBUF_REC_FIELD_USER;
1696  types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1697 
1698  for (i = 0; i < n_fields; i++) {
1699  ulint len;
1700  dtype_t dtype;
1701 
1702  rec_get_nth_field_offs_old(rec, i + field_offset, &len);
1703 
1704  if (len != UNIV_SQL_NULL) {
1705  size += len;
1706  } else {
1708 
1709  size += dtype_get_sql_null_size(&dtype, comp);
1710  }
1711 
1712  types += types_offset;
1713  }
1714 
1715  return(size);
1716 }
1717 
/* Debug/release dispatch for the record-volume estimator. */
#ifdef UNIV_DEBUG
# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(mtr,rec)
#else /* UNIV_DEBUG */
# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(rec)
#endif
1723 
1724 /********************************************************************/
1729 static
1730 ulint
1731 ibuf_rec_get_volume_func(
1732 /*=====================*/
1733 #ifdef UNIV_DEBUG
1734  mtr_t* mtr,
1735 #endif /* UNIV_DEBUG */
1736  const rec_t* ibuf_rec)
1737 {
1738  ulint len;
1739  const byte* data;
1740  const byte* types;
1741  ulint n_fields;
1742  ulint data_size;
1743  ulint comp;
1744  ibuf_op_t op;
1745  ulint info_len;
1746 
1747  ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
1748  || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
1749  ut_ad(ibuf_inside(mtr));
1750  ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
1751 
1752  data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
1753  ut_a(len == 1);
1754  ut_a(*data == 0);
1755 
1756  types = rec_get_nth_field_old(
1757  ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
1758 
1759  ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
1760 
1761  if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
1762  /* Delete-marking a record doesn't take any
1763  additional space, and while deleting a record
1764  actually frees up space, we have to play it safe and
1765  pretend it takes no additional space (the record
1766  might not exist, etc.). */
1767 
1768  return(0);
1769  } else if (comp) {
1770  dtuple_t* entry;
1771  ulint volume;
1772  dict_index_t* dummy_index;
1773  mem_heap_t* heap = mem_heap_create(500);
1774 
1775  entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
1776  heap, &dummy_index);
1777 
1778  volume = rec_get_converted_size(dummy_index, entry, 0);
1779 
1780  ibuf_dummy_index_free(dummy_index);
1781  mem_heap_free(heap);
1782 
1783  return(volume + page_dir_calc_reserved_space(1));
1784  }
1785 
1786  types += info_len;
1787  n_fields = rec_get_n_fields_old(ibuf_rec)
1788  - IBUF_REC_FIELD_USER;
1789 
1790  data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
1791 
1792  return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1794 }
1795 
1796 /*********************************************************************/
1804 static
1805 dtuple_t*
1806 ibuf_entry_build(
1807 /*=============*/
1808  ibuf_op_t op,
1809  dict_index_t* index,
1810  const dtuple_t* entry,
1811  ulint space,
1812  ulint page_no,
1814  ulint counter,
1816  mem_heap_t* heap)
1817 {
1818  dtuple_t* tuple;
1819  dfield_t* field;
1820  const dfield_t* entry_field;
1821  ulint n_fields;
1822  byte* buf;
1823  byte* ti;
1824  byte* type_info;
1825  ulint i;
1826 
1827  ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
1828  ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
1829  ut_ad(op < IBUF_OP_COUNT);
1830 
1831  /* We have to build a tuple with the following fields:
1832 
1833  1-4) These are described at the top of this file.
1834 
1835  5) The rest of the fields are copied from the entry.
1836 
1837  All fields in the tuple are ordered like the type binary in our
1838  insert buffer tree. */
1839 
1840  n_fields = dtuple_get_n_fields(entry);
1841 
1842  tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER);
1843 
1844  /* 1) Space Id */
1845 
1846  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
1847 
1848  buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
1849 
1850  mach_write_to_4(buf, space);
1851 
1852  dfield_set_data(field, buf, 4);
1853 
1854  /* 2) Marker byte */
1855 
1856  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
1857 
1858  buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
1859 
1860  /* We set the marker byte zero */
1861 
1862  mach_write_to_1(buf, 0);
1863 
1864  dfield_set_data(field, buf, 1);
1865 
1866  /* 3) Page number */
1867 
1868  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
1869 
1870  buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
1871 
1872  mach_write_to_4(buf, page_no);
1873 
1874  dfield_set_data(field, buf, 4);
1875 
1876  /* 4) Type info, part #1 */
1877 
1878  if (counter == ULINT_UNDEFINED) {
1879  i = dict_table_is_comp(index->table) ? 1 : 0;
1880  } else {
1881  ut_ad(counter <= 0xFFFF);
1882  i = IBUF_REC_INFO_SIZE;
1883  }
1884 
1885  ti = type_info = static_cast<byte*>(
1887  heap,
1888  i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
1889 
1890  switch (i) {
1891  default:
1892  ut_error;
1893  break;
1894  case 1:
1895  /* set the flag for ROW_FORMAT=COMPACT */
1896  *ti++ = 0;
1897  /* fall through */
1898  case 0:
1899  /* the old format does not allow delete buffering */
1900  ut_ad(op == IBUF_OP_INSERT);
1901  break;
1902  case IBUF_REC_INFO_SIZE:
1904 
1905  ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
1907  ? IBUF_REC_COMPACT : 0;
1908  ti += IBUF_REC_INFO_SIZE;
1909  break;
1910  }
1911 
1912  /* 5+) Fields from the entry */
1913 
1914  for (i = 0; i < n_fields; i++) {
1915  ulint fixed_len;
1916  const dict_field_t* ifield;
1917 
1918  field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER);
1919  entry_field = dtuple_get_nth_field(entry, i);
1920  dfield_copy(field, entry_field);
1921 
1922  ifield = dict_index_get_nth_field(index, i);
1923  /* Prefix index columns of fixed-length columns are of
1924  fixed length. However, in the function call below,
1925  dfield_get_type(entry_field) contains the fixed length
1926  of the column in the clustered index. Replace it with
1927  the fixed length of the secondary index column. */
1928  fixed_len = ifield->fixed_len;
1929 
1930 #ifdef UNIV_DEBUG
1931  if (fixed_len) {
1932  /* dict_index_add_col() should guarantee these */
1933  ut_ad(fixed_len <= (ulint)
1934  dfield_get_type(entry_field)->len);
1935  if (ifield->prefix_len) {
1936  ut_ad(ifield->prefix_len == fixed_len);
1937  } else {
1938  ut_ad(fixed_len == (ulint)
1939  dfield_get_type(entry_field)->len);
1940  }
1941  }
1942 #endif /* UNIV_DEBUG */
1943 
1945  ti, dfield_get_type(entry_field), fixed_len);
1946  ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1947  }
1948 
1949  /* 4) Type info, part #2 */
1950 
1951  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
1952 
1953  dfield_set_data(field, type_info, ti - type_info);
1954 
1955  /* Set all the types in the new tuple binary */
1956 
1957  dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER);
1958 
1959  return(tuple);
1960 }
1961 
1962 /*********************************************************************/
1966 static
1967 dtuple_t*
1968 ibuf_search_tuple_build(
1969 /*====================*/
1970  ulint space,
1971  ulint page_no,
1972  mem_heap_t* heap)
1973 {
1974  dtuple_t* tuple;
1975  dfield_t* field;
1976  byte* buf;
1977 
1978  tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
1979 
1980  /* Store the space id in tuple */
1981 
1982  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
1983 
1984  buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
1985 
1986  mach_write_to_4(buf, space);
1987 
1988  dfield_set_data(field, buf, 4);
1989 
1990  /* Store the new format record marker byte */
1991 
1992  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
1993 
1994  buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
1995 
1996  mach_write_to_1(buf, 0);
1997 
1998  dfield_set_data(field, buf, 1);
1999 
2000  /* Store the page number in tuple */
2001 
2002  field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
2003 
2004  buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
2005 
2006  mach_write_to_4(buf, page_no);
2007 
2008  dfield_set_data(field, buf, 4);
2009 
2010  dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA);
2011 
2012  return(tuple);
2013 }
2014 
2015 /*********************************************************************/
2019 UNIV_INLINE
2020 ibool
2022 /*==================================*/
2023 {
2024  ut_ad(mutex_own(&ibuf_mutex));
2025 
2026  /* We want a big margin of free pages, because a B-tree can sometimes
2027  grow in size also if records are deleted from it, as the node pointers
2028  can change, and we must make sure that we are able to delete the
2029  inserts buffered for pages that we read to the buffer pool, without
2030  any risk of running out of free space in the insert buffer. */
2031 
2032  return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
2033 }
2034 
2035 /*********************************************************************/
2039 UNIV_INLINE
2040 ibool
2042 /*=========================*/
2043 {
2044  ut_ad(mutex_own(&ibuf_mutex));
2045 
2046  return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
2047 }
2048 
2049 /*********************************************************************/
2053 static
2054 ibool
2055 ibuf_add_free_page(void)
2056 /*====================*/
2057 {
2058  mtr_t mtr;
2059  page_t* header_page;
2060  ulint flags;
2061  ulint zip_size;
2062  buf_block_t* block;
2063  page_t* page;
2064  page_t* root;
2065  page_t* bitmap_page;
2066 
2067  mtr_start(&mtr);
2068 
2069  /* Acquire the fsp latch before the ibuf header, obeying the latching
2070  order */
2071  mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
2072  zip_size = fsp_flags_get_zip_size(flags);
2073 
2074  header_page = ibuf_header_page_get(&mtr);
2075 
2076  /* Allocate a new page: NOTE that if the page has been a part of a
2077  non-clustered index which has subsequently been dropped, then the
2078  page may have buffered inserts in the insert buffer, and these
2079  should be deleted from there. These get deleted when the page
2080  allocation creates the page in buffer. Thus the call below may end
2081  up calling the insert buffer routines and, as we yet have no latches
2082  to insert buffer tree pages, these routines can run without a risk
2083  of a deadlock. This is the reason why we created a special ibuf
2084  header page apart from the ibuf tree. */
2085 
2086  block = fseg_alloc_free_page(
2087  header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
2088  &mtr);
2089 
2090  if (block == NULL) {
2091  mtr_commit(&mtr);
2092 
2093  return(FALSE);
2094  }
2095 
2096  ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
2097  ibuf_enter(&mtr);
2098  mutex_enter(&ibuf_mutex);
2099  root = ibuf_tree_root_get(&mtr);
2100 
2101  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
2102  page = buf_block_get_frame(block);
2103 
2104  /* Add the page to the free list and update the ibuf size data */
2105 
2106  flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
2107  page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
2108 
2110  MLOG_2BYTES, &mtr);
2111 
2112  ibuf->seg_size++;
2113  ibuf->free_list_len++;
2114 
2115  /* Set the bit indicating that this page is now an ibuf tree page
2116  (level 2 page) */
2117 
2118  bitmap_page = ibuf_bitmap_get_map_page(
2119  IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
2120 
2121  mutex_exit(&ibuf_mutex);
2122 
2123  ibuf_bitmap_page_set_bits(
2124  bitmap_page, buf_block_get_page_no(block), zip_size,
2125  IBUF_BITMAP_IBUF, TRUE, &mtr);
2126 
2127  ibuf_mtr_commit(&mtr);
2128 
2129  return(TRUE);
2130 }
2131 
2132 /*********************************************************************/
/** Removes the last page from the free list of the ibuf tree and frees
it back to the file space management.  Only called while
ibuf_data_too_much_free() holds; a no-op otherwise. */
static
void
ibuf_remove_free_page(void)
/*=======================*/
{
	mtr_t	mtr;
	mtr_t	mtr2;
	page_t*	header_page;
	ulint	flags;
	ulint	zip_size;
	ulint	page_no;
	page_t*	page;
	page_t*	root;
	page_t*	bitmap_page;

	mtr_start(&mtr);

	/* Acquire the fsp latch before the ibuf header, obeying the latching
	order */
	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
	zip_size = fsp_flags_get_zip_size(flags);

	header_page = ibuf_header_page_get(&mtr);

	/* Prevent pessimistic inserts to insert buffer trees for a while */
	ibuf_enter(&mtr);
	mutex_enter(&ibuf_pessimistic_insert_mutex);
	mutex_enter(&ibuf_mutex);

	if (!ibuf_data_too_much_free()) {

		/* Nothing to shrink: release in reverse acquisition
		order and bail out. */
		mutex_exit(&ibuf_mutex);
		mutex_exit(&ibuf_pessimistic_insert_mutex);

		ibuf_mtr_commit(&mtr);

		return;
	}

	ibuf_mtr_start(&mtr2);

	root = ibuf_tree_root_get(&mtr2);

	mutex_exit(&ibuf_mutex);

	/* The last page on the free list is the one to be freed. */
	page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
				&mtr2).page;

	/* NOTE that we must release the latch on the ibuf tree root
	because in fseg_free_page we access level 1 pages, and the root
	is a level 2 page. */

	ibuf_mtr_commit(&mtr2);
	ibuf_exit(&mtr);

	/* Since pessimistic inserts were prevented, we know that the
	page is still in the free list. NOTE that also deletes may take
	pages from the free list, but they take them from the start, and
	the free list was so long that they cannot have taken the last
	page from it. */

	fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
		       IBUF_SPACE_ID, page_no, &mtr);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */

	ibuf_enter(&mtr);

	mutex_enter(&ibuf_mutex);

	root = ibuf_tree_root_get(&mtr);

	/* The page freed above must still be the list tail (see the
	pessimistic-insert reasoning above). */
	ut_ad(page_no == flst_get_last(root + PAGE_HEADER
				       + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);

	{
		buf_block_t*	block;

		block = buf_page_get(
			IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);

		buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);

		page = buf_block_get_frame(block);
	}

	/* Remove the page from the free list and update the ibuf size data */

	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		    page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);

	mutex_exit(&ibuf_pessimistic_insert_mutex);

	ibuf->seg_size--;
	ibuf->free_list_len--;

	/* Set the bit indicating that this page is no more an ibuf tree page
	(level 2 page) */

	bitmap_page = ibuf_bitmap_get_map_page(
		IBUF_SPACE_ID, page_no, zip_size, &mtr);

	mutex_exit(&ibuf_mutex);

	ibuf_bitmap_page_set_bits(
		bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
	ibuf_mtr_commit(&mtr);
}
2248 
2249 /***********************************************************************/
2253 UNIV_INTERN
2254 void
2256 /*========================*/
2257 {
2258  ulint i;
2259 
2260 #ifdef UNIV_SYNC_DEBUG
2261  ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
2262  RW_LOCK_EX));
2263 #endif /* UNIV_SYNC_DEBUG */
2264 
2266  fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
2267 
2268  /* NOTE: We require that the thread did not own the latch before,
2269  because then we know that we can obey the correct latching order
2270  for ibuf latches */
2271 
2272  if (!ibuf) {
2273  /* Not yet initialized; not sure if this is possible, but
2274  does no harm to check for it. */
2275 
2276  return;
2277  }
2278 
2279  /* Free at most a few pages at a time, so that we do not delay the
2280  requested service too much */
2281 
2282  for (i = 0; i < 4; i++) {
2283 
2284  ibool too_much_free;
2285 
2286  mutex_enter(&ibuf_mutex);
2287  too_much_free = ibuf_data_too_much_free();
2288  mutex_exit(&ibuf_mutex);
2289 
2290  if (!too_much_free) {
2291  return;
2292  }
2293 
2294  ibuf_remove_free_page();
2295  }
2296 }
2297 
/* Debug/release dispatch for collecting merge page numbers; the mtr is
only used for latching assertions. */
#ifdef UNIV_DEBUG
# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
	ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored)
#else /* UNIV_DEBUG */
# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
	ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored)
#endif /* UNIV_DEBUG */
2305 
2306 /*********************************************************************/
2310 static
2311 ulint
2312 ibuf_get_merge_page_nos_func(
2313 /*=========================*/
2314  ibool contract,
2318  const rec_t* rec,
2319 #ifdef UNIV_DEBUG
2320  mtr_t* mtr,
2321 #endif /* UNIV_DEBUG */
2322  ulint* space_ids,
2323  ib_int64_t* space_versions,
2326  ulint* page_nos,
2329  ulint* n_stored)
2331 {
2332  ulint prev_page_no;
2333  ulint prev_space_id;
2334  ulint first_page_no;
2335  ulint first_space_id;
2336  ulint rec_page_no;
2337  ulint rec_space_id;
2338  ulint sum_volumes;
2339  ulint volume_for_page;
2340  ulint rec_volume;
2341  ulint limit;
2342  ulint n_pages;
2343 
2344  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
2345  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
2346  ut_ad(ibuf_inside(mtr));
2347 
2348  *n_stored = 0;
2349 
2351 
2352  if (page_rec_is_supremum(rec)) {
2353 
2354  rec = page_rec_get_prev_const(rec);
2355  }
2356 
2357  if (page_rec_is_infimum(rec)) {
2358 
2359  rec = page_rec_get_next_const(rec);
2360  }
2361 
2362  if (page_rec_is_supremum(rec)) {
2363 
2364  return(0);
2365  }
2366 
2367  first_page_no = ibuf_rec_get_page_no(mtr, rec);
2368  first_space_id = ibuf_rec_get_space(mtr, rec);
2369  n_pages = 0;
2370  prev_page_no = 0;
2371  prev_space_id = 0;
2372 
2373  /* Go backwards from the first rec until we reach the border of the
2374  'merge area', or the page start or the limit of storeable pages is
2375  reached */
2376 
2377  while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
2378 
2379  rec_page_no = ibuf_rec_get_page_no(mtr, rec);
2380  rec_space_id = ibuf_rec_get_space(mtr, rec);
2381 
2382  if (rec_space_id != first_space_id
2383  || (rec_page_no / IBUF_MERGE_AREA)
2384  != (first_page_no / IBUF_MERGE_AREA)) {
2385 
2386  break;
2387  }
2388 
2389  if (rec_page_no != prev_page_no
2390  || rec_space_id != prev_space_id) {
2391  n_pages++;
2392  }
2393 
2394  prev_page_no = rec_page_no;
2395  prev_space_id = rec_space_id;
2396 
2397  rec = page_rec_get_prev_const(rec);
2398  }
2399 
2400  rec = page_rec_get_next_const(rec);
2401 
2402  /* At the loop start there is no prev page; we mark this with a pair
2403  of space id, page no (0, 0) for which there can never be entries in
2404  the insert buffer */
2405 
2406  prev_page_no = 0;
2407  prev_space_id = 0;
2408  sum_volumes = 0;
2409  volume_for_page = 0;
2410 
2411  while (*n_stored < limit) {
2412  if (page_rec_is_supremum(rec)) {
2413  /* When no more records available, mark this with
2414  another 'impossible' pair of space id, page no */
2415  rec_page_no = 1;
2416  rec_space_id = 0;
2417  } else {
2418  rec_page_no = ibuf_rec_get_page_no(mtr, rec);
2419  rec_space_id = ibuf_rec_get_space(mtr, rec);
2420  /* In the system tablespace, the smallest
2421  possible secondary index leaf page number is
2422  bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
2423  other tablespaces, the clustered index tree is
2424  created at page 3, which makes page 4 the
2425  smallest possible secondary index leaf page
2426  (and that only after DROP INDEX). */
2427  ut_ad(rec_page_no
2428  > (ulint) IBUF_TREE_ROOT_PAGE_NO
2429  - (rec_space_id != 0));
2430  }
2431 
2432 #ifdef UNIV_IBUF_DEBUG
2433  ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
2434 #endif
2435  if ((rec_space_id != prev_space_id
2436  || rec_page_no != prev_page_no)
2437  && (prev_space_id != 0 || prev_page_no != 0)) {
2438 
2439  if (contract
2440  || (prev_page_no == first_page_no
2441  && prev_space_id == first_space_id)
2442  || (volume_for_page
2443  > ((IBUF_MERGE_THRESHOLD - 1)
2444  * 4 * UNIV_PAGE_SIZE
2445  / IBUF_PAGE_SIZE_PER_FREE_SPACE)
2446  / IBUF_MERGE_THRESHOLD)) {
2447 
2448  space_ids[*n_stored] = prev_space_id;
2449  space_versions[*n_stored]
2450  = fil_space_get_version(prev_space_id);
2451  page_nos[*n_stored] = prev_page_no;
2452 
2453  (*n_stored)++;
2454 
2455  sum_volumes += volume_for_page;
2456  }
2457 
2458  if (rec_space_id != first_space_id
2459  || rec_page_no / IBUF_MERGE_AREA
2460  != first_page_no / IBUF_MERGE_AREA) {
2461 
2462  break;
2463  }
2464 
2465  volume_for_page = 0;
2466  }
2467 
2468  if (rec_page_no == 1 && rec_space_id == 0) {
2469  /* Supremum record */
2470 
2471  break;
2472  }
2473 
2474  rec_volume = ibuf_rec_get_volume(mtr, rec);
2475 
2476  volume_for_page += rec_volume;
2477 
2478  prev_page_no = rec_page_no;
2479  prev_space_id = rec_space_id;
2480 
2481  rec = page_rec_get_next_const(rec);
2482  }
2483 
2484 #ifdef UNIV_IBUF_DEBUG
2485  ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
2486 #endif
2487 #if 0
2488  fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
2489  *n_stored, sum_volumes);
2490 #endif
2491  return(sum_volumes);
2492 }
2493 
2494 /*******************************************************************/
2497 static __attribute__((nonnull, warn_unused_result))
2498 const rec_t*
2499 ibuf_get_user_rec(
2500 /*===============*/
2501  btr_pcur_t* pcur,
2502  mtr_t* mtr)
2503 {
2504  do {
2505  const rec_t* rec = btr_pcur_get_rec(pcur);
2506 
2507  if (page_rec_is_user_rec(rec)) {
2508  return(rec);
2509  }
2510  } while (btr_pcur_move_to_next(pcur, mtr));
2511 
2512  return(NULL);
2513 }
2514 
2515 /*********************************************************************/
2519 static __attribute__((nonnull, warn_unused_result))
2520 ulint
2521 ibuf_get_merge_pages(
2522 /*=================*/
2523  btr_pcur_t* pcur,
2524  ulint space,
2525  ulint limit,
2526  ulint* pages,
2527  ulint* spaces,
2528  ib_int64_t* versions,
2529  ulint* n_pages,
2530  mtr_t* mtr)
2531 {
2532  const rec_t* rec;
2533  ulint volume = 0;
2534  ib_int64_t version = fil_space_get_version(space);
2535 
2536  ut_a(space != ULINT_UNDEFINED);
2537 
2538  *n_pages = 0;
2539 
2540  while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0
2541  && ibuf_rec_get_space(mtr, rec) == space
2542  && *n_pages < limit) {
2543 
2544  ulint page_no = ibuf_rec_get_page_no(mtr, rec);
2545 
2546  if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
2547  spaces[*n_pages] = space;
2548  pages[*n_pages] = page_no;
2549  versions[*n_pages] = version;
2550  ++*n_pages;
2551  }
2552 
2553  volume += ibuf_rec_get_volume(mtr, rec);
2554 
2555  btr_pcur_move_to_next(pcur, mtr);
2556  }
2557 
2558  return(volume);
2559 }
2560 
2561 /*********************************************************************/
/* Contracts the insert buffer: picks a batch of index pages referred to
by buffered records, starting from a random position in the ibuf tree,
and schedules reads of those pages so the buffered changes get merged.
Returns 0 if the tree was empty, otherwise a lower bound for the
combined volume of the records to be merged, plus one. */
2566 static
2567 ulint
2568 ibuf_merge_pages(
2569 /*=============*/
2570  ulint* n_pages,
2571  bool sync)
2574 {
2575  mtr_t mtr;
2576  btr_pcur_t pcur;
2577  ulint sum_sizes;
2578  ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2579  ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2580  ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2581 
2582  *n_pages = 0;
2583 
2584  ibuf_mtr_start(&mtr);
2585 
2586  /* Open a cursor to a randomly chosen leaf of the tree, at a random
2587  position within the leaf */
2588 
2589  btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
2590 
2591  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
2592 
2593  if (page_is_empty(btr_pcur_get_page(&pcur))) {
2594  /* If a B-tree page is empty, it must be the root page
2595  and the whole B-tree must be empty. InnoDB does not
2596  allow empty B-tree pages other than the root. */
2597  ut_ad(ibuf->empty);
2598  ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
2599  == IBUF_SPACE_ID);
2600  ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
2601  == FSP_IBUF_TREE_ROOT_PAGE_NO);
2602 
2603  ibuf_mtr_commit(&mtr);
2604  btr_pcur_close(&pcur);
2605 
2606  return(0);
2607  }
2608 
 /* Collect up to IBUF_MAX_N_PAGES_MERGED (space id, page no) pairs
 around the cursor position; TRUE selects contraction mode. */
2609  sum_sizes = ibuf_get_merge_page_nos(TRUE,
2610  btr_pcur_get_rec(&pcur), &mtr,
2611  space_ids, space_versions,
2612  page_nos, n_pages);
2613 #if 0 /* defined UNIV_IBUF_DEBUG */
2614  fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
2615  sync, *n_pages, sum_sizes);
2616 #endif
2617  ibuf_mtr_commit(&mtr);
2618  btr_pcur_close(&pcur);
2619 
 /* NOTE(review): the extracted listing dropped source line 2620 here;
 it presumably held the call that issues the page reads
 (buf_read_ibuf_merge_pages) taking the arguments below -- confirm
 against the original ibuf0ibuf.cc. */
2621  sync, space_ids, space_versions, page_nos, *n_pages);
2622 
2623  return(sum_sizes + 1);
2624 }
2625 
2626 /*********************************************************************/
/* Opens the data dictionary entry for the given table id while holding
the dictionary operation lock in shared mode, so the table cannot be
dropped concurrently.  Returns NULL if the table no longer exists. */
2629 static __attribute__((warn_unused_result))
2630 dict_table_t*
2631 ibuf_get_table(
2632 /*===========*/
2633  table_id_t table_id)
2634 {
2635  rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
2636 
 /* NOTE(review): the extracted listing dropped source line 2637 here;
 presumably "dict_table_t* table = dict_table_open_on_id(" -- the
 continuation arguments are on the next line.  Confirm against the
 original file. */
2638  table_id, FALSE, DICT_TABLE_OP_NORMAL);
2639 
2640  rw_lock_s_unlock_gen(&dict_operation_lock, 0);
2641 
2642  return(table);
2643 }
2644 
2645 /*********************************************************************/
/* Contracts the insert buffer for a single tablespace: positions a
cursor on the first buffered record for the space, collects the page
numbers with buffered changes, and schedules reads of those pages so
the changes get merged.  Returns the volume merged, in bytes (0 when
the ibuf tree is empty). */
2650 static
2651 ulint
2652 ibuf_merge_space(
2653 /*=============*/
2654  ulint space,
2655  ulint* n_pages)
2656 {
2657  mtr_t mtr;
2658  btr_pcur_t pcur;
2659  mem_heap_t* heap = mem_heap_create(512);
2660  dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap);
2661 
2662  ibuf_mtr_start(&mtr);
2663 
2664  /* Position the cursor on the first matching record. */
2665 
2666  btr_pcur_open(
2667  ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur,
2668  &mtr);
2669 
 /* The search tuple is no longer needed once the cursor is open. */
2670  mem_heap_free(heap);
2671 
2672  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
2673 
2674  ulint sum_sizes = 0;
2675  ulint pages[IBUF_MAX_N_PAGES_MERGED];
2676  ulint spaces[IBUF_MAX_N_PAGES_MERGED];
2677  ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
2678 
2679  if (page_is_empty(btr_pcur_get_page(&pcur))) {
2680  /* If a B-tree page is empty, it must be the root page
2681  and the whole B-tree must be empty. InnoDB does not
2682  allow empty B-tree pages other than the root. */
2683  ut_ad(ibuf->empty);
2684  ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
2685  == IBUF_SPACE_ID);
2686  ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
2687  == FSP_IBUF_TREE_ROOT_PAGE_NO);
2688 
2689  } else {
2690 
2691  sum_sizes = ibuf_get_merge_pages(
2692  &pcur, space, IBUF_MAX_N_PAGES_MERGED,
2693  &pages[0], &spaces[0], &versions[0], n_pages,
2694  &mtr);
2695 
 /* +1 so that a successful scan is distinguishable from the
 empty-tree case even when the volume was 0. */
2696  ++sum_sizes;
2697  }
2698 
2699  ibuf_mtr_commit(&mtr);
2700 
2701  btr_pcur_close(&pcur);
2702 
2703  if (sum_sizes > 0) {
2704 
2705  ut_a(*n_pages > 0 || sum_sizes == 1);
2706 
2707 #ifdef UNIV_DEBUG
2708  ut_ad(*n_pages <= UT_ARR_SIZE(pages));
2709 
2710  for (ulint i = 0; i < *n_pages; ++i) {
2711  ut_ad(spaces[i] == space);
2712  ut_ad(i == 0 || versions[i] == versions[i - 1]);
2713  }
2714 #endif /* UNIV_DEBUG */
2715 
 /* NOTE(review): the extracted listing dropped source line 2716
 here; it presumably held the read-scheduling call, e.g.
 "buf_read_ibuf_merge_pages(" -- confirm against the original
 file. */
2717  true, spaces, versions, pages, *n_pages);
2718  }
2719 
2720  return(sum_sizes);
2721 }
2722 
2723 /*********************************************************************/
/* Contracts the insert buffer by one merge batch.  With table_id == 0
a random batch is merged; otherwise only changes buffered for the given
table's tablespace are merged.  Returns the volume merged, in bytes,
or 0 when nothing was done (empty buffer or dropped table). */
2728 static __attribute__((nonnull, warn_unused_result))
2729 ulint
2730 ibuf_merge(
2731 /*=======*/
2732  table_id_t table_id,
2736  ulint* n_pages,
2738  bool sync)
2742 {
 /* NOTE(review): the extracted listing dropped source line 2743 here;
 presumably the declaration "dict_table_t* table;" used below --
 confirm against the original file. */
2744 
2745  *n_pages = 0;
2746 
2747  /* We perform a dirty read of ibuf->empty, without latching
2748  the insert buffer root page. We trust this dirty read except
2749  when a slow shutdown is being executed. During a slow
2750  shutdown, the insert buffer merge must be completed. */
2751 
2752  if (ibuf->empty && !srv_shutdown_state) {
2753  return(0);
2754  } else if (table_id == 0) {
2755  return(ibuf_merge_pages(n_pages, sync));
2756  } else if ((table = ibuf_get_table(table_id)) == 0) {
2757  /* Table has been dropped. */
2758  return(0);
2759  }
2760 
2761  ulint volume = ibuf_merge_space(table->space, n_pages);
2762 
2763  dict_table_close(table, FALSE, FALSE);
2764 
2765  return(volume);
2766 }
2767 
2768 /*********************************************************************/
2773 static
2774 ulint
2775 ibuf_contract(
2776 /*==========*/
2777  ibool sync)
2780 {
2781  ulint n_pages;
2782 
2783  return(ibuf_merge(0, &n_pages, sync));
2784 }
2785 
2786 /*********************************************************************/
/* Contracts the insert buffer in the background, merging batches until
the target number of pages (a fraction of io_capacity, or a full batch
when requested) has been processed or nothing remains to merge.
Returns the total number of bytes merged. */
2791 UNIV_INTERN
2792 ulint
 /* NOTE(review): the extracted listing dropped source line 2793 here,
 which held the function name -- presumably
 "ibuf_contract_in_background(" -- confirm against the original
 file. */
2794 /*========================*/
2795  table_id_t table_id,
2798  ibool full)
2803 {
2804  ulint sum_bytes = 0;
2805  ulint sum_pages = 0;
2806  ulint n_pag2;
2807  ulint n_pages;
2808 
2809 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
2810  if (srv_ibuf_disable_background_merge && table_id == 0) {
2811  return(0);
2812  }
2813 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
2814 
2815  if (full) {
2816  /* Caller has requested a full batch */
2817  n_pages = PCT_IO(100);
2818  } else {
2819  /* By default we do a batch of 5% of the io_capacity */
2820  n_pages = PCT_IO(5);
2821 
2822  mutex_enter(&ibuf_mutex);
2823 
2824  /* If the ibuf->size is more than half the max_size
2825  then we make more aggressive contraction.
2826  +1 is to avoid division by zero. */
2827  if (ibuf->size > ibuf->max_size / 2) {
2828  ulint diff = ibuf->size - ibuf->max_size / 2;
2829  n_pages += PCT_IO((diff * 100)
2830  / (ibuf->max_size + 1));
2831  }
2832 
2833  mutex_exit(&ibuf_mutex);
2834  }
2835 
 /* Merge batch by batch until the page quota is used up or a batch
 merges nothing (the buffer is exhausted). */
2836  while (sum_pages < n_pages) {
2837  ulint n_bytes;
2838 
2839  n_bytes = ibuf_merge(table_id, &n_pag2, FALSE);
2840 
2841  if (n_bytes == 0) {
2842  return(sum_bytes);
2843  }
2844 
2845  sum_bytes += n_bytes;
2846  sum_pages += n_pag2;
2847  }
2848 
2849  return(sum_bytes);
2850 }
2851 
2852 /*********************************************************************/
2854 UNIV_INLINE
2855 void
2856 ibuf_contract_after_insert(
2857 /*=======================*/
2858  ulint entry_size)
2860 {
2861  ibool sync;
2862  ulint sum_sizes;
2863  ulint size;
2864  ulint max_size;
2865 
2866  /* Perform dirty reads of ibuf->size and ibuf->max_size, to
2867  reduce ibuf_mutex contention. ibuf->max_size remains constant
2868  after ibuf_init_at_db_start(), but ibuf->size should be
2869  protected by ibuf_mutex. Given that ibuf->size fits in a
2870  machine word, this should be OK; at worst we are doing some
2871  excessive ibuf_contract() or occasionally skipping a
2872  ibuf_contract(). */
2873  size = ibuf->size;
2874  max_size = ibuf->max_size;
2875 
2876  if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2877  return;
2878  }
2879 
2880  sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
2881 
2882  /* Contract at least entry_size many bytes */
2883  sum_sizes = 0;
2884  size = 1;
2885 
2886  do {
2887 
2888  size = ibuf_contract(sync);
2889  sum_sizes += size;
2890  } while (size > 0 && sum_sizes < entry_size);
2891 }
2892 
2893 /*********************************************************************/
2896 static
2897 ibool
2898 ibuf_get_volume_buffered_hash(
2899 /*==========================*/
2900  const rec_t* rec,
2901  const byte* types,
2902  const byte* data,
2903  ulint comp,
2905  ulint* hash,
2906  ulint size)
2907 {
2908  ulint len;
2909  ulint fold;
2910  ulint bitmask;
2911 
2912  len = ibuf_rec_get_size(
2913  rec, types,
2914  rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp);
2915  fold = ut_fold_binary(data, len);
2916 
2917  hash += (fold / (CHAR_BIT * sizeof *hash)) % size;
2918  bitmask = 1 << (fold % (CHAR_BIT * sizeof *hash));
2919 
2920  if (*hash & bitmask) {
2921 
2922  return(FALSE);
2923  }
2924 
2925  /* We have not seen this record yet. Insert it. */
2926  *hash |= bitmask;
2927 
2928  return(TRUE);
2929 }
2930 
/* In debug builds the mini-transaction is forwarded to the implementation
function, which uses it for latch assertions; release builds omit the
parameter entirely. */
2931 #ifdef UNIV_DEBUG
2932 # define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
2933  ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs)
2934 #else /* UNIV_DEBUG */
2935 # define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
2936  ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs)
2937 #endif
2938 /*********************************************************************/
/* Estimates the volume, in bytes on the destination index page, taken by
one buffered record, and updates *n_recs: +1 for a not-yet-seen
insert/delete-mark, -1 for a buffered purge (IBUF_OP_DELETE).
Delete-marks and purges contribute 0 bytes of volume. */
2943 static
2944 ulint
2945 ibuf_get_volume_buffered_count_func(
2946 /*================================*/
2947 #ifdef UNIV_DEBUG
2948  mtr_t* mtr,
2949 #endif /* UNIV_DEBUG */
2950  const rec_t* rec,
2951  ulint* hash,
2952  ulint size,
2953  lint* n_recs)
2955 {
2956  ulint len;
2957  ibuf_op_t ibuf_op;
2958  const byte* types;
2959  ulint n_fields;
2960 
2961  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
2962  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
2963  ut_ad(ibuf_inside(mtr));
2964 
2965  n_fields = rec_get_n_fields_old(rec);
2966  ut_ad(n_fields > IBUF_REC_FIELD_USER);
2967  n_fields -= IBUF_REC_FIELD_USER;
2968 
2969  rec_get_nth_field_offs_old(rec, 1, &len);
2970  /* This function is only invoked when buffering new
2971  operations. All pre-4.1 records should have been merged
2972  when the database was started up. */
2973  ut_a(len == 1);
2974 
2975  if (rec_get_deleted_flag(rec, 0)) {
2976  /* This record has been merged already,
2977  but apparently the system crashed before
2978  the change was discarded from the buffer.
2979  Pretend that the record does not exist. */
2980  return(0);
2981  }
2982 
2983  types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
2984 
 /* The remainder of the metadata field length distinguishes the three
 record formats; IBUF_REC_INFO_SIZE is the expected (newest) one. */
2985  switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
2986  IBUF_REC_INFO_SIZE)) {
2987  default:
2988  ut_error;
2989  case 0:
2990  /* This ROW_TYPE=REDUNDANT record does not include an
2991  operation counter. Exclude it from the *n_recs,
2992  because deletes cannot be buffered if there are
2993  old-style inserts buffered for the page. */
2994 
2995  len = ibuf_rec_get_size(rec, types, n_fields, 0);
2996 
2997  return(len
2998  + rec_get_converted_extra_size(len, n_fields, 0)
 /* NOTE(review): the extracted listing dropped source line 2999 here;
 it presumably completed this return expression, e.g.
 "+ page_dir_calc_reserved_space(1));" -- confirm against the
 original file. */
3000  case 1:
3001  /* This ROW_TYPE=COMPACT record does not include an
3002  operation counter. Exclude it from the *n_recs,
3003  because deletes cannot be buffered if there are
3004  old-style inserts buffered for the page. */
3005  goto get_volume_comp;
3006 
3007  case IBUF_REC_INFO_SIZE:
3008  ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
3009  break;
3010  }
3011 
3012  switch (ibuf_op) {
3013  case IBUF_OP_INSERT:
3014  /* Inserts can be done by updating a delete-marked record.
3015  Because delete-mark and insert operations can be pointing to
3016  the same records, we must not count duplicates. */
3017  case IBUF_OP_DELETE_MARK:
3018  /* There must be a record to delete-mark.
3019  See if this record has been already buffered. */
3020  if (n_recs && ibuf_get_volume_buffered_hash(
3021  rec, types + IBUF_REC_INFO_SIZE,
3022  types + len,
 /* NOTE(review): the extracted listing dropped source line 3023
 here, presumably the "comp" argument expression -- confirm
 against the original file. */
3024  hash, size)) {
3025  (*n_recs)++;
3026  }
3027 
3028  if (ibuf_op == IBUF_OP_DELETE_MARK) {
3029  /* Setting the delete-mark flag does not
3030  affect the available space on the page. */
3031  return(0);
3032  }
3033  break;
3034  case IBUF_OP_DELETE:
3035  /* A record will be removed from the page. */
3036  if (n_recs) {
3037  (*n_recs)--;
3038  }
3039  /* While deleting a record actually frees up space,
3040  we have to play it safe and pretend that it takes no
3041  additional space (the record might not exist, etc.). */
3042  return(0);
3043  default:
3044  ut_error;
3045  }
3046 
3047  ut_ad(ibuf_op == IBUF_OP_INSERT);
3048 
 /* Fall through here for buffered inserts: convert the ibuf record
 back into an index entry and measure its converted size. */
3049 get_volume_comp:
3050  {
3051  dtuple_t* entry;
3052  ulint volume;
3053  dict_index_t* dummy_index;
3054  mem_heap_t* heap = mem_heap_create(500);
3055 
3056  entry = ibuf_build_entry_from_ibuf_rec(
3057  mtr, rec, heap, &dummy_index);
3058 
3059  volume = rec_get_converted_size(dummy_index, entry, 0);
3060 
3061  ibuf_dummy_index_free(dummy_index);
3062  mem_heap_free(heap);
3063 
3064  return(volume + page_dir_calc_reserved_space(1));
3065  }
3066 }
3067 
3068 /*********************************************************************/
/* Estimates the total volume, in bytes, of changes already buffered for
the index page (space, page_no), by scanning the ibuf tree records around
the cursor position, including the neighboring ibuf tree pages when they
are reachable under the latching rules.  Returns UNIV_PAGE_SIZE as a
pessimistic upper bound when the scan would have to cross a page that
cannot be latched. */
3074 static
3075 ulint
3076 ibuf_get_volume_buffered(
3077 /*=====================*/
3078  const btr_pcur_t*pcur,
3083  ulint space,
3084  ulint page_no,
3085  lint* n_recs,
3088  mtr_t* mtr)
3089 {
3090  ulint volume;
3091  const rec_t* rec;
3092  const page_t* page;
3093  ulint prev_page_no;
3094  const page_t* prev_page;
3095  ulint next_page_no;
3096  const page_t* next_page;
3097  /* bitmap of buffered recs */
3098  ulint hash_bitmap[128 / sizeof(ulint)];
3099 
3100  ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
3101  || (pcur->latch_mode == BTR_MODIFY_TREE))
3102 
3103  /* Count the volume of inserts earlier in the alphabetical order than
3104  pcur */
3105 
3106  volume = 0;
3107 
 /* The hash bitmap is only needed when the caller wants a record
 count (used to detect duplicate insert/delete-mark pairs). */
3108  if (n_recs) {
3109  memset(hash_bitmap, 0, sizeof hash_bitmap);
3110  }
3111 
3112  rec = btr_pcur_get_rec(pcur);
3113  page = page_align(rec);
3114  ut_ad(page_validate(page, ibuf->index));
3115 
3116  if (page_rec_is_supremum(rec)) {
3117  rec = page_rec_get_prev_const(rec);
3118  }
3119 
 /* Scan backwards on the cursor's page. */
3120  for (; !page_rec_is_infimum(rec);
3121  rec = page_rec_get_prev_const(rec)) {
3122  ut_ad(page_align(rec) == page);
3123 
3124  if (page_no != ibuf_rec_get_page_no(mtr, rec)
3125  || space != ibuf_rec_get_space(mtr, rec)) {
3126 
3127  goto count_later;
3128  }
3129 
3130  volume += ibuf_get_volume_buffered_count(
3131  mtr, rec,
3132  hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
3133  }
3134 
3135  /* Look at the previous page */
3136 
3137  prev_page_no = btr_page_get_prev(page, mtr);
3138 
3139  if (prev_page_no == FIL_NULL) {
3140 
3141  goto count_later;
3142  }
3143 
3144  {
3145  buf_block_t* block;
3146 
3147  block = buf_page_get(
3148  IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH,
3149  mtr);
3150 
3151  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
3152 
3153 
3154  prev_page = buf_block_get_frame(block);
3155  ut_ad(page_validate(prev_page, ibuf->index));
3156  }
3157 
3158 #ifdef UNIV_BTR_DEBUG
3159  ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page));
3160 #endif /* UNIV_BTR_DEBUG */
3161 
3162  rec = page_get_supremum_rec(prev_page);
3163  rec = page_rec_get_prev_const(rec);
3164 
 /* Scan backwards on the previous ibuf tree page. */
3165  for (;; rec = page_rec_get_prev_const(rec)) {
3166  ut_ad(page_align(rec) == prev_page);
3167 
3168  if (page_rec_is_infimum(rec)) {
3169 
3170  /* We cannot go to yet a previous page, because we
3171  do not have the x-latch on it, and cannot acquire one
3172  because of the latching order: we have to give up */
3173 
3174  return(UNIV_PAGE_SIZE);
3175  }
3176 
3177  if (page_no != ibuf_rec_get_page_no(mtr, rec)
3178  || space != ibuf_rec_get_space(mtr, rec)) {
3179 
3180  goto count_later;
3181  }
3182 
3183  volume += ibuf_get_volume_buffered_count(
3184  mtr, rec,
3185  hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
3186  }
3187 
 /* Count the volume of records later in the alphabetical order than
 the cursor position. */
3188 count_later:
3189  rec = btr_pcur_get_rec(pcur);
3190 
3191  if (!page_rec_is_supremum(rec)) {
3192  rec = page_rec_get_next_const(rec);
3193  }
3194 
3195  for (; !page_rec_is_supremum(rec);
3196  rec = page_rec_get_next_const(rec)) {
3197  if (page_no != ibuf_rec_get_page_no(mtr, rec)
3198  || space != ibuf_rec_get_space(mtr, rec)) {
3199 
3200  return(volume);
3201  }
3202 
3203  volume += ibuf_get_volume_buffered_count(
3204  mtr, rec,
3205  hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
3206  }
3207 
3208  /* Look at the next page */
3209 
3210  next_page_no = btr_page_get_next(page, mtr);
3211 
3212  if (next_page_no == FIL_NULL) {
3213 
3214  return(volume);
3215  }
3216 
3217  {
3218  buf_block_t* block;
3219 
3220  block = buf_page_get(
3221  IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH,
3222  mtr);
3223 
3224  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
3225 
3226 
3227  next_page = buf_block_get_frame(block);
3228  ut_ad(page_validate(next_page, ibuf->index));
3229  }
3230 
3231 #ifdef UNIV_BTR_DEBUG
3232  ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
3233 #endif /* UNIV_BTR_DEBUG */
3234 
3235  rec = page_get_infimum_rec(next_page);
3236  rec = page_rec_get_next_const(rec);
3237 
 /* Scan forwards on the next ibuf tree page. */
3238  for (;; rec = page_rec_get_next_const(rec)) {
3239  ut_ad(page_align(rec) == next_page);
3240 
3241  if (page_rec_is_supremum(rec)) {
3242 
3243  /* We give up */
3244 
3245  return(UNIV_PAGE_SIZE);
3246  }
3247 
3248  if (page_no != ibuf_rec_get_page_no(mtr, rec)
3249  || space != ibuf_rec_get_space(mtr, rec)) {
3250 
3251  return(volume);
3252  }
3253 
3254  volume += ibuf_get_volume_buffered_count(
3255  mtr, rec,
3256  hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
3257  }
3258 }
3259 
3260 /*********************************************************************/
/* Reads the biggest tablespace id referenced by any record in the insert
buffer tree (the last record, since records are ordered by space id) and
updates the fil system's maximum-space-id counter accordingly. */
3263 UNIV_INTERN
3264 void
 /* NOTE(review): the extracted listing dropped source line 3265 here,
 which held the function name -- presumably
 "ibuf_update_max_tablespace_id(void)" -- confirm against the
 original file. */
3266 /*===============================*/
3267 {
3268  ulint max_space_id;
3269  const rec_t* rec;
3270  const byte* field;
3271  ulint len;
3272  btr_pcur_t pcur;
3273  mtr_t mtr;
3274 
3275  ut_a(!dict_table_is_comp(ibuf->index->table));
3276 
3277  ibuf_mtr_start(&mtr);
3278 
 /* NOTE(review): the extracted listing dropped source line 3279 here;
 presumably the call that opens the cursor at the end of the tree,
 e.g. "btr_pcur_open_at_index_side(" -- the arguments follow on the
 next line.  Confirm against the original file. */
3280  false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3281 
3282  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
3283 
 /* Step back from the supremum to the last user record, if any. */
3284  btr_pcur_move_to_prev(&pcur, &mtr);
3285 
3286  if (btr_pcur_is_before_first_on_page(&pcur)) {
3287  /* The tree is empty */
3288 
3289  max_space_id = 0;
3290  } else {
3291  rec = btr_pcur_get_rec(&pcur);
3292 
3293  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
3294 
3295  ut_a(len == 4);
3296 
3297  max_space_id = mach_read_from_4(field);
3298  }
3299 
3300  ibuf_mtr_commit(&mtr);
3301 
3302  /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
3303 
3304  fil_set_max_space_id_if_bigger(max_space_id);
3305 }
3306 
/* In debug builds the mini-transaction is forwarded to the implementation
function, which uses it for latch assertions; release builds omit the
parameter entirely. */
3307 #ifdef UNIV_DEBUG
3308 # define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
3309  ibuf_get_entry_counter_low_func(mtr,rec,space,page_no)
3310 #else /* UNIV_DEBUG */
3311 # define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
3312  ibuf_get_entry_counter_low_func(rec,space,page_no)
3313 #endif
3314 /****************************************************************/
3321 static
3322 ulint
3323 ibuf_get_entry_counter_low_func(
3324 /*============================*/
3325 #ifdef UNIV_DEBUG
3326  mtr_t* mtr,
3327 #endif /* UNIV_DEBUG */
3328  const rec_t* rec,
3329  ulint space,
3330  ulint page_no)
3331 {
3332  ulint counter;
3333  const byte* field;
3334  ulint len;
3335 
3336  ut_ad(ibuf_inside(mtr));
3337  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
3338  || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
3339  ut_ad(rec_get_n_fields_old(rec) > 2);
3340 
3341  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
3342 
3343  ut_a(len == 1);
3344 
3345  /* Check the tablespace identifier. */
3346  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
3347 
3348  ut_a(len == 4);
3349 
3350  if (mach_read_from_4(field) != space) {
3351 
3352  return(0);
3353  }
3354 
3355  /* Check the page offset. */
3356  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
3357  ut_a(len == 4);
3358 
3359  if (mach_read_from_4(field) != page_no) {
3360 
3361  return(0);
3362  }
3363 
3364  /* Check if the record contains a counter field. */
3365  field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
3366 
3367  switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
3368  default:
3369  ut_error;
3370  case 0: /* ROW_FORMAT=REDUNDANT */
3371  case 1: /* ROW_FORMAT=COMPACT */
3372  return(ULINT_UNDEFINED);
3373 
3374  case IBUF_REC_INFO_SIZE:
3375  counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
3376  ut_a(counter < 0xFFFF);
3377  return(counter + 1);
3378  }
3379 }
3380 
/* In debug builds the mini-transaction is forwarded to the implementation
function, which uses it for latch assertions; release builds omit the
parameter entirely. */
3381 #ifdef UNIV_DEBUG
3382 # define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
3383  ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf)
3384 #else /* UNIV_DEBUG */
3385 # define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
3386  ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf)
3387 #endif
3388 
3389 /****************************************************************/
3394 static
3395 ulint
3396 ibuf_get_entry_counter_func(
3397 /*========================*/
3398  ulint space,
3399  ulint page_no,
3400  const rec_t* rec,
3402 #ifdef UNIV_DEBUG
3403  mtr_t* mtr,
3404 #endif /* UNIV_DEBUG */
3405  ibool only_leaf)
3410 {
3411  ut_ad(ibuf_inside(mtr));
3412  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
3413  ut_ad(page_validate(page_align(rec), ibuf->index));
3414 
3415  if (page_rec_is_supremum(rec)) {
3416  /* This is just for safety. The record should be a
3417  page infimum or a user record. */
3418  ut_ad(0);
3419  return(ULINT_UNDEFINED);
3420  } else if (!page_rec_is_infimum(rec)) {
3421  return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
3422  } else if (only_leaf
3423  || fil_page_get_prev(page_align(rec)) == FIL_NULL) {
3424  /* The parent node pointer did not contain the
3425  searched for (space, page_no), which means that the
3426  search ended on the correct page regardless of the
3427  counter value, and since we're at the infimum record,
3428  there are no existing records. */
3429  return(0);
3430  } else {
3431  /* We used to read the previous page here. It would
3432  break the latching order, because the caller has
3433  buffer-fixed an insert buffer bitmap page. */
3434  return(ULINT_UNDEFINED);
3435  }
3436 }
3437 
3438 /*********************************************************************/
3442 static __attribute__((nonnull, warn_unused_result))
3443 dberr_t
3444 ibuf_insert_low(
3445 /*============*/
3446  ulint mode,
3447  ibuf_op_t op,
3448  ibool no_counter,
3451  const dtuple_t* entry,
3452  ulint entry_size,
3454  dict_index_t* index,
3456  ulint space,
3457  ulint zip_size,
3458  ulint page_no,
3459  que_thr_t* thr)
3460 {
3461  big_rec_t* dummy_big_rec;
3462  btr_pcur_t pcur;
3463  btr_cur_t* cursor;
3464  dtuple_t* ibuf_entry;
3465  mem_heap_t* offsets_heap = NULL;
3466  mem_heap_t* heap;
3467  ulint* offsets = NULL;
3468  ulint buffered;
3469  lint min_n_recs;
3470  rec_t* ins_rec;
3471  ibool old_bit_value;
3472  page_t* bitmap_page;
3473  buf_block_t* block;
3474  page_t* root;
3475  dberr_t err;
3476  ibool do_merge;
3477  ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
3478  ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
3479  ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
3480  ulint n_stored;
3481  mtr_t mtr;
3482  mtr_t bitmap_mtr;
3483 
3484  ut_a(!dict_index_is_clust(index));
3485  ut_ad(dtuple_check_typed(entry));
3486  ut_ad(ut_is_2pow(zip_size));
3487  ut_ad(!no_counter || op == IBUF_OP_INSERT);
3488  ut_a(op < IBUF_OP_COUNT);
3489 
3490  do_merge = FALSE;
3491 
3492  /* Perform dirty reads of ibuf->size and ibuf->max_size, to
3493  reduce ibuf_mutex contention. Given that ibuf->max_size and
3494  ibuf->size fit in a machine word, this should be OK; at worst
3495  we are doing some excessive ibuf_contract() or occasionally
3496  skipping an ibuf_contract(). */
3497  if (ibuf->max_size == 0) {
3498  return(DB_STRONG_FAIL);
3499  }
3500 
3501  if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
3502  /* Insert buffer is now too big, contract it but do not try
3503  to insert */
3504 
3505 
3506 #ifdef UNIV_IBUF_DEBUG
3507  fputs("Ibuf too big\n", stderr);
3508 #endif
3509  /* Use synchronous contract (== TRUE) */
3510  ibuf_contract(TRUE);
3511 
3512  return(DB_STRONG_FAIL);
3513  }
3514 
3515  heap = mem_heap_create(1024);
3516 
3517  /* Build the entry which contains the space id and the page number
3518  as the first fields and the type information for other fields, and
3519  which will be inserted to the insert buffer. Using a counter value
3520  of 0xFFFF we find the last record for (space, page_no), from which
3521  we can then read the counter value N and use N + 1 in the record we
3522  insert. (We patch the ibuf_entry's counter field to the correct
3523  value just before actually inserting the entry.) */
3524 
3525  ibuf_entry = ibuf_entry_build(
3526  op, index, entry, space, page_no,
3527  no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
3528 
3529  /* Open a cursor to the insert buffer tree to calculate if we can add
3530  the new entry to it without exceeding the free space limit for the
3531  page. */
3532 
3533  if (mode == BTR_MODIFY_TREE) {
3534  for (;;) {
3535  mutex_enter(&ibuf_pessimistic_insert_mutex);
3536  mutex_enter(&ibuf_mutex);
3537 
3538  if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
3539 
3540  break;
3541  }
3542 
3543  mutex_exit(&ibuf_mutex);
3544  mutex_exit(&ibuf_pessimistic_insert_mutex);
3545 
3546  if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
3547 
3548  mem_heap_free(heap);
3549  return(DB_STRONG_FAIL);
3550  }
3551  }
3552  }
3553 
3554  ibuf_mtr_start(&mtr);
3555 
3556  btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
3557  ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
3558 
3559  /* Find out the volume of already buffered inserts for the same index
3560  page */
3561  min_n_recs = 0;
3562  buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
3563  op == IBUF_OP_DELETE
3564  ? &min_n_recs
3565  : NULL, &mtr);
3566 
3567  if (op == IBUF_OP_DELETE
3568  && (min_n_recs < 2
3569  || buf_pool_watch_occurred(space, page_no))) {
3570  /* The page could become empty after the record is
3571  deleted, or the page has been read in to the buffer
3572  pool. Refuse to buffer the operation. */
3573 
3574  /* The buffer pool watch is needed for IBUF_OP_DELETE
3575  because of latching order considerations. We can
3576  check buf_pool_watch_occurred() only after latching
3577  the insert buffer B-tree pages that contain buffered
3578  changes for the page. We never buffer IBUF_OP_DELETE,
3579  unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
3580  been previously buffered for the page. Because there
3581  are buffered operations for the page, the insert
3582  buffer B-tree page latches held by mtr will guarantee
3583  that no changes for the user page will be merged
3584  before mtr_commit(&mtr). We must not mtr_commit(&mtr)
3585  until after the IBUF_OP_DELETE has been buffered. */
3586 
3587 fail_exit:
3588  if (mode == BTR_MODIFY_TREE) {
3589  mutex_exit(&ibuf_mutex);
3590  mutex_exit(&ibuf_pessimistic_insert_mutex);
3591  }
3592 
3593  err = DB_STRONG_FAIL;
3594  goto func_exit;
3595  }
3596 
3597  /* After this point, the page could still be loaded to the
3598  buffer pool, but we do not have to care about it, since we are
3599  holding a latch on the insert buffer leaf page that contains
3600  buffered changes for (space, page_no). If the page enters the
3601  buffer pool, buf_page_io_complete() for (space, page_no) will
3602  have to acquire a latch on the same insert buffer leaf page,
3603  which it cannot do until we have buffered the IBUF_OP_DELETE
3604  and done mtr_commit(&mtr) to release the latch. */
3605 
3606 #ifdef UNIV_IBUF_COUNT_DEBUG
3607  ut_a((buffered == 0) || ibuf_count_get(space, page_no));
3608 #endif
3609  ibuf_mtr_start(&bitmap_mtr);
3610 
3611  bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
3612  zip_size, &bitmap_mtr);
3613 
3614  /* We check if the index page is suitable for buffered entries */
3615 
3616  if (buf_page_peek(space, page_no)
3617  || lock_rec_expl_exist_on_page(space, page_no)) {
3618 
3619  ibuf_mtr_commit(&bitmap_mtr);
3620  goto fail_exit;
3621  }
3622 
3623  if (op == IBUF_OP_INSERT) {
3624  ulint bits = ibuf_bitmap_page_get_bits(
3625  bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
3626  &bitmap_mtr);
3627 
3628  if (buffered + entry_size + page_dir_calc_reserved_space(1)
3629  > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
3630  /* Release the bitmap page latch early. */
3631  ibuf_mtr_commit(&bitmap_mtr);
3632 
3633  /* It may not fit */
3634  do_merge = TRUE;
3635 
3636  ibuf_get_merge_page_nos(FALSE,
3637  btr_pcur_get_rec(&pcur), &mtr,
3638  space_ids, space_versions,
3639  page_nos, &n_stored);
3640 
3641  goto fail_exit;
3642  }
3643  }
3644 
3645  if (!no_counter) {
3646  /* Patch correct counter value to the entry to
3647  insert. This can change the insert position, which can
3648  result in the need to abort in some cases. */
3649  ulint counter = ibuf_get_entry_counter(
3650  space, page_no, btr_pcur_get_rec(&pcur), &mtr,
3651  btr_pcur_get_btr_cur(&pcur)->low_match
3652  < IBUF_REC_FIELD_METADATA);
3653  dfield_t* field;
3654 
3655  if (counter == ULINT_UNDEFINED) {
3656  ibuf_mtr_commit(&bitmap_mtr);
3657  goto fail_exit;
3658  }
3659 
3660  field = dtuple_get_nth_field(
3661  ibuf_entry, IBUF_REC_FIELD_METADATA);
3663  (byte*) dfield_get_data(field)
3664  + IBUF_REC_OFFSET_COUNTER, counter);
3665  }
3666 
3667  /* Set the bitmap bit denoting that the insert buffer contains
3668  buffered entries for this index page, if the bit is not set yet */
3669 
3670  old_bit_value = ibuf_bitmap_page_get_bits(
3671  bitmap_page, page_no, zip_size,
3672  IBUF_BITMAP_BUFFERED, &bitmap_mtr);
3673 
3674  if (!old_bit_value) {
3675  ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
3676  IBUF_BITMAP_BUFFERED, TRUE,
3677  &bitmap_mtr);
3678  }
3679 
3680  ibuf_mtr_commit(&bitmap_mtr);
3681 
3682  cursor = btr_pcur_get_btr_cur(&pcur);
3683 
3684  if (mode == BTR_MODIFY_PREV) {
3687  cursor, &offsets, &offsets_heap,
3688  ibuf_entry, &ins_rec,
3689  &dummy_big_rec, 0, thr, &mtr);
3690  block = btr_cur_get_block(cursor);
3691  ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
3692 
3693  /* If this is the root page, update ibuf->empty. */
3694  if (UNIV_UNLIKELY(buf_block_get_page_no(block)
3695  == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
3696  const page_t* root = buf_block_get_frame(block);
3697 
3698  ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
3699  ut_ad(page_get_page_no(root)
3700  == FSP_IBUF_TREE_ROOT_PAGE_NO);
3701 
3702  ibuf->empty = page_is_empty(root);
3703  }
3704  } else {
3705  ut_ad(mode == BTR_MODIFY_TREE);
3706 
3707  /* We acquire an x-latch to the root page before the insert,
3708  because a pessimistic insert releases the tree x-latch,
3709  which would cause the x-latching of the root after that to
3710  break the latching order. */
3711 
3712  root = ibuf_tree_root_get(&mtr);
3713 
3716  cursor, &offsets, &offsets_heap,
3717  ibuf_entry, &ins_rec,
3718  &dummy_big_rec, 0, thr, &mtr);
3719 
3720  if (err == DB_FAIL) {
3723  cursor, &offsets, &offsets_heap,
3724  ibuf_entry, &ins_rec,
3725  &dummy_big_rec, 0, thr, &mtr);
3726  }
3727 
3728  mutex_exit(&ibuf_pessimistic_insert_mutex);
3729  ibuf_size_update(root, &mtr);
3730  mutex_exit(&ibuf_mutex);
3731  ibuf->empty = page_is_empty(root);
3732 
3733  block = btr_cur_get_block(cursor);
3734  ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
3735  }
3736 
3737  if (offsets_heap) {
3738  mem_heap_free(offsets_heap);
3739  }
3740 
3741  if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
3742  /* Update the page max trx id field */
3743  page_update_max_trx_id(block, NULL,
3744  thr_get_trx(thr)->id, &mtr);
3745  }
3746 
3747 func_exit:
3748 #ifdef UNIV_IBUF_COUNT_DEBUG
3749  if (err == DB_SUCCESS) {
3750  fprintf(stderr,
3751  "Incrementing ibuf count of space %lu page %lu\n"
3752  "from %lu by 1\n", space, page_no,
3753  ibuf_count_get(space, page_no));
3754 
3755  ibuf_count_set(space, page_no,
3756  ibuf_count_get(space, page_no) + 1);
3757  }
3758 #endif
3759 
3760  ibuf_mtr_commit(&mtr);
3761  btr_pcur_close(&pcur);
3762 
3763  mem_heap_free(heap);
3764 
3765  if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
3766  ibuf_contract_after_insert(entry_size);
3767  }
3768 
3769  if (do_merge) {
3770 #ifdef UNIV_IBUF_DEBUG
3771  ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
3772 #endif
3773  buf_read_ibuf_merge_pages(false, space_ids, space_versions,
3774  page_nos, n_stored);
3775  }
3776 
3777  return(err);
3778 }
3779 
3780 /*********************************************************************/
/** Makes an index operation (insert, delete-mark or purge-delete) to the
insert/change buffer, instead of directly to the secondary index leaf
page on disk, if this is possible.  Refuses to buffer when the index is
clustered, when the current ibuf_use setting forbids the operation type,
when the target page is already in the buffer pool (or watched by a
purge thread), or when the entry is too large.
@return TRUE if the operation was successfully buffered, FALSE if the
caller must perform it directly on the index page */
3785 UNIV_INTERN
3786 ibool
3787 ibuf_insert(
3788 /*========*/
3789  ibuf_op_t op, /*!< in: operation type */
3790  const dtuple_t* entry, /*!< in: index entry to buffer */
3791  dict_index_t* index, /*!< in: index of the entry; must not be clustered */
3792  ulint space, /*!< in: tablespace id of the leaf page */
3793  ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
3794  ulint page_no, /*!< in: page number of the leaf page */
3795  que_thr_t* thr) /*!< in: query thread */
3796 {
3797  dberr_t err;
3798  ulint entry_size;
3799  ibool no_counter; /* TRUE if the buffered record carries no
 per-page operation counter (old IBUF_USE_INSERT format) */
3800  /* Read the settable global variable ibuf_use only once in
3801  this function, so that we will have a consistent view of it. */
3802  ibuf_use_t use = ibuf_use;
3803  DBUG_ENTER("ibuf_insert");
3804 
3805  DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld",
3806  op, space, page_no));
3807 
3808  ut_ad(dtuple_check_typed(entry));
3809  ut_ad(ut_is_2pow(zip_size));
3810 
3811  ut_a(!dict_index_is_clust(index));
3812 
3813  no_counter = use <= IBUF_USE_INSERT;
3814 
 /* Decide, from the (op, use) combination, whether this operation
 may be buffered at all, and whether the buffer pool watch check
 below may be skipped (only for IBUF_OP_DELETE, whose issuer has
 already set a watch). */
3815  switch (op) {
3816  case IBUF_OP_INSERT:
3817  switch (use) {
3818  case IBUF_USE_NONE:
3819  case IBUF_USE_DELETE:
3820  case IBUF_USE_DELETE_MARK:
3821  DBUG_RETURN(FALSE);
3822  case IBUF_USE_INSERT:
3823  case IBUF_USE_INSERT_DELETE_MARK:
3824  case IBUF_USE_ALL:
3825  goto check_watch;
3826  case IBUF_USE_COUNT:
3827  break;
3828  }
3829  break;
3830  case IBUF_OP_DELETE_MARK:
3831  switch (use) {
3832  case IBUF_USE_NONE:
3833  case IBUF_USE_INSERT:
3834  DBUG_RETURN(FALSE);
3835  case IBUF_USE_DELETE_MARK:
3836  case IBUF_USE_DELETE:
3837  case IBUF_USE_INSERT_DELETE_MARK:
3838  case IBUF_USE_ALL:
3839  ut_ad(!no_counter);
3840  goto check_watch;
3841  case IBUF_USE_COUNT:
3842  break;
3843  }
3844  break;
3845  case IBUF_OP_DELETE:
3846  switch (use) {
3847  case IBUF_USE_NONE:
3848  case IBUF_USE_INSERT:
3849  case IBUF_USE_INSERT_DELETE_MARK:
3850  DBUG_RETURN(FALSE);
3851  case IBUF_USE_DELETE_MARK:
3852  case IBUF_USE_DELETE:
3853  case IBUF_USE_ALL:
3854  ut_ad(!no_counter);
3855  goto skip_watch;
3856  case IBUF_USE_COUNT:
3857  break;
3858  }
3859  break;
3860  case IBUF_OP_COUNT:
3861  break;
3862  }
3863 
3864  /* unknown op or use */
3865  ut_error;
3866 
3867 check_watch:
3868  /* If a thread attempts to buffer an insert on a page while a
3869  purge is in progress on the same page, the purge must not be
3870  buffered, because it could remove a record that was
3871  re-inserted later. For simplicity, we block the buffering of
3872  all operations on a page that has a purge pending.
3873 
3874  We do not check this in the IBUF_OP_DELETE case, because that
3875  would always trigger the buffer pool watch during purge and
3876  thus prevent the buffering of delete operations. We assume
3877  that the issuer of IBUF_OP_DELETE has called
3878  buf_pool_watch_set(space, page_no). */
3879 
3880  {
3881  buf_page_t* bpage;
3882  buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3883  bpage = buf_page_hash_get(buf_pool, space, page_no);
3884 
3885  if (UNIV_LIKELY_NULL(bpage)) {
3886  /* A buffer pool watch has been set or the
3887  page has been read into the buffer pool.
3888  Do not buffer the request. If a purge operation
3889  is being buffered, have this request executed
3890  directly on the page in the buffer pool after the
3891  buffered entries for this page have been merged. */
3892  DBUG_RETURN(FALSE);
3893  }
3894  }
3895 
3896 skip_watch:
3897  entry_size = rec_get_converted_size(index, entry, 0);
3898 
3899  if (entry_size
 /* NOTE(review): the right-hand operand of this comparison
 (doxygen line 3900) is missing from this dump; it appears to
 bound the entry size by half of some free-space figure --
 confirm against the original source file. */
3901  / 2) {
3902 
3903  DBUG_RETURN(FALSE);
3904  }
3905 
 /* First try an optimistic buffered insert that only modifies one
 ibuf leaf page; if that fails, retry pessimistically, which may
 extend the ibuf tree. */
3906  err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
3907  entry, entry_size,
3908  index, space, zip_size, page_no, thr);
3909  if (err == DB_FAIL) {
3910  err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
3911  entry, entry_size,
3912  index, space, zip_size, page_no, thr);
3913  }
3914 
3915  if (err == DB_SUCCESS) {
3916 #ifdef UNIV_IBUF_DEBUG
3917  /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
3918  page_no, index->name); */
3919 #endif
3920  DBUG_RETURN(TRUE);
3921 
3922  } else {
3923  ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD);
3924 
3925  DBUG_RETURN(FALSE);
3926  }
3927 }
3928 
3929 /********************************************************************/
3933 static __attribute__((nonnull))
3934 rec_t*
3935 ibuf_insert_to_index_page_low(
3936 /*==========================*/
3937  const dtuple_t* entry,
3938  buf_block_t* block,
3940  dict_index_t* index,
3941  ulint** offsets,
3942  mem_heap_t* heap,
3943  mtr_t* mtr,
3944  page_cur_t* page_cur)
3946 {
3947  const page_t* page;
3948  ulint space;
3949  ulint page_no;
3950  ulint zip_size;
3951  const page_t* bitmap_page;
3952  ulint old_bits;
3953  rec_t* rec;
3954  DBUG_ENTER("ibuf_insert_to_index_page_low");
3955 
3956  rec = page_cur_tuple_insert(page_cur, entry, index,
3957  offsets, &heap, 0, mtr);
3958  if (rec != NULL) {
3959  DBUG_RETURN(rec);
3960  }
3961 
3962  /* Page reorganization or recompression should already have
3963  been attempted by page_cur_tuple_insert(). Besides, per
3964  ibuf_index_page_calc_free_zip() the page should not have been
3965  recompressed or reorganized. */
3966  ut_ad(!buf_block_get_page_zip(block));
3967 
3968  /* If the record did not fit, reorganize */
3969 
3970  btr_page_reorganize(page_cur, index, mtr);
3971 
3972  /* This time the record must fit */
3973 
3974  rec = page_cur_tuple_insert(page_cur, entry, index,
3975  offsets, &heap, 0, mtr);
3976  if (rec != NULL) {
3977  DBUG_RETURN(rec);
3978  }
3979 
3980  page = buf_block_get_frame(block);
3981 
3982  ut_print_timestamp(stderr);
3983 
3984  fprintf(stderr,
3985  " InnoDB: Error: Insert buffer insert fails;"
3986  " page free %lu, dtuple size %lu\n",
3987  (ulong) page_get_max_insert_size(page, 1),
3988  (ulong) rec_get_converted_size(index, entry, 0));
3989  fputs("InnoDB: Cannot insert index record ", stderr);
3990  dtuple_print(stderr, entry);
3991  fputs("\nInnoDB: The table where this index record belongs\n"
3992  "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
3993  "InnoDB: that table.\n", stderr);
3994 
3995  space = page_get_space_id(page);
3996  zip_size = buf_block_get_zip_size(block);
3997  page_no = page_get_page_no(page);
3998 
3999  bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
4000  old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
4001  IBUF_BITMAP_FREE, mtr);
4002 
4003  fprintf(stderr,
4004  "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
4005  (ulong) space, (ulong) page_no,
4006  (ulong) zip_size, (ulong) old_bits);
4007 
4008  fputs("InnoDB: Submit a detailed bug report"
4009  " to http://bugs.mysql.com\n", stderr);
4010  ut_ad(0);
4011  DBUG_RETURN(NULL);
4012 }
4013 
4014 /************************************************************************
4015 During merge, inserts to an index page a secondary index entry extracted
4016 from the insert buffer. */
4017 static
4018 void
4019 ibuf_insert_to_index_page(
4020 /*======================*/
4021  const dtuple_t* entry, /*!< in: buffered entry to insert */
4022  buf_block_t* block, /*!< in/out: index page where the buffered
 entry should be placed */
4024  dict_index_t* index, /*!< in: record descriptor */
4025  mtr_t* mtr) /*!< in/out: mini-transaction */
4026 {
4027  page_cur_t page_cur;
4028  ulint low_match;
4029  page_t* page = buf_block_get_frame(block);
4030  rec_t* rec;
4031  ulint* offsets;
4032  mem_heap_t* heap;
4033 
4034  DBUG_ENTER("ibuf_insert_to_index_page");
4035 
4036  DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block)));
4037  DBUG_PRINT("ibuf", ("index name: %s", index->name));
4038  DBUG_PRINT("ibuf", ("online status: %d",
 /* NOTE(review): the argument line of this DBUG_PRINT (doxygen
 line 4039) is missing from this dump -- confirm against the
 original source file. */
4040 
4041  ut_ad(ibuf_inside(mtr));
4042  ut_ad(dtuple_check_typed(entry));
4043  ut_ad(!buf_block_align(page)->index);
4044 
 /* Sanity checks: the page must use the same row format as the
 table, must not be empty, and the first user record must have
 the same number of fields as the buffered entry; otherwise the
 tablespace is corrupt and we only dump diagnostics. */
4045  if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
4046  != (ibool)!!page_is_comp(page))) {
4047  fputs("InnoDB: Trying to insert a record from"
4048  " the insert buffer to an index page\n"
4049  "InnoDB: but the 'compact' flag does not match!\n",
4050  stderr);
4051  goto dump;
4052  }
4053 
4054  rec = page_rec_get_next(page_get_infimum_rec(page));
4055 
4056  if (page_rec_is_supremum(rec)) {
4057  fputs("InnoDB: Trying to insert a record from"
4058  " the insert buffer to an index page\n"
4059  "InnoDB: but the index page is empty!\n",
4060  stderr);
4061  goto dump;
4062  }
4063 
4064  if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
4065  != dtuple_get_n_fields(entry))) {
4066  fputs("InnoDB: Trying to insert a record from"
4067  " the insert buffer to an index page\n"
4068  "InnoDB: but the number of fields does not match!\n",
4069  stderr);
4070 dump:
 /* NOTE(review): a call line (doxygen line 4071, apparently a
 page dump routine) is missing from this dump -- confirm
 against the original source file. */
4072 
4073  dtuple_print(stderr, entry);
4074  ut_ad(0);
4075 
4076  fputs("InnoDB: The table where where"
4077  " this index record belongs\n"
4078  "InnoDB: is now probably corrupt."
4079  " Please run CHECK TABLE on\n"
4080  "InnoDB: your tables.\n"
4081  "InnoDB: Submit a detailed bug report to"
4082  " http://bugs.mysql.com!\n", stderr);
4083 
4084  DBUG_VOID_RETURN;
4085  }
4086 
4087  low_match = page_cur_search(block, index, entry,
4088  PAGE_CUR_LE, &page_cur);
4089 
4090  heap = mem_heap_create(
4091  sizeof(upd_t)
4092  + REC_OFFS_HEADER_SIZE * sizeof(*offsets)
4093  + dtuple_get_n_fields(entry)
4094  * (sizeof(upd_field_t) + sizeof *offsets));
4095 
 /* A full match means a record with the same unique fields already
 exists on the page (typically delete-marked): update it in place
 if possible, otherwise replace it. */
4096  if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
4097  upd_t* update;
4098  page_zip_des_t* page_zip;
4099 
4100  rec = page_cur_get_rec(&page_cur);
4101 
4102  /* This is based on
4103  row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
4105 
4106  offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
4107  &heap);
 /* NOTE(review): the call line assigning 'update' (doxygen line
 4108, presumably building the record difference) is missing
 from this dump -- confirm against the original source file. */
4109  rec, index, offsets, entry, heap);
4110 
4111  page_zip = buf_block_get_page_zip(block);
4112 
4113  if (update->n_fields == 0) {
4114  /* The records only differ in the delete-mark.
4115  Clear the delete-mark, like we did before
4116  Bug #56680 was fixed. */
 /* NOTE(review): the call name on doxygen line 4117 is
 missing from this dump -- confirm against the original
 source file. */
4118  rec, page_zip, FALSE, mtr);
4119  goto updated_in_place;
4120  }
4121 
4122  /* Copy the info bits. Clear the delete-mark. */
4123  update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
4124  update->info_bits &= ~REC_INFO_DELETED_FLAG;
4125 
4126  /* We cannot invoke btr_cur_optimistic_update() here,
4127  because we do not have a btr_cur_t or que_thr_t,
4128  as the insert buffer merge occurs at a very low level. */
4129  if (!row_upd_changes_field_size_or_external(index, offsets,
4130  update)
4131  && (!page_zip || btr_cur_update_alloc_zip(
4132  page_zip, &page_cur, index, offsets,
4133  rec_offs_size(offsets), false, mtr))) {
4134  /* This is the easy case. Do something similar
4135  to btr_cur_update_in_place(). */
4136  rec = page_cur_get_rec(&page_cur);
4137  row_upd_rec_in_place(rec, index, offsets,
4138  update, page_zip);
4139  goto updated_in_place;
4140  }
4141 
4142  /* btr_cur_update_alloc_zip() may have changed this */
4143  rec = page_cur_get_rec(&page_cur);
4144 
4145  /* A collation may identify values that differ in
4146  storage length.
4147  Some examples (1 or 2 bytes):
4148  utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
4149  utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
4150  utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
4151 
4152  latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
4153 
4154  Examples of a character (3-byte UTF-8 sequence)
4155  identified with 2 or 4 characters (1-byte UTF-8 sequences):
4156 
4157  utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
4158  utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
4159  */
4160 
4161  /* Delete the different-length record, and insert the
4162  buffered one. */
4163 
4164  lock_rec_store_on_page_infimum(block, rec);
4165  page_cur_delete_rec(&page_cur, index, offsets, mtr);
4166  page_cur_move_to_prev(&page_cur);
4167  rec = ibuf_insert_to_index_page_low(entry, block, index,
4168  &offsets, heap, mtr,
4169  &page_cur);
4170 
4171  ut_ad(!cmp_dtuple_rec(entry, rec, offsets));
4172  lock_rec_restore_from_page_infimum(block, rec, block);
4173  } else {
 /* No matching record: plain insert after the cursor. */
4174  offsets = NULL;
4175  ibuf_insert_to_index_page_low(entry, block, index,
4176  &offsets, heap, mtr,
4177  &page_cur);
4178  }
4179 updated_in_place:
4180  mem_heap_free(heap);
4181 
4182  DBUG_VOID_RETURN;
4183 }
4184 
4185 /****************************************************************/
/** During merge, delete-marks on an index page the record matching a
delete-mark operation buffered in the insert buffer.  If no fully
matching record is found, prints diagnostics, because the record should
exist. */
4188 static
4189 void
4190 ibuf_set_del_mark(
4191 /*==============*/
4192  const dtuple_t* entry, /*!< in: buffered entry identifying the record */
4193  buf_block_t* block, /*!< in/out: index page containing the record */
4194  const dict_index_t* index, /*!< in: record descriptor */
4195  mtr_t* mtr) /*!< in/out: mini-transaction */
4196 {
4197  page_cur_t page_cur;
4198  ulint low_match;
4199 
4200  ut_ad(ibuf_inside(mtr));
4201  ut_ad(dtuple_check_typed(entry));
4202 
4203  low_match = page_cur_search(
4204  block, index, entry, PAGE_CUR_LE, &page_cur);
4205 
4206  if (low_match == dtuple_get_n_fields(entry)) {
4207  rec_t* rec;
4208  page_zip_des_t* page_zip;
4209 
4210  rec = page_cur_get_rec(&page_cur);
4211  page_zip = page_cur_get_page_zip(&page_cur);
4212 
4213  /* Delete mark the old index record. According to a
4214  comment in row_upd_sec_index_entry(), it can already
4215  have been delete marked if a lock wait occurred in
4216  row_ins_sec_index_entry() in a previous invocation of
4217  row_upd_sec_index_entry(). */
4218 
4219  if (UNIV_LIKELY
 /* NOTE(review): part of this condition (doxygen line 4220,
 apparently testing that the record is not already
 delete-marked) is missing from this dump -- confirm against
 the original source file. */
4221  rec, dict_table_is_comp(index->table)))) {
4222  btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
4223  TRUE, mtr);
4224  }
4225  } else {
 /* No fully matching record: dump diagnostics, since the
 record a buffered delete-mark refers to should be present. */
4226  const page_t* page
4227  = page_cur_get_page(&page_cur);
4228  const buf_block_t* block
4229  = page_cur_get_block(&page_cur);
4230 
4231  ut_print_timestamp(stderr);
4232  fputs(" InnoDB: unable to find a record to delete-mark\n",
4233  stderr);
4234  fputs("InnoDB: tuple ", stderr);
4235  dtuple_print(stderr, entry);
4236  fputs("\n"
4237  "InnoDB: record ", stderr);
4238  rec_print(stderr, page_cur_get_rec(&page_cur), index);
4239  fprintf(stderr, "\nspace %u offset %u"
4240  " (%u records, index id %llu)\n"
4241  "InnoDB: Submit a detailed bug report"
4242  " to http://bugs.mysql.com\n",
4243  (unsigned) buf_block_get_space(block),
4244  (unsigned) buf_block_get_page_no(block),
4245  (unsigned) page_get_n_recs(page),
4246  (ulonglong) btr_page_get_index_id(page));
4247  ut_ad(0);
4248  }
4249 }
4250 
4251 /****************************************************************/
/** During merge, deletes (purges) from an index page the record matching
a delete operation buffered in the insert buffer.  Refuses to purge the
last record of the page or a record that is not delete-marked; in that
case prints diagnostics.  If no matching record is found, it is assumed
to have been purged already and nothing is done. */
4253 static
4254 void
4255 ibuf_delete(
4256 /*========*/
4257  const dtuple_t* entry, /*!< in: buffered entry identifying the record */
4258  buf_block_t* block, /*!< in/out: index page containing the record */
4259  dict_index_t* index, /*!< in: record descriptor */
4260  mtr_t* mtr) /*!< in/out: mini-transaction; may latch the ibuf
 bitmap page via the free-bits update below */
4262 {
4263  page_cur_t page_cur;
4264  ulint low_match;
4265 
4266  ut_ad(ibuf_inside(mtr));
4267  ut_ad(dtuple_check_typed(entry));
4268 
4269  low_match = page_cur_search(
4270  block, index, entry, PAGE_CUR_LE, &page_cur);
4271 
4272  if (low_match == dtuple_get_n_fields(entry)) {
4273  page_zip_des_t* page_zip= buf_block_get_page_zip(block);
4274  page_t* page = buf_block_get_frame(block);
4275  rec_t* rec = page_cur_get_rec(&page_cur);
4276 
4277  /* TODO: the below should probably be a separate function,
4278  it's a bastardized version of btr_cur_optimistic_delete. */
4279 
4280  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4281  ulint* offsets = offsets_;
4282  mem_heap_t* heap = NULL;
4283  ulint max_ins_size = 0;
4284 
4285  rec_offs_init(offsets_);
4286 
4287  offsets = rec_get_offsets(
4288  rec, index, offsets, ULINT_UNDEFINED, &heap);
4289 
4290  if (page_get_n_recs(page) <= 1
4291  || !(REC_INFO_DELETED_FLAG
4292  & rec_get_info_bits(rec, page_is_comp(page)))) {
4293  /* Refuse to purge the last record or a
4294  record that has not been marked for deletion. */
4295  ut_print_timestamp(stderr);
4296  fputs(" InnoDB: unable to purge a record\n",
4297  stderr);
4298  fputs("InnoDB: tuple ", stderr);
4299  dtuple_print(stderr, entry);
4300  fputs("\n"
4301  "InnoDB: record ", stderr);
4302  rec_print_new(stderr, rec, offsets);
4303  fprintf(stderr, "\nspace %u offset %u"
4304  " (%u records, index id %llu)\n"
4305  "InnoDB: Submit a detailed bug report"
4306  " to http://bugs.mysql.com\n",
4307  (unsigned) buf_block_get_space(block),
4308  (unsigned) buf_block_get_page_no(block),
4309  (unsigned) page_get_n_recs(page),
4310  (ulonglong) btr_page_get_index_id(page));
4311 
4312  ut_ad(0);
4313  return;
4314  }
4315 
4316  lock_update_delete(block, rec);
4317 
4318  if (!page_zip) {
4319  max_ins_size
 /* NOTE(review): the right-hand side of this assignment
 (doxygen line 4320) is missing from this dump; it
 apparently computes a maximum insert size from the page --
 confirm against the original source file. */
4321  page, 1);
4322  }
4323 #ifdef UNIV_ZIP_DEBUG
4324  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
4325 #endif /* UNIV_ZIP_DEBUG */
4326  page_cur_delete_rec(&page_cur, index, offsets, mtr);
4327 #ifdef UNIV_ZIP_DEBUG
4328  ut_a(!page_zip || page_zip_validate(page_zip, page, index));
4329 #endif /* UNIV_ZIP_DEBUG */
4330 
 /* Keep the ibuf free-bits bookkeeping in sync with the
 space just released on the page. */
4331  if (page_zip) {
4332  ibuf_update_free_bits_zip(block, mtr);
4333  } else {
4334  ibuf_update_free_bits_low(block, max_ins_size, mtr);
4335  }
4336 
4337  if (UNIV_LIKELY_NULL(heap)) {
4338  mem_heap_free(heap);
4339  }
4340  } else {
4341  /* The record must have been purged already. */
4342  }
4343 }
4344 
4345 /*********************************************************************/
/** Restores insert buffer tree cursor position after a pessimistic
retry.  If the restore fails and the tablespace still exists, the ibuf
record that was inserted has disappeared unexpectedly: dump diagnostics
and validate the whole ibuf tree.
@return TRUE if the position was restored, FALSE otherwise */
4348 static __attribute__((nonnull))
4349 ibool
4350 ibuf_restore_pos(
4351 /*=============*/
4352  ulint space, /*!< in: space id of the index page the ibuf record is for */
4353  ulint page_no, /*!< in: page number of that index page */
4355  const dtuple_t* search_tuple, /*!< in: search tuple for the ibuf record */
4357  ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
4358  btr_pcur_t* pcur, /*!< in/out: persistent cursor whose position to restore */
4360  mtr_t* mtr) /*!< in/out: mini-transaction */
4361 {
4362  ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
4363 
4364  if (btr_pcur_restore_position(mode, pcur, mtr)) {
4365 
4366  return(TRUE);
4367  }
4368 
4369  if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
4370  /* The tablespace has been dropped. It is possible
4371  that another thread has deleted the insert buffer
4372  entry. Do not complain. */
 /* NOTE(review): a call line (doxygen line 4373, apparently
 committing the cursor's mini-transaction) is missing from
 this dump -- confirm against the original source file. */
4374  } else {
4375  fprintf(stderr,
4376  "InnoDB: ERROR: Submit the output to"
4377  " http://bugs.mysql.com\n"
4378  "InnoDB: ibuf cursor restoration fails!\n"
4379  "InnoDB: ibuf record inserted to page %lu:%lu\n",
4380  (ulong) space, (ulong) page_no);
4381  fflush(stderr);
4382 
4383  rec_print_old(stderr, btr_pcur_get_rec(pcur));
4384  rec_print_old(stderr, pcur->old_rec);
4385  dtuple_print(stderr, search_tuple);
4386 
4387  rec_print_old(stderr,
4388  page_rec_get_next(btr_pcur_get_rec(pcur)));
4389  fflush(stderr);
4390 
 /* NOTE(review): a call line (doxygen line 4391) is missing
 from this dump -- confirm against the original source file. */
4392 
4393  fputs("InnoDB: Validating insert buffer tree:\n", stderr);
4394  if (!btr_validate_index(ibuf->index, 0)) {
4395  ut_error;
4396  }
4397 
4398  fprintf(stderr, "InnoDB: ibuf tree ok\n");
4399  fflush(stderr);
4400  ut_ad(0);
4401  }
4402 
4403  return(FALSE);
4404 }
4405 
4406 /*********************************************************************/
/** Deletes from the insert buffer tree the record pointed to by pcur,
after it has been applied (or discarded).  First tries an optimistic
delete; if that fails, delete-marks the record, commits the
mini-transaction and retries with a pessimistic delete.
@return TRUE if the mtr was committed (and pcur closed) inside this
function, FALSE if the caller's mtr and cursor remain open */
4411 static __attribute__((warn_unused_result))
4412 ibool
4413 ibuf_delete_rec(
4414 /*============*/
4415  ulint space, /*!< in: space id of the index page the ibuf record is for */
4416  ulint page_no, /*!< in: page number of that index page */
4418  btr_pcur_t* pcur, /*!< in/out: cursor positioned on the ibuf record */
4420  const dtuple_t* search_tuple, /*!< in: search tuple to re-find the record */
4422  mtr_t* mtr) /*!< in/out: mini-transaction */
4423 {
4424  ibool success;
4425  page_t* root;
4426  dberr_t err;
4427 
4428  ut_ad(ibuf_inside(mtr));
4429  ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
4430  ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
4431  ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
4432 
4433 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
4434  if (ibuf_debug == 2) {
4435  /* Inject a fault (crash). We do this before trying
4436  optimistic delete, because a pessimistic delete in the
4437  change buffer would require a larger test case. */
4438 
4439  /* Flag the buffered record as processed, to avoid
4440  an assertion failure after crash recovery. */
 /* NOTE(review): the call name on doxygen line 4441 (apparently
 setting the delete flag on the ibuf record) is missing from
 this dump -- confirm against the original source file. */
4442  btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
4443  ibuf_mtr_commit(mtr);
4444  log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
4445  DBUG_SUICIDE();
4446  }
4447 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
4448 
4449  success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
4450  0, mtr);
4451 
4452  if (success) {
4453  if (page_is_empty(btr_pcur_get_page(pcur))) {
4454  /* If a B-tree page is empty, it must be the root page
4455  and the whole B-tree must be empty. InnoDB does not
4456  allow empty B-tree pages other than the root. */
4457  root = btr_pcur_get_page(pcur);
4458 
4459  ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
4460  ut_ad(page_get_page_no(root)
4461  == FSP_IBUF_TREE_ROOT_PAGE_NO);
4462 
4463  /* ibuf->empty is protected by the root page latch.
4464  Before the deletion, it had to be FALSE. */
4465  ut_ad(!ibuf->empty);
4466  ibuf->empty = true;
4467  }
4468 
4469 #ifdef UNIV_IBUF_COUNT_DEBUG
4470  fprintf(stderr,
4471  "Decrementing ibuf count of space %lu page %lu\n"
4472  "from %lu by 1\n", space, page_no,
4473  ibuf_count_get(space, page_no));
4474  ibuf_count_set(space, page_no,
4475  ibuf_count_get(space, page_no) - 1);
4476 #endif
 /* Optimistic delete succeeded: the caller's mtr is still open. */
4477  return(FALSE);
4478  }
4479 
4480  ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
4481  ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
4482  ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
4483 
4484  /* We have to resort to a pessimistic delete from ibuf.
4485  Delete-mark the record so that it will not be applied again,
4486  in case the server crashes before the pessimistic delete is
4487  made persistent. */
 /* NOTE(review): the call name on doxygen line 4488 (apparently
 setting the delete flag on the ibuf record) is missing from this
 dump -- confirm against the original source file. */
4489  btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
4490 
4491  btr_pcur_store_position(pcur, mtr);
 /* NOTE(review): a call line (doxygen line 4492, apparently
 committing the mini-transaction while keeping the stored cursor
 position) is missing from this dump -- confirm against the
 original source file. */
4493 
4494  ibuf_mtr_start(mtr);
4495  mutex_enter(&ibuf_mutex);
4496 
4497  if (!ibuf_restore_pos(space, page_no, search_tuple,
4498  BTR_MODIFY_TREE, pcur, mtr)) {
4499 
4500  mutex_exit(&ibuf_mutex);
4501  ut_ad(mtr->state == MTR_COMMITTED);
4502  goto func_exit;
4503  }
4504 
4505  root = ibuf_tree_root_get(mtr);
4506 
4507  btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
4508  RB_NONE, mtr);
4509  ut_a(err == DB_SUCCESS);
4510 
4511 #ifdef UNIV_IBUF_COUNT_DEBUG
4512  ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
4513 #endif
4514  ibuf_size_update(root, mtr);
4515  mutex_exit(&ibuf_mutex);
4516 
4517  ibuf->empty = page_is_empty(root);
 /* NOTE(review): a call line (doxygen line 4518, apparently
 committing the mini-transaction) is missing from this dump --
 confirm against the original source file. */
4519 
4520 func_exit:
4521  ut_ad(mtr->state == MTR_COMMITTED);
4522  btr_pcur_close(pcur);
4523 
4524  return(TRUE);
4525 }
4526 
4527 /*********************************************************************/
4534 UNIV_INTERN
4535 void
4537 /*==========================*/
4538  buf_block_t* block,
4541  ulint space,
4542  ulint page_no,
4543  ulint zip_size,
4545  ibool update_ibuf_bitmap)
4550 {
4551  mem_heap_t* heap;
4552  btr_pcur_t pcur;
4553  dtuple_t* search_tuple;
4554 #ifdef UNIV_IBUF_DEBUG
4555  ulint volume = 0;
4556 #endif
4557  page_zip_des_t* page_zip = NULL;
4558  ibool tablespace_being_deleted = FALSE;
4559  ibool corruption_noticed = FALSE;
4560  mtr_t mtr;
4561 
4562  /* Counts for merged & discarded operations. */
4563  ulint mops[IBUF_OP_COUNT];
4564  ulint dops[IBUF_OP_COUNT];
4565 
4566  ut_ad(!block || buf_block_get_space(block) == space);
4567  ut_ad(!block || buf_block_get_page_no(block) == page_no);
4568  ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
4569  ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ);
4570 
4572  || trx_sys_hdr_page(space, page_no)) {
4573  return;
4574  }
4575 
4576  /* We cannot refer to zip_size in the following, because
4577  zip_size is passed as ULINT_UNDEFINED (it is unknown) when
4578  buf_read_ibuf_merge_pages() is merging (discarding) changes
4579  for a dropped tablespace. When block != NULL or
4580  update_ibuf_bitmap is specified, the zip_size must be known.
4581  That is why we will repeat the check below, with zip_size in
4582  place of 0. Passing zip_size as 0 assumes that the
4583  uncompressed page size always is a power-of-2 multiple of the
4584  compressed page size. */
4585 
4586  if (ibuf_fixed_addr_page(space, 0, page_no)
4587  || fsp_descr_page(0, page_no)) {
4588  return;
4589  }
4590 
4591  if (UNIV_LIKELY(update_ibuf_bitmap)) {
4592  ut_a(ut_is_2pow(zip_size));
4593 
4594  if (ibuf_fixed_addr_page(space, zip_size, page_no)
4595  || fsp_descr_page(zip_size, page_no)) {
4596  return;
4597  }
4598 
4599  /* If the following returns FALSE, we get the counter
4600  incremented, and must decrement it when we leave this
4601  function. When the counter is > 0, that prevents tablespace
4602  from being dropped. */
4603 
4604  tablespace_being_deleted = fil_inc_pending_ops(space);
4605 
4606  if (UNIV_UNLIKELY(tablespace_being_deleted)) {
4607  /* Do not try to read the bitmap page from space;
4608  just delete the ibuf records for the page */
4609 
4610  block = NULL;
4611  update_ibuf_bitmap = FALSE;
4612  } else {
4613  page_t* bitmap_page;
4614  ulint bitmap_bits;
4615 
4616  ibuf_mtr_start(&mtr);
4617 
4618  bitmap_page = ibuf_bitmap_get_map_page(
4619  space, page_no, zip_size, &mtr);
4620  bitmap_bits = ibuf_bitmap_page_get_bits(
4621  bitmap_page, page_no, zip_size,
4622  IBUF_BITMAP_BUFFERED, &mtr);
4623 
4624  ibuf_mtr_commit(&mtr);
4625 
4626  if (!bitmap_bits) {
4627  /* No inserts buffered for this page */
4628 
4629  if (!tablespace_being_deleted) {
4630  fil_decr_pending_ops(space);
4631  }
4632 
4633  return;
4634  }
4635  }
4636  } else if (block
4637  && (ibuf_fixed_addr_page(space, zip_size, page_no)
4638  || fsp_descr_page(zip_size, page_no))) {
4639 
4640  return;
4641  }
4642 
4643  heap = mem_heap_create(512);
4644 
4645  search_tuple = ibuf_search_tuple_build(space, page_no, heap);
4646 
4647  if (block) {
4648  /* Move the ownership of the x-latch on the page to this OS
4649  thread, so that we can acquire a second x-latch on it. This
4650  is needed for the insert operations to the index page to pass
4651  the debug checks. */
4652 
4654  page_zip = buf_block_get_page_zip(block);
4655 
4656  if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
4657  != FIL_PAGE_INDEX)
4658  || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
4659 
4660  page_t* bitmap_page;
4661 
4662  corruption_noticed = TRUE;
4663 
4664  ut_print_timestamp(stderr);
4665 
4666  ibuf_mtr_start(&mtr);
4667 
4668  fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
4669  stderr);
4670 
4671  bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
4672  zip_size, &mtr);
4673  buf_page_print(bitmap_page, 0,
4675  ibuf_mtr_commit(&mtr);
4676 
4677  fputs("\nInnoDB: Dump of the page:\n", stderr);
4678 
4679  buf_page_print(block->frame, 0,
4681 
4682  fprintf(stderr,
4683  "InnoDB: Error: corruption in the tablespace."
4684  " Bitmap shows insert\n"
4685  "InnoDB: buffer records to page n:o %lu"
4686  " though the page\n"
4687  "InnoDB: type is %lu, which is"
4688  " not an index leaf page!\n"
4689  "InnoDB: We try to resolve the problem"
4690  " by skipping the insert buffer\n"
4691  "InnoDB: merge for this page."
4692  " Please run CHECK TABLE on your tables\n"
4693  "InnoDB: to determine if they are corrupt"
4694  " after this.\n\n"
4695  "InnoDB: Please submit a detailed bug report"
4696  " to http://bugs.mysql.com\n\n",
4697  (ulong) page_no,
4698  (ulong)
4699  fil_page_get_type(block->frame));
4700  ut_ad(0);
4701  }
4702  }
4703 
4704  memset(mops, 0, sizeof(mops));
4705  memset(dops, 0, sizeof(dops));
4706 
4707 loop:
4708  ibuf_mtr_start(&mtr);
4709 
4710  /* Position pcur in the insert buffer at the first entry for this
4711  index page */
4712  btr_pcur_open_on_user_rec(
4713  ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
4714  &pcur, &mtr);
4715 
4716  if (block) {
4717  ibool success;
4718 
4719  success = buf_page_get_known_nowait(
4720  RW_X_LATCH, block,
4721  BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
4722 
4723  ut_a(success);
4724 
4725  /* This is a user page (secondary index leaf page),
4726  but we pretend that it is a change buffer page in
4727  order to obey the latching order. This should be OK,
4728  because buffered changes are applied immediately while
4729  the block is io-fixed. Other threads must not try to
4730  latch an io-fixed block. */
4731  buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
4732  }
4733 
4734  if (!btr_pcur_is_on_user_rec(&pcur)) {
4735  ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
4736 
4737  goto reset_bit;
4738  }
4739 
4740  for (;;) {
4741  rec_t* rec;
4742 
4744 
4745  rec = btr_pcur_get_rec(&pcur);
4746 
4747  /* Check if the entry is for this index page */
4748  if (ibuf_rec_get_page_no(&mtr, rec) != page_no
4749  || ibuf_rec_get_space(&mtr, rec) != space) {
4750 
4751  if (block) {
4753  block->frame, page_zip, &mtr);
4754  }
4755 
4756  goto reset_bit;
4757  }
4758 
4759  if (UNIV_UNLIKELY(corruption_noticed)) {
4760  fputs("InnoDB: Discarding record\n ", stderr);
4761  rec_print_old(stderr, rec);
4762  fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
4763  } else if (block && !rec_get_deleted_flag(rec, 0)) {
4764  /* Now we have at pcur a record which should be
4765  applied on the index page; NOTE that the call below
4766  copies pointers to fields in rec, and we must
4767  keep the latch to the rec page until the
4768  insertion is finished! */
4769  dtuple_t* entry;
4770  trx_id_t max_trx_id;
4771  dict_index_t* dummy_index;
4772  ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec);
4773 
4774  max_trx_id = page_get_max_trx_id(page_align(rec));
4775  page_update_max_trx_id(block, page_zip, max_trx_id,
4776  &mtr);
4777 
4778  ut_ad(page_validate(page_align(rec), ibuf->index));
4779 
4780  entry = ibuf_build_entry_from_ibuf_rec(
4781  &mtr, rec, heap, &dummy_index);
4782 
4783  ut_ad(page_validate(block->frame, dummy_index));
4784 
4785  switch (op) {
4786  ibool success;
4787  case IBUF_OP_INSERT:
4788 #ifdef UNIV_IBUF_DEBUG
4789  volume += rec_get_converted_size(
4790  dummy_index, entry, 0);
4791 
4792  volume += page_dir_calc_reserved_space(1);
4793 
4794  ut_a(volume <= 4 * UNIV_PAGE_SIZE
4795  / IBUF_PAGE_SIZE_PER_FREE_SPACE);
4796 #endif
4797  ibuf_insert_to_index_page(
4798  entry, block, dummy_index, &mtr);
4799  break;
4800 
4801  case IBUF_OP_DELETE_MARK:
4802  ibuf_set_del_mark(
4803  entry, block, dummy_index, &mtr);
4804  break;
4805 
4806  case IBUF_OP_DELETE:
4807  ibuf_delete(entry, block, dummy_index, &mtr);
4808  /* Because ibuf_delete() will latch an
4809  insert buffer bitmap page, commit mtr
4810  before latching any further pages.
4811  Store and restore the cursor position. */
4812  ut_ad(rec == btr_pcur_get_rec(&pcur));
4814  ut_ad(ibuf_rec_get_page_no(&mtr, rec)
4815  == page_no);
4816  ut_ad(ibuf_rec_get_space(&mtr, rec) == space);
4817 
4818  /* Mark the change buffer record processed,
4819  so that it will not be merged again in case
4820  the server crashes between the following
4821  mtr_commit() and the subsequent mtr_commit()
4822  of deleting the change buffer record. */
4823 
4825  btr_pcur_get_rec(&pcur), NULL,
4826  TRUE, &mtr);
4827 
4828  btr_pcur_store_position(&pcur, &mtr);
4829  ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
4830 
4831  ibuf_mtr_start(&mtr);
4832 
4833  success = buf_page_get_known_nowait(
4834  RW_X_LATCH, block,
4835  BUF_KEEP_OLD,
4836  __FILE__, __LINE__, &mtr);
4837  ut_a(success);
4838 
4839  /* This is a user page (secondary
4840  index leaf page), but it should be OK
4841  to use too low latching order for it,
4842  as the block is io-fixed. */
4843  buf_block_dbg_add_level(
4844  block, SYNC_IBUF_TREE_NODE);
4845 
4846  if (!ibuf_restore_pos(space, page_no,
4847  search_tuple,
4849  &pcur, &mtr)) {
4850 
4851  ut_ad(mtr.state == MTR_COMMITTED);
4852  mops[op]++;
4853  ibuf_dummy_index_free(dummy_index);
4854  goto loop;
4855  }
4856 
4857  break;
4858  default:
4859  ut_error;
4860  }
4861 
4862  mops[op]++;
4863 
4864  ibuf_dummy_index_free(dummy_index);
4865  } else {
4866  dops[ibuf_rec_get_op_type(&mtr, rec)]++;
4867  }
4868 
4869  /* Delete the record from ibuf */
4870  if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
4871  &mtr)) {
4872  /* Deletion was pessimistic and mtr was committed:
4873  we start from the beginning again */
4874 
4875  ut_ad(mtr.state == MTR_COMMITTED);
4876  goto loop;
4877  } else if (btr_pcur_is_after_last_on_page(&pcur)) {
4878  ibuf_mtr_commit(&mtr);
4879  btr_pcur_close(&pcur);
4880 
4881  goto loop;
4882  }
4883  }
4884 
4885 reset_bit:
4886  if (UNIV_LIKELY(update_ibuf_bitmap)) {
4887  page_t* bitmap_page;
4888 
4889  bitmap_page = ibuf_bitmap_get_map_page(
4890  space, page_no, zip_size, &mtr);
4891 
4892  ibuf_bitmap_page_set_bits(
4893  bitmap_page, page_no, zip_size,
4894  IBUF_BITMAP_BUFFERED, FALSE, &mtr);
4895 
4896  if (block) {
4897  ulint old_bits = ibuf_bitmap_page_get_bits(
4898  bitmap_page, page_no, zip_size,
4899  IBUF_BITMAP_FREE, &mtr);
4900 
4901  ulint new_bits = ibuf_index_page_calc_free(
4902  zip_size, block);
4903 
4904  if (old_bits != new_bits) {
4905  ibuf_bitmap_page_set_bits(
4906  bitmap_page, page_no, zip_size,
4907  IBUF_BITMAP_FREE, new_bits, &mtr);
4908  }
4909  }
4910  }
4911 
4912  ibuf_mtr_commit(&mtr);
4913  btr_pcur_close(&pcur);
4914  mem_heap_free(heap);
4915 
4916 #ifdef HAVE_ATOMIC_BUILTINS
4917  os_atomic_increment_ulint(&ibuf->n_merges, 1);
4918  ibuf_add_ops(ibuf->n_merged_ops, mops);
4919  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4920 #else /* HAVE_ATOMIC_BUILTINS */
4921  /* Protect our statistics keeping from race conditions */
4922  mutex_enter(&ibuf_mutex);
4923 
4924  ibuf->n_merges++;
4925  ibuf_add_ops(ibuf->n_merged_ops, mops);
4926  ibuf_add_ops(ibuf->n_discarded_ops, dops);
4927 
4928  mutex_exit(&ibuf_mutex);
4929 #endif /* HAVE_ATOMIC_BUILTINS */
4930 
4931  if (update_ibuf_bitmap && !tablespace_being_deleted) {
4932 
4933  fil_decr_pending_ops(space);
4934  }
4935 
4936 #ifdef UNIV_IBUF_COUNT_DEBUG
4937  ut_a(ibuf_count_get(space, page_no) == 0);
4938 #endif
4939 }
4940 
4941 /*********************************************************************/
4946 UNIV_INTERN
4947 void
4949 /*============================*/
4950  ulint space)
4951 {
4952  mem_heap_t* heap;
4953  btr_pcur_t pcur;
4954  dtuple_t* search_tuple;
4955  const rec_t* ibuf_rec;
4956  ulint page_no;
4957  mtr_t mtr;
4958 
4959  /* Counts for discarded operations. */
4960  ulint dops[IBUF_OP_COUNT];
4961 
4962  heap = mem_heap_create(512);
4963 
4964  /* Use page number 0 to build the search tuple so that we get the
4965  cursor positioned at the first entry for this space id */
4966 
4967  search_tuple = ibuf_search_tuple_build(space, 0, heap);
4968 
4969  memset(dops, 0, sizeof(dops));
4970 loop:
4971  ibuf_mtr_start(&mtr);
4972 
4973  /* Position pcur in the insert buffer at the first entry for the
4974  space */
4975  btr_pcur_open_on_user_rec(
4976  ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
4977  &pcur, &mtr);
4978 
4979  if (!btr_pcur_is_on_user_rec(&pcur)) {
4980  ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
4981 
4982  goto leave_loop;
4983  }
4984 
4985  for (;;) {
4987 
4988  ibuf_rec = btr_pcur_get_rec(&pcur);
4989 
4990  /* Check if the entry is for this space */
4991  if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) {
4992 
4993  goto leave_loop;
4994  }
4995 
4996  page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec);
4997 
4998  dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
4999 
5000  /* Delete the record from ibuf */
5001  if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
5002  &mtr)) {
5003  /* Deletion was pessimistic and mtr was committed:
5004  we start from the beginning again */
5005 
5006  ut_ad(mtr.state == MTR_COMMITTED);
5007  goto loop;
5008  }
5009 
5010  if (btr_pcur_is_after_last_on_page(&pcur)) {
5011  ibuf_mtr_commit(&mtr);
5012  btr_pcur_close(&pcur);
5013 
5014  goto loop;
5015  }
5016  }
5017 
5018 leave_loop:
5019  ibuf_mtr_commit(&mtr);
5020  btr_pcur_close(&pcur);
5021 
5022 #ifdef HAVE_ATOMIC_BUILTINS
5023  ibuf_add_ops(ibuf->n_discarded_ops, dops);
5024 #else /* HAVE_ATOMIC_BUILTINS */
5025  /* Protect our statistics keeping from race conditions */
5026  mutex_enter(&ibuf_mutex);
5027  ibuf_add_ops(ibuf->n_discarded_ops, dops);
5028  mutex_exit(&ibuf_mutex);
5029 #endif /* HAVE_ATOMIC_BUILTINS */
5030 
5031  mem_heap_free(heap);
5032 }
5033 
5034 /******************************************************************/
5037 UNIV_INTERN
5038 bool
5039 ibuf_is_empty(void)
5040 /*===============*/
5041 {
5042  bool is_empty;
5043  const page_t* root;
5044  mtr_t mtr;
5045 
5046  ibuf_mtr_start(&mtr);
5047 
5048  mutex_enter(&ibuf_mutex);
5049  root = ibuf_tree_root_get(&mtr);
5050  mutex_exit(&ibuf_mutex);
5051 
5052  is_empty = page_is_empty(root);
5053  ut_a(is_empty == ibuf->empty);
5054  ibuf_mtr_commit(&mtr);
5055 
5056  return(is_empty);
5057 }
5058 
5059 /******************************************************************/
5061 UNIV_INTERN
5062 void
5063 ibuf_print(
5064 /*=======*/
5065  FILE* file)
5066 {
5067 #ifdef UNIV_IBUF_COUNT_DEBUG
5068  ulint i;
5069  ulint j;
5070 #endif
5071 
5072  mutex_enter(&ibuf_mutex);
5073 
5074  fprintf(file,
5075  "Ibuf: size %lu, free list len %lu,"
5076  " seg size %lu, %lu merges\n",
5077  (ulong) ibuf->size,
5078  (ulong) ibuf->free_list_len,
5079  (ulong) ibuf->seg_size,
5080  (ulong) ibuf->n_merges);
5081 
5082  fputs("merged operations:\n ", file);
5083  ibuf_print_ops(ibuf->n_merged_ops, file);
5084 
5085  fputs("discarded operations:\n ", file);
5086  ibuf_print_ops(ibuf->n_discarded_ops, file);
5087 
5088 #ifdef UNIV_IBUF_COUNT_DEBUG
5089  for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
5090  for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
5091  ulint count = ibuf_count_get(i, j);
5092 
5093  if (count > 0) {
5094  fprintf(stderr,
5095  "Ibuf count for space/page %lu/%lu"
5096  " is %lu\n",
5097  (ulong) i, (ulong) j, (ulong) count);
5098  }
5099  }
5100  }
5101 #endif /* UNIV_IBUF_COUNT_DEBUG */
5102 
5103  mutex_exit(&ibuf_mutex);
5104 }
5105 
5106 /******************************************************************/
5109 UNIV_INTERN
5110 dberr_t
5112 /*========================*/
5113  const trx_t* trx,
5114  ulint space_id)
5115 {
5116  ulint zip_size;
5117  ulint page_size;
5118  ulint size;
5119  ulint page_no;
5120 
5121  ut_ad(space_id);
5122  ut_ad(trx->mysql_thd);
5123 
5124  zip_size = fil_space_get_zip_size(space_id);
5125 
5126  if (zip_size == ULINT_UNDEFINED) {
5127  return(DB_TABLE_NOT_FOUND);
5128  }
5129 
5130  size = fil_space_get_size(space_id);
5131 
5132  if (size == 0) {
5133  return(DB_TABLE_NOT_FOUND);
5134  }
5135 
5136  mutex_enter(&ibuf_mutex);
5137 
5138  page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
5139 
5140  for (page_no = 0; page_no < size; page_no += page_size) {
5141  mtr_t mtr;
5142  page_t* bitmap_page;
5143  ulint i;
5144 
5145  if (trx_is_interrupted(trx)) {
5146  mutex_exit(&ibuf_mutex);
5147  return(DB_INTERRUPTED);
5148  }
5149 
5150  mtr_start(&mtr);
5151 
5152  mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
5153 
5154  ibuf_enter(&mtr);
5155 
5156  bitmap_page = ibuf_bitmap_get_map_page(
5157  space_id, page_no, zip_size, &mtr);
5158 
5159  for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
5160  const ulint offset = page_no + i;
5161 
5163  bitmap_page, offset, zip_size,
5164  IBUF_BITMAP_IBUF, &mtr)) {
5165 
5166  mutex_exit(&ibuf_mutex);
5167  ibuf_exit(&mtr);
5168  mtr_commit(&mtr);
5169 
5170  ib_errf(trx->mysql_thd,
5171  IB_LOG_LEVEL_ERROR,
5172  ER_INNODB_INDEX_CORRUPT,
5173  "Space %u page %u"
5174  " is wrongly flagged to belong to the"
5175  " insert buffer",
5176  (unsigned) space_id,
5177  (unsigned) offset);
5178 
5179  return(DB_CORRUPTION);
5180  }
5181 
5183  bitmap_page, offset, zip_size,
5184  IBUF_BITMAP_BUFFERED, &mtr)) {
5185 
5186  ib_errf(trx->mysql_thd,
5187  IB_LOG_LEVEL_WARN,
5188  ER_INNODB_INDEX_CORRUPT,
5189  "Buffered changes"
5190  " for space %u page %u are lost",
5191  (unsigned) space_id,
5192  (unsigned) offset);
5193 
5194  /* Tolerate this error, so that
5195  slightly corrupted tables can be
5196  imported and dumped. Clear the bit. */
5197  ibuf_bitmap_page_set_bits(
5198  bitmap_page, offset, zip_size,
5199  IBUF_BITMAP_BUFFERED, FALSE, &mtr);
5200  }
5201  }
5202 
5203  ibuf_exit(&mtr);
5204  mtr_commit(&mtr);
5205  }
5206 
5207  mutex_exit(&ibuf_mutex);
5208  return(DB_SUCCESS);
5209 }
5210 #endif /* !UNIV_HOTBACKUP */