MySQL 5.6.14 Source Code Document
buf0buf.cc
1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; version 2 of the License.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc.,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
23 
24 *****************************************************************************/
25 
26 /**************************************************//**
27 @file buf/buf0buf.cc
28 The database buffer buf_pool
29 
30 Created 11/5/1995 Heikki Tuuri
31 *******************************************************/
32 
33 #include "buf0buf.h"
34 
35 #ifdef UNIV_NONINL
36 #include "buf0buf.ic"
37 #endif
38 
39 #include "mem0mem.h"
40 #include "btr0btr.h"
41 #include "fil0fil.h"
42 #ifndef UNIV_HOTBACKUP
43 #include "buf0buddy.h"
44 #include "lock0lock.h"
45 #include "btr0sea.h"
46 #include "ibuf0ibuf.h"
47 #include "trx0undo.h"
48 #include "log0log.h"
49 #endif /* !UNIV_HOTBACKUP */
50 #include "srv0srv.h"
51 #include "dict0dict.h"
52 #include "log0recv.h"
53 #include "page0zip.h"
54 #include "srv0mon.h"
55 #include "buf0checksum.h"
56 
57 /*
58  IMPLEMENTATION OF THE BUFFER POOL
59  =================================
60 
61 Performance improvement:
62 ------------------------
63 Thread scheduling in NT may be so slow that the OS wait mechanism should
64 not be used even in waiting for disk reads to complete.
65 Rather, we should put waiting query threads into the queue of
66 waiting jobs, and let the OS thread do something useful while the i/o
67 is processed. In this way we could remove most OS thread switches in
68 an i/o-intensive benchmark like TPC-C.
69 
70 A possibility is to put a user space thread library between the database
71 and NT. User space thread libraries might be very fast.
72 
73 SQL Server 7.0 can be configured to use 'fibers' which are lightweight
74 threads in NT. These should be studied.
75 
76  Buffer frames and blocks
77  ------------------------
78 Following the terminology of Gray and Reuter, we call the memory
79 blocks where file pages are loaded buffer frames. For each buffer
80 frame there is a control block, or, for short, a block, in the buffer
81 control array. The control info that does not need to be stored
82 in the file along with the file page resides in the control block.
83 
84  Buffer pool struct
85  ------------------
86 The buffer buf_pool contains a single mutex which protects all the
87 control data structures of the buf_pool. The content of a buffer frame is
88 protected by a separate read-write lock in its control block, though.
89 These locks can be locked and unlocked without owning the buf_pool->mutex.
90 The OS events in the buf_pool struct can be waited for without owning the
91 buf_pool->mutex.
92 
93 The buf_pool->mutex is a hot-spot in main memory, causing a lot of
94 memory bus traffic on multiprocessor systems when processors
95 alternately access the mutex. On our Pentium, the mutex is accessed
96 maybe every 10 microseconds. We gave up on the idea of having a
97 separate mutex for each control block, because it seemed too
98 complicated.
99 
100 A solution to reduce mutex contention of the buf_pool->mutex is to
101 create a separate mutex for the page hash table. On Pentium,
102 accessing the hash table takes 2 microseconds, about half
103 of the total buf_pool->mutex hold time.
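
A rough sketch of a lookup under this split scheme (the helper names
are the ones used later in this file; the fragment is illustrative,
not the exact protocol):

	rw_lock_s_lock(hash_lock);
	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
	if (bpage != NULL) {
		bpage->buf_fix_count++;	// pin: block cannot be evicted
	}
	rw_lock_s_unlock(hash_lock);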
104 
105  Control blocks
106  --------------
107 
108 The control block contains, for instance, the bufferfix count
109 which is incremented when a thread wants a file page to be fixed
110 in a buffer frame. The bufferfix operation does not lock the
111 contents of the frame, however. For this purpose, the control
112 block contains a read-write lock.
113 
114 The buffer frames have to be aligned so that the start memory
115 address of a frame is divisible by the universal page size, which
116 is a power of two.
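
For example, since the page size is a power of two, the alignment can
be done with a single mask; this is essentially what ut_align() does
in buf_chunk_init() below:

	frame = (byte*) (((ulint) ptr + UNIV_PAGE_SIZE - 1)
			 & ~((ulint) UNIV_PAGE_SIZE - 1));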
117 
118 We intend to make the buffer buf_pool size on-line reconfigurable,
119 that is, the buf_pool size can be changed without closing the database.
120 Then the database administrator may adjust it to be bigger
121 at night, for example. The control block array must
122 contain enough control blocks for the maximum buffer buf_pool size
123 which is used in the particular database.
124 If the buf_pool size is cut, we exploit the virtual memory mechanism of
125 the OS, and just refrain from using frames at high addresses. Then the OS
126 can swap them to disk.
127 
128 The control blocks containing file pages are put into a hash table
129 according to the file address of the page.
130 We could speed up the access to an individual page by using
131 "pointer swizzling": we could replace the page references on
132 non-leaf index pages by direct pointers to the page, if it exists
133 in the buf_pool. We could make a separate hash table where we could
134 chain all the page references in non-leaf pages residing in the buf_pool,
135 using the page reference as the hash key,
136 and, at the time of reading a page, update the pointers accordingly.
137 Drawbacks of this solution are added complexity and,
138 possibly, extra space required on non-leaf pages for memory pointers.
139 A simpler solution is just to speed up the hash table mechanism
140 in the database, using tables whose size is a power of 2.
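
With a power-of-2 size, the modulo operation in a hash lookup reduces
to a bit-mask, for example:

	cell = fold & (table_size - 1);	// same as fold % table_size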
141 
142  Lists of blocks
143  ---------------
144 
145 There are several lists of control blocks.
146 
147 The free list (buf_pool->free) contains blocks which are currently not
148 used.
149 
150 The common LRU list contains all the blocks holding a file page
151 except those for which the bufferfix count is non-zero.
152 The pages are in the LRU list roughly in the order of the last
153 access to the page, so that the oldest pages are at the end of the
154 list. We also keep a pointer to near the end of the LRU list,
155 which we can use when we want to artificially age a page in the
156 buf_pool. This is used if we know that some page is not needed
157 again for some time: we insert the block right after the pointer,
158 causing it to be replaced sooner than would normally be the case.
159 Currently this aging mechanism is used by the read-ahead mechanism
160 for pages, and it can also be used when there is a scan of a full
161 table which cannot fit in memory. By putting such pages near the
162 end of the LRU list, we make sure that most of the buf_pool stays
163 in main memory, undisturbed.
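
A sketch of the aging trick (the real implementation lives in
buf0lru.cc; the list macros are the ones used throughout this file):

	// instead of the LRU head, link the page in at the "old"
	// pointer so that it is replaced sooner
	UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
			     &block->page);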
164 
165 The unzip_LRU list contains a subset of the common LRU list. The
166 blocks on the unzip_LRU list hold a compressed file page and the
167 corresponding uncompressed page frame. A block is in unzip_LRU if and
168 only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
169 holds. The blocks in unzip_LRU will be in the same order as they are in
170 the common LRU list. That is, each manipulation of the common LRU
171 list will result in the same manipulation of the unzip_LRU list.
172 
173 The chain of modified blocks (buf_pool->flush_list) contains the blocks
174 holding file pages that have been modified in the memory
175 but not written to disk yet. The block with the oldest modification
176 which has not yet been written to disk is at the end of the chain.
177 The access to this list is protected by buf_pool->flush_list_mutex.
178 
179 The chain of unmodified compressed blocks (buf_pool->zip_clean)
180 contains the control blocks (buf_page_t) of those compressed pages
181 that are not in buf_pool->flush_list and for which no uncompressed
182 page has been allocated in the buffer pool. The control blocks for
183 uncompressed pages are accessible via buf_block_t objects that are
184 reachable via buf_pool->chunks[].
185 
186 The chains of free memory blocks (buf_pool->zip_free[]) are used by
187 the buddy allocator (buf0buddy.cc) to keep track of currently unused
188 memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
189 blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
190 BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
191 pool. The buddy allocator is solely used for allocating control
192 blocks for compressed pages (buf_page_t) and compressed page frames.
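
A sketch of the size classes served by the buddy allocator (the
bounds are the ones stated above; the real allocator rounds sizes up
to powers of two):

	for (size = sizeof(buf_page_t);	// smallest tracked size
	     size <= UNIV_PAGE_SIZE / 2;
	     size <<= 1) {
		// one free list in buf_pool->zip_free[] per size class
	}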
193 
194  Loading a file page
195  -------------------
196 
197 First, a victim block for replacement has to be found in the
198 buf_pool. It is taken from the free list or searched for from the
199 end of the LRU-list. An exclusive lock is reserved for the frame,
200 the io_fix field is set in the block fixing the block in buf_pool,
201 and the io-operation for loading the page is queued. The io-handler thread
202 releases the X-lock on the frame and resets the io_fix field
203 when the io operation completes.
204 
205 A thread may request the above operation using the function
206 buf_page_get(). It may then continue to request a lock on the frame.
207 The lock is granted when the io-handler releases the x-lock.
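
A typical caller thus looks roughly like this (buf_page_get() is the
real entry point; the surrounding variables and the mini-transaction
handling are abbreviated):

	block = buf_page_get(space, zip_size, offset, RW_S_LATCH, &mtr);
	// ... read the frame while holding the S-latch ...
	// mtr_commit(&mtr) releases the latch and the buffer-fix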
208 
209  Read-ahead
210  ----------
211 
212 The read-ahead mechanism is intended to be intelligent and
213 isolated from the semantically higher levels of the database
214 index management. From the higher level we only need the
215 information whether a file page has a natural successor or
216 predecessor page. On the leaf level of a B-tree index,
217 these are the next and previous pages in the natural
218 order of the pages.
219 
220 Let us first explain the read-ahead mechanism when the leaf pages
221 of a B-tree are scanned in an ascending or descending order.
222 When a page is referenced in the buf_pool for the first time,
223 the buffer manager checks if it is at the border of a so-called
224 linear read-ahead area. The tablespace is divided into these
225 areas of size 64 blocks, for example. So if the page is at the
226 border of such an area, the read-ahead mechanism checks if
227 all the other blocks in the area have been accessed in an
228 ascending or descending order. If this is the case, the system
229 looks at the natural successor or predecessor of the page,
230 checks if that is at the border of another area, and in this case
231 issues read-requests for all the pages in that area. Maybe
232 we could relax the condition that all the pages in the area
233 have to be accessed: if data is deleted from a table, there may
234 appear holes of unused pages in the area.
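
With an area size of 64 pages the border test is simple arithmetic:

	low  = (offset / 64) * 64;	// first page of the area
	high = low + 64;		// first page of the next area
	// read-ahead is considered when offset == low
	// or offset == high - 1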
235 
236 A different read-ahead mechanism is used when there appears
237 to be a random access pattern to a file.
238 If a new page is referenced in the buf_pool, and several pages
239 of its random access area (for instance, 32 consecutive pages
240 in a tablespace) have recently been referenced, we may predict
241 that the whole area may be needed in the near future, and issue
242 the read requests for the whole area.
243 */
244 
245 #ifndef UNIV_HOTBACKUP
246 
247 static const int WAIT_FOR_READ = 100;
249 static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
250 
251 /** The buffer pools of the database */
252 UNIV_INTERN buf_pool_t* buf_pool_ptr;
253 
254 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
255 static ulint buf_dbg_counter = 0;
258 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
259 #ifdef UNIV_DEBUG
260 
262 UNIV_INTERN ibool buf_debug_prints = FALSE;
263 #endif /* UNIV_DEBUG */
264 
265 #ifdef UNIV_PFS_RWLOCK
266 /* Keys to register buffer block related rwlocks and mutexes with
267 performance schema */
268 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
269 # ifdef UNIV_SYNC_DEBUG
270 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
271 # endif /* UNIV_SYNC_DEBUG */
272 #endif /* UNIV_PFS_RWLOCK */
273 
274 #ifdef UNIV_PFS_MUTEX
275 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
276 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
277 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
278 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
279 #endif /* UNIV_PFS_MUTEX */
280 
281 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
282 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
283 
284 /* Buffer block mutexes and rwlocks can be registered
285 in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
286 is defined, register buffer block mutex and rwlock
287 in one group after their initialization. */
288 # define PFS_GROUP_BUFFER_SYNC
289 
290 /* This define caps the number of mutexes/rwlocks can
291 be registered with performance schema. Developers can
292 modify this define if necessary. Please note, this would
293 be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
294 # define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
295 
296 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
297 #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
298 
301 #define MONITOR_RW_COUNTER(io_type, counter) \
302  ((io_type == BUF_IO_READ) \
303  ? (counter##_READ) \
304  : (counter##_WRITTEN))
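/* Usage sketch (illustrative; MONITOR_INDEX_PAGE is a placeholder
counter name, not necessarily one defined in srv0mon.h):

	MONITOR_INC(MONITOR_RW_COUNTER(io_type, MONITOR_INDEX_PAGE));

This expands to MONITOR_INDEX_PAGE_READ when io_type == BUF_IO_READ,
and to MONITOR_INDEX_PAGE_WRITTEN otherwise. */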
305 
306 /********************************************************************/
310 UNIV_INTERN
311 lsn_t
312 buf_pool_get_oldest_modification(void)
313 /*==================================*/
314 {
315  ulint i;
316  buf_page_t* bpage;
317  lsn_t lsn = 0;
318  lsn_t oldest_lsn = 0;
319 
320  /* When we traverse all the flush lists we don't want another
321  thread to add a dirty page to any flush list. */
322  log_flush_order_mutex_enter();
323 
324  for (i = 0; i < srv_buf_pool_instances; i++) {
325  buf_pool_t* buf_pool;
326 
327  buf_pool = buf_pool_from_array(i);
328 
329  buf_flush_list_mutex_enter(buf_pool);
330 
331  bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
332 
333  if (bpage != NULL) {
334  ut_ad(bpage->in_flush_list);
335  lsn = bpage->oldest_modification;
336  }
337 
338  buf_flush_list_mutex_exit(buf_pool);
339 
340  if (!oldest_lsn || oldest_lsn > lsn) {
341  oldest_lsn = lsn;
342  }
343  }
344 
345  log_flush_order_mutex_exit();
346 
347  /* The returned answer may be out of date: the flush_list can
348  change after the mutex has been released. */
349 
350  return(oldest_lsn);
351 }
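/* Usage sketch (illustrative, not part of the original source): the
checkpoint logic treats this LSN as the limit up to which redo log
records may still be needed, e.g.

	lsn_t	oldest = buf_pool_get_oldest_modification();

	if (oldest == 0) {
		// every page is clean; the checkpoint can advance
		// to the current LSN
	}
*/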
352 
353 /********************************************************************/
355 UNIV_INTERN
356 void
357 buf_get_total_list_len(
358 /*===================*/
359  ulint* LRU_len,
360  ulint* free_len,
361  ulint* flush_list_len)
362 {
363  ulint i;
364 
365  *LRU_len = 0;
366  *free_len = 0;
367  *flush_list_len = 0;
368 
369  for (i = 0; i < srv_buf_pool_instances; i++) {
370  buf_pool_t* buf_pool;
371 
372  buf_pool = buf_pool_from_array(i);
373 
374  *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
375  *free_len += UT_LIST_GET_LEN(buf_pool->free);
376  *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
377  }
378 }
379 
380 /********************************************************************/
382 UNIV_INTERN
383 void
384 buf_get_total_list_size_in_bytes(
385 /*=============================*/
386  buf_pools_list_size_t* buf_pools_list_size)
388 {
389  ut_ad(buf_pools_list_size);
390  memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
391 
392  for (ulint i = 0; i < srv_buf_pool_instances; i++) {
393  buf_pool_t* buf_pool;
394 
395  buf_pool = buf_pool_from_array(i);
396  /* We don't need mutex protection since this is
397  for statistics purposes only */
398  buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
399  buf_pools_list_size->unzip_LRU_bytes +=
400  UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
401  buf_pools_list_size->flush_list_bytes +=
402  buf_pool->stat.flush_list_bytes;
403  }
404 }
405 
406 /********************************************************************/
408 UNIV_INTERN
409 void
410 buf_get_total_stat(
411 /*===============*/
412  buf_pool_stat_t* tot_stat)
413 {
414  ulint i;
415 
416  memset(tot_stat, 0, sizeof(*tot_stat));
417 
418  for (i = 0; i < srv_buf_pool_instances; i++) {
419  buf_pool_stat_t*buf_stat;
420  buf_pool_t* buf_pool;
421 
422  buf_pool = buf_pool_from_array(i);
423 
424  buf_stat = &buf_pool->stat;
425  tot_stat->n_page_gets += buf_stat->n_page_gets;
426  tot_stat->n_pages_read += buf_stat->n_pages_read;
427  tot_stat->n_pages_written += buf_stat->n_pages_written;
428  tot_stat->n_pages_created += buf_stat->n_pages_created;
429  tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd;
430  tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
431  tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
432  tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
433 
434  tot_stat->n_pages_not_made_young +=
435  buf_stat->n_pages_not_made_young;
436  }
437 }
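/* Example (illustrative): the aggregated counters allow a rough
buffer pool hit ratio to be computed:

	buf_pool_stat_t	stat;
	double		hit;

	buf_get_total_stat(&stat);
	hit = stat.n_page_gets
		? 1.0 - (double) stat.n_pages_read
			/ (double) stat.n_page_gets
		: 1.0;
*/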
438 
439 /********************************************************************/
442 UNIV_INTERN
443 buf_block_t*
444 buf_block_alloc(
445 /*============*/
446  buf_pool_t* buf_pool)
449 {
450  buf_block_t* block;
451  ulint index;
452  static ulint buf_pool_index;
453 
454  if (buf_pool == NULL) {
455  /* We are allocating memory from any buffer pool, ensure
456  we spread the allocations evenly across all buffer pool instances. */
457  index = buf_pool_index++ % srv_buf_pool_instances;
458  buf_pool = buf_pool_from_array(index);
459  }
460 
461  block = buf_LRU_get_free_block(buf_pool);
462 
463  ut_ad(buf_pool_from_block(block) == buf_pool);
464 
465  return(block);
466 }
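/* Usage sketch (illustrative): a caller takes a free block, uses its
frame as scratch memory and returns it with buf_block_free():

	buf_block_t*	block = buf_block_alloc(NULL);

	memset(block->frame, 0, UNIV_PAGE_SIZE);
	// ... use block->frame ...
	buf_block_free(block);
*/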
467 #endif /* !UNIV_HOTBACKUP */
468 
469 /********************************************************************/
472 UNIV_INTERN
473 ibool
474 buf_page_is_corrupted(
475 /*==================*/
476  bool check_lsn,
478  const byte* read_buf,
479  ulint zip_size)
481 {
482  ulint checksum_field1;
483  ulint checksum_field2;
484  ibool crc32_inited = FALSE;
485  ib_uint32_t crc32 = ULINT32_UNDEFINED;
486 
487  if (!zip_size
488  && memcmp(read_buf + FIL_PAGE_LSN + 4,
489  read_buf + UNIV_PAGE_SIZE
490  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
491 
492  /* Stored log sequence numbers at the start and the end
493  of page do not match */
494 
495  return(TRUE);
496  }
497 
498 #ifndef UNIV_HOTBACKUP
499  if (check_lsn && recv_lsn_checks_on) {
500  lsn_t current_lsn;
501 
502  /* Since we are going to reset the page LSN during the import
503  phase it makes no sense to spam the log with error messages. */
504 
505  if (log_peek_lsn(&current_lsn)
506  && current_lsn
507  < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
508  ut_print_timestamp(stderr);
509 
510  fprintf(stderr,
511  " InnoDB: Error: page %lu log sequence number"
512  " " LSN_PF "\n"
513  "InnoDB: is in the future! Current system "
514  "log sequence number " LSN_PF ".\n"
515  "InnoDB: Your database may be corrupt or "
516  "you may have copied the InnoDB\n"
517  "InnoDB: tablespace but not the InnoDB "
518  "log files. See\n"
519  "InnoDB: " REFMAN
520  "forcing-innodb-recovery.html\n"
521  "InnoDB: for more information.\n",
522  (ulong) mach_read_from_4(
523  read_buf + FIL_PAGE_OFFSET),
524  (lsn_t) mach_read_from_8(
525  read_buf + FIL_PAGE_LSN),
526  current_lsn);
527  }
528  }
529 #endif
530 
531  /* Check whether the checksum fields have correct values */
532 
533  if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
534  return(FALSE);
535  }
536 
537  if (zip_size) {
538  return(!page_zip_verify_checksum(read_buf, zip_size));
539  }
540 
541  checksum_field1 = mach_read_from_4(
542  read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
543 
544  checksum_field2 = mach_read_from_4(
545  read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
546 
547  /* declare empty pages non-corrupted */
548  if (checksum_field1 == 0 && checksum_field2 == 0
549  && mach_read_from_4(read_buf + FIL_PAGE_LSN) == 0) {
550  /* make sure that the page is really empty */
551  ut_d(for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) {
552  ut_a(read_buf[i] == 0); });
553 
554  return(FALSE);
555  }
556 
557  switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
558  case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
559 
560  crc32 = buf_calc_page_crc32(read_buf);
561 
562  return(checksum_field1 != crc32 || checksum_field2 != crc32);
563 
564  case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
565 
566  return(checksum_field1
567  != buf_calc_page_new_checksum(read_buf)
568  || checksum_field2
569  != buf_calc_page_old_checksum(read_buf));
570 
571  case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
572 
573  return(checksum_field1 != BUF_NO_CHECKSUM_MAGIC
574  || checksum_field2 != BUF_NO_CHECKSUM_MAGIC);
575 
576  case SRV_CHECKSUM_ALGORITHM_CRC32:
577  case SRV_CHECKSUM_ALGORITHM_INNODB:
578  /* There are 3 valid formulas for
579  checksum_field2 (old checksum field):
580 
581  1. Very old versions of InnoDB only stored 8 byte lsn to the
582  start and the end of the page.
583 
584  2. InnoDB versions before MySQL 5.6.3 store the old formula
585  checksum (buf_calc_page_old_checksum()).
586 
587  3. InnoDB versions 5.6.3 and newer with
588  innodb_checksum_algorithm=strict_crc32|crc32 store CRC32. */
589 
590  /* since innodb_checksum_algorithm is not one of the strict_*
591  settings, allow any of the algorithms to match for the old field */
592 
593  if (checksum_field2
594  != mach_read_from_4(read_buf + FIL_PAGE_LSN)
595  && checksum_field2 != BUF_NO_CHECKSUM_MAGIC) {
596 
597  /* The checksum does not match any of the
598  fast-to-check values. First check the selected algorithm
599  for writing checksums, because we assume that the
600  chance of it matching is higher. */
601 
602  if (srv_checksum_algorithm
603  == SRV_CHECKSUM_ALGORITHM_CRC32) {
604 
605  crc32 = buf_calc_page_crc32(read_buf);
606  crc32_inited = TRUE;
607 
608  if (checksum_field2 != crc32
609  && checksum_field2
610  != buf_calc_page_old_checksum(read_buf)) {
611 
612  return(TRUE);
613  }
614  } else {
615  ut_ad(srv_checksum_algorithm
616  == SRV_CHECKSUM_ALGORITHM_INNODB);
617 
618  if (checksum_field2
619  != buf_calc_page_old_checksum(read_buf)) {
620 
621  crc32 = buf_calc_page_crc32(read_buf);
622  crc32_inited = TRUE;
623 
624  if (checksum_field2 != crc32) {
625  return(TRUE);
626  }
627  }
628  }
629  }
630 
631  /* old field is fine, check the new field */
632 
633  /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
634  (always equal to 0) in FIL_PAGE_SPACE_OR_CHKSUM */
635 
636  if (checksum_field1 != 0
637  && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
638 
639  /* The checksum does not match any of the
640  fast-to-check values. First check the selected algorithm
641  for writing checksums, because we assume that the
642  chance of it matching is higher. */
643 
644  if (srv_checksum_algorithm
645  == SRV_CHECKSUM_ALGORITHM_CRC32) {
646 
647  if (!crc32_inited) {
648  crc32 = buf_calc_page_crc32(read_buf);
649  crc32_inited = TRUE;
650  }
651 
652  if (checksum_field1 != crc32
653  && checksum_field1
654  != buf_calc_page_new_checksum(read_buf)) {
655 
656  return(TRUE);
657  }
658  } else {
659  ut_ad(srv_checksum_algorithm
660  == SRV_CHECKSUM_ALGORITHM_INNODB);
661 
662  if (checksum_field1
663  != buf_calc_page_new_checksum(read_buf)) {
664 
665  if (!crc32_inited) {
666  crc32 = buf_calc_page_crc32(
667  read_buf);
668  crc32_inited = TRUE;
669  }
670 
671  if (checksum_field1 != crc32) {
672  return(TRUE);
673  }
674  }
675  }
676  }
677 
678  /* If CRC32 is stored in at least one of the fields, then the
679  other field must also be CRC32 */
680  if (crc32_inited
681  && ((checksum_field1 == crc32
682  && checksum_field2 != crc32)
683  || (checksum_field1 != crc32
684  && checksum_field2 == crc32))) {
685 
686  return(TRUE);
687  }
688 
689  break;
690  case SRV_CHECKSUM_ALGORITHM_NONE:
691  /* should have returned FALSE earlier */
692  ut_error;
693  /* no default so the compiler will emit a warning if a new enum
694  value is added and not handled here */
695  }
696 
697  DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
698 
699  return(FALSE);
700 }
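/* Usage sketch (illustrative): validating an uncompressed page that
was just read from a data file:

	if (buf_page_is_corrupted(true, read_buf, 0)) {
		buf_page_print(read_buf, 0, BUF_PAGE_PRINT_NO_CRASH);
		// treat the tablespace as corrupted
	}
*/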
701 
702 /********************************************************************/
704 UNIV_INTERN
705 void
706 buf_page_print(
707 /*===========*/
708  const byte* read_buf,
709  ulint zip_size,
711  ulint flags)
715 {
716 #ifndef UNIV_HOTBACKUP
717  dict_index_t* index;
718 #endif /* !UNIV_HOTBACKUP */
719  ulint size = zip_size;
720 
721  if (!size) {
722  size = UNIV_PAGE_SIZE;
723  }
724 
725  if (!(flags & BUF_PAGE_PRINT_NO_FULL)) {
726  ut_print_timestamp(stderr);
727  fprintf(stderr,
728  " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
729  (ulong) size);
730  ut_print_buf(stderr, read_buf, size);
731  fputs("\nInnoDB: End of page dump\n", stderr);
732  }
733 
734  if (zip_size) {
735  /* Print compressed page. */
736  ut_print_timestamp(stderr);
737  fprintf(stderr,
738  " InnoDB: Compressed page type (" ULINTPF "); "
739  "stored checksum in field1 " ULINTPF "; "
740  "calculated checksums for field1: "
741  "%s " ULINTPF ", "
742  "%s " ULINTPF ", "
743  "%s " ULINTPF "; "
744  "page LSN " LSN_PF "; "
745  "page number (if stored to page already) " ULINTPF "; "
746  "space id (if stored to page already) " ULINTPF "\n",
747  fil_page_get_type(read_buf),
748  mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
749  buf_checksum_algorithm_name(
750  SRV_CHECKSUM_ALGORITHM_CRC32),
751  page_zip_calc_checksum(read_buf, zip_size,
752  SRV_CHECKSUM_ALGORITHM_CRC32),
753  buf_checksum_algorithm_name(
754  SRV_CHECKSUM_ALGORITHM_INNODB),
755  page_zip_calc_checksum(read_buf, zip_size,
756  SRV_CHECKSUM_ALGORITHM_INNODB),
757  buf_checksum_algorithm_name(
758  SRV_CHECKSUM_ALGORITHM_NONE),
759  page_zip_calc_checksum(read_buf, zip_size,
760  SRV_CHECKSUM_ALGORITHM_NONE),
761  mach_read_from_8(read_buf + FIL_PAGE_LSN),
762  mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
763  mach_read_from_4(read_buf
764  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
765  } else {
766  ut_print_timestamp(stderr);
767  fprintf(stderr, " InnoDB: uncompressed page, "
768  "stored checksum in field1 " ULINTPF ", "
769  "calculated checksums for field1: "
770  "%s " UINT32PF ", "
771  "%s " ULINTPF ", "
772  "%s " ULINTPF ", "
773 
774  "stored checksum in field2 " ULINTPF ", "
775  "calculated checksums for field2: "
776  "%s " UINT32PF ", "
777  "%s " ULINTPF ", "
778  "%s " ULINTPF ", "
779 
780  "page LSN " ULINTPF " " ULINTPF ", "
781  "low 4 bytes of LSN at page end " ULINTPF ", "
782  "page number (if stored to page already) " ULINTPF ", "
783  "space id (if created with >= MySQL-4.1.1 "
784  "and stored already) %lu\n",
787  buf_calc_page_crc32(read_buf),
788  buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
789  buf_calc_page_new_checksum(read_buf),
790  buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
791  BUF_NO_CHECKSUM_MAGIC,
792 
793  mach_read_from_4(read_buf + UNIV_PAGE_SIZE
794  - FIL_PAGE_END_LSN_OLD_CHKSUM),
795  buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
796  buf_calc_page_crc32(read_buf),
797  buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
798  buf_calc_page_old_checksum(read_buf),
799  buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
800  BUF_NO_CHECKSUM_MAGIC,
801 
802  mach_read_from_4(read_buf + FIL_PAGE_LSN),
803  mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
804  mach_read_from_4(read_buf + UNIV_PAGE_SIZE
805  - FIL_PAGE_END_LSN_OLD_CHKSUM),
806  mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
807  mach_read_from_4(read_buf
808  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
809  }
810 
811 #ifndef UNIV_HOTBACKUP
812  if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
813  == TRX_UNDO_INSERT) {
814  fprintf(stderr,
815  "InnoDB: Page may be an insert undo log page\n");
816  } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
817  + TRX_UNDO_PAGE_TYPE)
818  == TRX_UNDO_UPDATE) {
819  fprintf(stderr,
820  "InnoDB: Page may be an update undo log page\n");
821  }
822 #endif /* !UNIV_HOTBACKUP */
823 
824  switch (fil_page_get_type(read_buf)) {
825  index_id_t index_id;
826  case FIL_PAGE_INDEX:
827  index_id = btr_page_get_index_id(read_buf);
828  fprintf(stderr,
829  "InnoDB: Page may be an index page where"
830  " index id is %llu\n",
831  (ullint) index_id);
832 #ifndef UNIV_HOTBACKUP
833  index = dict_index_find_on_id_low(index_id);
834  if (index) {
835  fputs("InnoDB: (", stderr);
836  dict_index_name_print(stderr, NULL, index);
837  fputs(")\n", stderr);
838  }
839 #endif /* !UNIV_HOTBACKUP */
840  break;
841  case FIL_PAGE_INODE:
842  fputs("InnoDB: Page may be an 'inode' page\n", stderr);
843  break;
844  case FIL_PAGE_IBUF_FREE_LIST:
845  fputs("InnoDB: Page may be an insert buffer free list page\n",
846  stderr);
847  break;
848  case FIL_PAGE_TYPE_ALLOCATED:
849  fputs("InnoDB: Page may be a freshly allocated page\n",
850  stderr);
851  break;
852  case FIL_PAGE_IBUF_BITMAP:
853  fputs("InnoDB: Page may be an insert buffer bitmap page\n",
854  stderr);
855  break;
856  case FIL_PAGE_TYPE_SYS:
857  fputs("InnoDB: Page may be a system page\n",
858  stderr);
859  break;
860  case FIL_PAGE_TYPE_TRX_SYS:
861  fputs("InnoDB: Page may be a transaction system page\n",
862  stderr);
863  break;
864  case FIL_PAGE_TYPE_FSP_HDR:
865  fputs("InnoDB: Page may be a file space header page\n",
866  stderr);
867  break;
868  case FIL_PAGE_TYPE_XDES:
869  fputs("InnoDB: Page may be an extent descriptor page\n",
870  stderr);
871  break;
872  case FIL_PAGE_TYPE_BLOB:
873  fputs("InnoDB: Page may be a BLOB page\n",
874  stderr);
875  break;
876  case FIL_PAGE_TYPE_ZBLOB:
877  case FIL_PAGE_TYPE_ZBLOB2:
878  fputs("InnoDB: Page may be a compressed BLOB page\n",
879  stderr);
880  break;
881  }
882 
883  ut_ad(flags & BUF_PAGE_PRINT_NO_CRASH);
884 }
885 
886 #ifndef UNIV_HOTBACKUP
887 
888 # ifdef PFS_GROUP_BUFFER_SYNC
889 /********************************************************************/
895 static
896 void
897 pfs_register_buffer_block(
898 /*======================*/
899  buf_chunk_t* chunk)
900 {
901  ulint i;
902  ulint num_to_register;
903  buf_block_t* block;
904 
905  block = chunk->blocks;
906 
907  num_to_register = ut_min(chunk->size,
908  PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
909 
910  for (i = 0; i < num_to_register; i++) {
911  ib_mutex_t* mutex;
912  rw_lock_t* rwlock;
913 
914 # ifdef UNIV_PFS_MUTEX
915  mutex = &block->mutex;
916  ut_a(!mutex->pfs_psi);
917  mutex->pfs_psi = (PSI_server)
918  ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
919  : NULL;
920 # endif /* UNIV_PFS_MUTEX */
921 
922 # ifdef UNIV_PFS_RWLOCK
923  rwlock = &block->lock;
924  ut_a(!rwlock->pfs_psi);
925  rwlock->pfs_psi = (PSI_server)
926  ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
927  : NULL;
928 
929 # ifdef UNIV_SYNC_DEBUG
930  rwlock = &block->debug_latch;
931  ut_a(!rwlock->pfs_psi);
932  rwlock->pfs_psi = (PSI_server)
933  ? PSI_server->init_rwlock(buf_block_debug_latch_key,
934  rwlock)
935  : NULL;
936 # endif /* UNIV_SYNC_DEBUG */
937 
938 # endif /* UNIV_PFS_RWLOCK */
939  block++;
940  }
941 }
942 # endif /* PFS_GROUP_BUFFER_SYNC */
943 
944 /********************************************************************/
946 static
947 void
948 buf_block_init(
949 /*===========*/
950  buf_pool_t* buf_pool,
951  buf_block_t* block,
952  byte* frame)
953 {
954  UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
955 
956  block->frame = frame;
957 
958  block->page.buf_pool_index = buf_pool_index(buf_pool);
959  block->page.state = BUF_BLOCK_NOT_USED;
960  block->page.buf_fix_count = 0;
961  block->page.io_fix = BUF_IO_NONE;
962 
963  block->modify_clock = 0;
964 
965 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
966  block->page.file_page_was_freed = FALSE;
967 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
968 
969  block->check_index_page_at_flush = FALSE;
970  block->index = NULL;
971 
972 #ifdef UNIV_DEBUG
973  block->page.in_page_hash = FALSE;
974  block->page.in_zip_hash = FALSE;
975  block->page.in_flush_list = FALSE;
976  block->page.in_free_list = FALSE;
977  block->page.in_LRU_list = FALSE;
978  block->in_unzip_LRU_list = FALSE;
979 #endif /* UNIV_DEBUG */
980 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
981  block->n_pointers = 0;
982 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
983  page_zip_des_init(&block->page.zip);
984 
985 #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
986  /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
987  of buffer block mutex/rwlock with performance schema. If
988  PFS_GROUP_BUFFER_SYNC is defined, skip the registration
989  since buffer block mutex/rwlock will be registered later in
990  pfs_register_buffer_block() */
991 
992  mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
993  rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
994 
995 # ifdef UNIV_SYNC_DEBUG
996  rw_lock_create(PFS_NOT_INSTRUMENTED,
997  &block->debug_latch, SYNC_NO_ORDER_CHECK);
998 # endif /* UNIV_SYNC_DEBUG */
999 
1000 #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
1001  mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
1002  rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
1003 
1004 # ifdef UNIV_SYNC_DEBUG
1005  rw_lock_create(buf_block_debug_latch_key,
1006  &block->debug_latch, SYNC_NO_ORDER_CHECK);
1007 # endif /* UNIV_SYNC_DEBUG */
1008 #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
1009 
1010  ut_ad(rw_lock_validate(&(block->lock)));
1011 }
1012 
1013 /********************************************************************/
1016 static
1017 buf_chunk_t*
1018 buf_chunk_init(
1019 /*===========*/
1020  buf_pool_t* buf_pool,
1021  buf_chunk_t* chunk,
1022  ulint mem_size)
1023 {
1024  buf_block_t* block;
1025  byte* frame;
1026  ulint i;
1027 
1028  /* Round down to a multiple of page size,
1029  although it already should be. */
1030  mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
1031  /* Reserve space for the block descriptors. */
1032  mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
1033  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
1034 
1035  chunk->mem_size = mem_size;
1036  chunk->mem = os_mem_alloc_large(&chunk->mem_size);
1037 
1038  if (UNIV_UNLIKELY(chunk->mem == NULL)) {
1039 
1040  return(NULL);
1041  }
1042 
1043  /* Allocate the block descriptors from
1044  the start of the memory block. */
1045  chunk->blocks = (buf_block_t*) chunk->mem;
1046 
1047  /* Align a pointer to the first frame. Note that when
1048  os_large_page_size is smaller than UNIV_PAGE_SIZE,
1049  we may allocate one fewer block than requested. When
1050  it is bigger, we may allocate more blocks than requested. */
1051 
1052  frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE);
1053  chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
1054  - (frame != chunk->mem);
1055 
1056  /* Subtract the space needed for block descriptors. */
1057  {
1058  ulint size = chunk->size;
1059 
1060  while (frame < (byte*) (chunk->blocks + size)) {
1061  frame += UNIV_PAGE_SIZE;
1062  size--;
1063  }
1064 
1065  chunk->size = size;
1066  }
1067 
1068  /* Init block structs and assign frames for them. Then we
1069  assign the frames to the first blocks (we already mapped the
1070  memory above). */
1071 
1072  block = chunk->blocks;
1073 
1074  for (i = chunk->size; i--; ) {
1075 
1076  buf_block_init(buf_pool, block, frame);
1077  UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
1078 
1079  /* Add the block to the free list */
1080  UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
1081 
1082  ut_d(block->page.in_free_list = TRUE);
1083  ut_ad(buf_pool_from_block(block) == buf_pool);
1084 
1085  block++;
1086  frame += UNIV_PAGE_SIZE;
1087  }
1088 
1089 #ifdef PFS_GROUP_BUFFER_SYNC
1090  pfs_register_buffer_block(chunk);
1091 #endif
1092  return(chunk);
1093 }
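/* Example (illustrative): with UNIV_PAGE_SIZE = 16384, a 128 MB chunk
contains 8192 page-sized units. Each frame also needs a buf_block_t
descriptor, so roughly 8192 * 16384 / (16384 + sizeof(buf_block_t))
frames remain once the loop above has carved out the descriptor
array. */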
1094 
1095 #ifdef UNIV_DEBUG
1096 /*********************************************************************/
1100 static
1101 buf_block_t*
1102 buf_chunk_contains_zip(
1103 /*===================*/
1104  buf_chunk_t* chunk,
1105  const void* data)
1106 {
1107  buf_block_t* block;
1108  ulint i;
1109 
1110  block = chunk->blocks;
1111 
1112  for (i = chunk->size; i--; block++) {
1113  if (block->page.zip.data == data) {
1114 
1115  return(block);
1116  }
1117  }
1118 
1119  return(NULL);
1120 }
1121 
1122 /*********************************************************************/
1126 UNIV_INTERN
1127 buf_block_t*
1128 buf_pool_contains_zip(
1129 /*==================*/
1130  buf_pool_t* buf_pool,
1131  const void* data)
1132 {
1133  ulint n;
1134  buf_chunk_t* chunk = buf_pool->chunks;
1135 
1136  ut_ad(buf_pool);
1137  ut_ad(buf_pool_mutex_own(buf_pool));
1138  for (n = buf_pool->n_chunks; n--; chunk++) {
1139 
1140  buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1141 
1142  if (block) {
1143  return(block);
1144  }
1145  }
1146 
1147  return(NULL);
1148 }
1149 #endif /* UNIV_DEBUG */
1150 
1151 /*********************************************************************/
1154 static
1155 const buf_block_t*
1156 buf_chunk_not_freed(
1157 /*================*/
1158  buf_chunk_t* chunk)
1159 {
1160  buf_block_t* block;
1161  ulint i;
1162 
1163  block = chunk->blocks;
1164 
1165  for (i = chunk->size; i--; block++) {
1166  ibool ready;
1167 
1168  switch (buf_block_get_state(block)) {
1169  case BUF_BLOCK_POOL_WATCH:
1170  case BUF_BLOCK_ZIP_PAGE:
1171  case BUF_BLOCK_ZIP_DIRTY:
1172  /* The uncompressed buffer pool should never
1173  contain compressed block descriptors. */
1174  ut_error;
1175  break;
1176  case BUF_BLOCK_NOT_USED:
1177  case BUF_BLOCK_READY_FOR_USE:
1178  case BUF_BLOCK_MEMORY:
1179  case BUF_BLOCK_REMOVE_HASH:
1180  /* Skip blocks that are not being used for
1181  file pages. */
1182  break;
1183  case BUF_BLOCK_FILE_PAGE:
1184  mutex_enter(&block->mutex);
1185  ready = buf_flush_ready_for_replace(&block->page);
1186  mutex_exit(&block->mutex);
1187 
1188  if (!ready) {
1189 
1190  return(block);
1191  }
1192 
1193  break;
1194  }
1195  }
1196 
1197  return(NULL);
1198 }
1199 
1200 /********************************************************************/
1202 static
1203 void
1204 buf_pool_set_sizes(void)
1205 /*====================*/
1206 {
1207  ulint i;
1208  ulint curr_size = 0;
1209 
1210  buf_pool_mutex_enter_all();
1211 
1212  for (i = 0; i < srv_buf_pool_instances; i++) {
1213  buf_pool_t* buf_pool;
1214 
1215  buf_pool = buf_pool_from_array(i);
1216  curr_size += buf_pool->curr_pool_size;
1217  }
1218 
1219  srv_buf_pool_curr_size = curr_size;
1220  srv_buf_pool_old_size = srv_buf_pool_size;
1221 
1222  buf_pool_mutex_exit_all();
1223 }
1224 
1225 /********************************************************************/
1228 UNIV_INTERN
1229 ulint
1230 buf_pool_init_instance(
1231 /*===================*/
1232  buf_pool_t* buf_pool,
1233  ulint buf_pool_size,
1234  ulint instance_no)
1235 {
1236  ulint i;
1237  buf_chunk_t* chunk;
1238 
1239  /* 1. Initialize general fields
1240  ------------------------------- */
1241  mutex_create(buf_pool_mutex_key,
1242  &buf_pool->mutex, SYNC_BUF_POOL);
1243  mutex_create(buf_pool_zip_mutex_key,
1244  &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1245 
1246  buf_pool_mutex_enter(buf_pool);
1247 
1248  if (buf_pool_size > 0) {
1249  buf_pool->n_chunks = 1;
1250 
1251  buf_pool->chunks = chunk =
1252  (buf_chunk_t*) mem_zalloc(sizeof *chunk);
1253 
1254  UT_LIST_INIT(buf_pool->free);
1255 
1256  if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1257  mem_free(chunk);
1258  mem_free(buf_pool);
1259 
1260  buf_pool_mutex_exit(buf_pool);
1261 
1262  return(DB_ERROR);
1263  }
1264 
1265  buf_pool->instance_no = instance_no;
1266  buf_pool->old_pool_size = buf_pool_size;
1267  buf_pool->curr_size = chunk->size;
1268  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1269 
1270  /* Number of locks protecting page_hash must be a
1271  power of two */
1272  srv_n_page_hash_locks = static_cast<ulong>(
1273  ut_2_power_up(srv_n_page_hash_locks));
1274  ut_a(srv_n_page_hash_locks != 0);
1275  ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);
1276 
1277  buf_pool->page_hash = ha_create(2 * buf_pool->curr_size,
1278  srv_n_page_hash_locks,
1279  MEM_HEAP_FOR_PAGE_HASH,
1280  SYNC_BUF_PAGE_HASH);
1281 
1282  buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1283 
1284  buf_pool->last_printout_time = ut_time();
1285  }
1286  /* 2. Initialize flushing fields
1287  -------------------------------- */
1288 
1289  mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1290  SYNC_BUF_FLUSH_LIST);
1291 
1292  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1293  buf_pool->no_flush[i] = os_event_create();
1294  }
1295 
1296  buf_pool->watch = (buf_page_t*) mem_zalloc(
1297  sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE);
1298 
1299  /* All fields are initialized by mem_zalloc(). */
1300 
1301  buf_pool->try_LRU_scan = TRUE;
1302 
1303  buf_pool_mutex_exit(buf_pool);
1304 
1305  return(DB_SUCCESS);
1306 }
1307 
1308 /********************************************************************/
1310 static
1311 void
1312 buf_pool_free_instance(
1313 /*===================*/
1314  buf_pool_t* buf_pool) /* in,own: buffer pool instance
1315  to free */
1316 {
1317  buf_chunk_t* chunk;
1318  buf_chunk_t* chunks;
1319  buf_page_t* bpage;
1320 
1321  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1322  while (bpage != NULL) {
1323  buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
1324  enum buf_page_state state = buf_page_get_state(bpage);
1325 
1326  ut_ad(buf_page_in_file(bpage));
1327  ut_ad(bpage->in_LRU_list);
1328 
1329  if (state != BUF_BLOCK_FILE_PAGE) {
1330  /* We must not have any dirty block except
1331  when doing a fast shutdown. */
1332  ut_ad(state == BUF_BLOCK_ZIP_PAGE
1333  || srv_fast_shutdown == 2);
1334  buf_page_free_descriptor(bpage);
1335  }
1336 
1337  bpage = prev_bpage;
1338  }
1339 
1340  mem_free(buf_pool->watch);
1341  buf_pool->watch = NULL;
1342 
1343  chunks = buf_pool->chunks;
1344  chunk = chunks + buf_pool->n_chunks;
1345 
1346  while (--chunk >= chunks) {
1347  os_mem_free_large(chunk->mem, chunk->mem_size);
1348  }
1349 
1350  mem_free(buf_pool->chunks);
1351  ha_clear(buf_pool->page_hash);
1352  hash_table_free(buf_pool->page_hash);
1353  hash_table_free(buf_pool->zip_hash);
1354 }
1355 
1356 /********************************************************************/
1359 UNIV_INTERN
1360 dberr_t
1361 buf_pool_init(
1362 /*==========*/
1363  ulint total_size,
1364  ulint n_instances)
1365 {
1366  ulint i;
1367  const ulint size = total_size / n_instances;
1368 
1369  ut_ad(n_instances > 0);
1370  ut_ad(n_instances <= MAX_BUFFER_POOLS);
1371  ut_ad(n_instances == srv_buf_pool_instances);
1372 
1373  buf_pool_ptr = (buf_pool_t*) mem_zalloc(
1374  n_instances * sizeof *buf_pool_ptr);
1375 
1376  for (i = 0; i < n_instances; i++) {
1377  buf_pool_t* ptr = &buf_pool_ptr[i];
1378 
1379  if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
1380 
1381  /* Free all the instances created so far. */
1382  buf_pool_free(i);
1383 
1384  return(DB_ERROR);
1385  }
1386  }
1387 
1388  buf_pool_set_sizes();
1389  buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
1390 
1391  btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
1392 
1393  return(DB_SUCCESS);
1394 }
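/* Usage sketch (illustrative): at startup the server calls

	dberr_t	err = buf_pool_init(srv_buf_pool_size,
				    srv_buf_pool_instances);

which splits the configured total evenly across the instances; the
caller aborts startup unless err == DB_SUCCESS. */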
1395 
1396 /********************************************************************/
1399 UNIV_INTERN
1400 void
1401 buf_pool_free(
1402 /*==========*/
1403  ulint n_instances)
1404 {
1405  ulint i;
1406 
1407  for (i = 0; i < n_instances; i++) {
1408  buf_pool_free_instance(buf_pool_from_array(i));
1409  }
1410 
1411  mem_free(buf_pool_ptr);
1412  buf_pool_ptr = NULL;
1413 }
1414 
1415 /********************************************************************/
1417 UNIV_INTERN
1418 void
1419 buf_pool_clear_hash_index(void)
1420 /*===========================*/
1421 {
1422  ulint p;
1423 
1424 #ifdef UNIV_SYNC_DEBUG
1425  ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
1426 #endif /* UNIV_SYNC_DEBUG */
1427  ut_ad(!btr_search_enabled);
1428 
1429  for (p = 0; p < srv_buf_pool_instances; p++) {
1430  buf_pool_t* buf_pool = buf_pool_from_array(p);
1431  buf_chunk_t* chunks = buf_pool->chunks;
1432  buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1433 
1434  while (--chunk >= chunks) {
1435  buf_block_t* block = chunk->blocks;
1436  ulint i = chunk->size;
1437 
1438  for (; i--; block++) {
1439  dict_index_t* index = block->index;
1440 
1441  /* We can set block->index = NULL
1442  when we have an x-latch on btr_search_latch;
1443  see the comment in buf0buf.h */
1444 
1445  if (!index) {
1446  /* Not hashed */
1447  continue;
1448  }
1449 
1450  block->index = NULL;
1451 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
1452  block->n_pointers = 0;
1453 # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1454  }
1455  }
1456  }
1457 }
1458 
1459 /********************************************************************/
1463 UNIV_INTERN
1464 void
1465 buf_relocate(
1466 /*=========*/
1467  buf_page_t* bpage,
1470  buf_page_t* dpage)
1471 {
1472  buf_page_t* b;
1473  ulint fold;
1474  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1475 
1476  fold = buf_page_address_fold(bpage->space, bpage->offset);
1477 
1478  ut_ad(buf_pool_mutex_own(buf_pool));
1479  ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
1480  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1481  ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1482  ut_a(bpage->buf_fix_count == 0);
1483  ut_ad(bpage->in_LRU_list);
1484  ut_ad(!bpage->in_zip_hash);
1485  ut_ad(bpage->in_page_hash);
1486  ut_ad(bpage == buf_page_hash_get_low(buf_pool,
1487  bpage->space,
1488  bpage->offset,
1489  fold));
1490 
1491  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1492 #ifdef UNIV_DEBUG
1493  switch (buf_page_get_state(bpage)) {
1494  case BUF_BLOCK_POOL_WATCH:
1495  case BUF_BLOCK_NOT_USED:
1496  case BUF_BLOCK_READY_FOR_USE:
1497  case BUF_BLOCK_FILE_PAGE:
1498  case BUF_BLOCK_MEMORY:
1499  case BUF_BLOCK_REMOVE_HASH:
1500  ut_error;
1501  case BUF_BLOCK_ZIP_DIRTY:
1502  case BUF_BLOCK_ZIP_PAGE:
1503  break;
1504  }
1505 #endif /* UNIV_DEBUG */
1506 
1507  memcpy(dpage, bpage, sizeof *dpage);
1508 
1509  ut_d(bpage->in_LRU_list = FALSE);
1510  ut_d(bpage->in_page_hash = FALSE);
1511 
1512  /* relocate buf_pool->LRU */
1513  b = UT_LIST_GET_PREV(LRU, bpage);
1514  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1515 
1516  if (b) {
1517  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1518  } else {
1519  UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1520  }
1521 
1522  if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1523  buf_pool->LRU_old = dpage;
1524 #ifdef UNIV_LRU_DEBUG
1525  /* buf_pool->LRU_old must be the first item in the LRU list
1526  whose "old" flag is set. */
1527  ut_a(buf_pool->LRU_old->old);
1528  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1529  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1530  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1531  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1532  } else {
1533  /* Check that the "old" flag is consistent in
1534  the block and its neighbours. */
1535  buf_page_set_old(dpage, buf_page_is_old(dpage));
1536 #endif /* UNIV_LRU_DEBUG */
1537  }
1538 
1539  ut_d(UT_LIST_VALIDATE(
1540  LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()));
1541 
1542  /* relocate buf_pool->page_hash */
1543  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1544  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1545 }
1546 
1547 /********************************************************************/
1550 UNIV_INTERN
1551 ibool
1552 buf_pool_watch_is_sentinel(
1553 /*=======================*/
1554  buf_pool_t* buf_pool,
1555  const buf_page_t* bpage)
1556 {
1557  /* We must also own the appropriate hash lock. */
1558  ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage));
1559  ut_ad(buf_page_in_file(bpage));
1560 
1561  if (bpage < &buf_pool->watch[0]
1562  || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1563 
1564  ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1565  || bpage->zip.data != NULL);
1566 
1567  return(FALSE);
1568  }
1569 
1570  ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1571  ut_ad(!bpage->in_zip_hash);
1572  ut_ad(bpage->in_page_hash);
1573  ut_ad(bpage->zip.data == NULL);
1574  ut_ad(bpage->buf_fix_count > 0);
1575  return(TRUE);
1576 }
1577 
1578 /****************************************************************/
1583 UNIV_INTERN
1584 buf_page_t*
1585 buf_pool_watch_set(
1586 /*===============*/
1587  ulint space,
1588  ulint offset,
1589  ulint fold)
1590 {
1591  buf_page_t* bpage;
1592  ulint i;
1593  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1594  rw_lock_t* hash_lock;
1595 
1596  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
1597 
1598 #ifdef UNIV_SYNC_DEBUG
1599  ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
1600 #endif /* UNIV_SYNC_DEBUG */
1601 
1602  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1603 
1604  if (UNIV_LIKELY_NULL(bpage)) {
1605 page_found:
1606  if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
1607  /* The page was loaded meanwhile. */
1608  return(bpage);
1609  }
1610  /* Add to an existing watch. */
1611  bpage->buf_fix_count++;
1612  return(NULL);
1613  }
1614 
1615  /* From this point this function becomes fairly heavy in terms
1616  of latching. We acquire the buf_pool mutex as well as all the
1617  hash_locks. buf_pool mutex is needed because any changes to
1618  the page_hash must be covered by it and hash_locks are needed
1619  because we don't want to read any stale information in
1620  buf_pool->watch[]. However, it is not in the critical code path
1621  as this function will be called only by the purge thread. */
1622 
1623 
1624  /* To obey latching order first release the hash_lock. */
1625  rw_lock_x_unlock(hash_lock);
1626 
1627  buf_pool_mutex_enter(buf_pool);
1628  hash_lock_x_all(buf_pool->page_hash);
1629 
1630  /* We have to recheck that the page
1631  was not loaded or a watch set by some other
1632  purge thread. This is because of the small
1633  time window between releasing the hash_lock and
1634  acquiring the buf_pool mutex above. */
1635 
1636  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1637  if (UNIV_LIKELY_NULL(bpage)) {
1638  buf_pool_mutex_exit(buf_pool);
1639  hash_unlock_x_all_but(buf_pool->page_hash, hash_lock);
1640  goto page_found;
1641  }
1642 
1643  for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
1644  bpage = &buf_pool->watch[i];
1645 
1646  ut_ad(bpage->access_time == 0);
1647  ut_ad(bpage->newest_modification == 0);
1648  ut_ad(bpage->oldest_modification == 0);
1649  ut_ad(bpage->zip.data == NULL);
1650  ut_ad(!bpage->in_zip_hash);
1651 
1652  switch (bpage->state) {
1653  case BUF_BLOCK_POOL_WATCH:
1654  ut_ad(!bpage->in_page_hash);
1655  ut_ad(bpage->buf_fix_count == 0);
1656 
1657  /* bpage is pointing to buf_pool->watch[],
1658  which is protected by buf_pool->mutex.
1659  Normally, buf_page_t objects are protected by
1660  buf_block_t::mutex or buf_pool->zip_mutex or both. */
1661 
1662  bpage->state = BUF_BLOCK_ZIP_PAGE;
1663  bpage->space = space;
1664  bpage->offset = offset;
1665  bpage->buf_fix_count = 1;
1666 
1667  ut_d(bpage->in_page_hash = TRUE);
1668  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1669  fold, bpage);
1670 
1671  buf_pool_mutex_exit(buf_pool);
1672  /* Once the sentinel is in the page_hash we can
1673  safely release all locks except just the
1674  relevant hash_lock */
1675  hash_unlock_x_all_but(buf_pool->page_hash,
1676  hash_lock);
1677 
1678  return(NULL);
1679  case BUF_BLOCK_ZIP_PAGE:
1680  ut_ad(bpage->in_page_hash);
1681  ut_ad(bpage->buf_fix_count > 0);
1682  break;
1683  default:
1684  ut_error;
1685  }
1686  }
1687 
1688  /* Allocation failed. Either the maximum number of purge
1689  threads should never exceed BUF_POOL_WATCH_SIZE, or this code
1690  should be modified to return a special non-NULL value and the
1691  caller should purge the record directly. */
1692  ut_error;
1693 
1694  /* Fix compiler warning */
1695  return(NULL);
1696 }
1697 
1698 /****************************************************************/
1703 static
1704 void
1705 buf_pool_watch_remove(
1706 /*==================*/
1707  buf_pool_t* buf_pool,
1708  ulint fold,
1710  buf_page_t* watch)
1711 {
1712 #ifdef UNIV_SYNC_DEBUG
1713  /* We must also own the appropriate hash_bucket mutex. */
1714  rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
1715  ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
1716 #endif /* UNIV_SYNC_DEBUG */
1717 
1718  ut_ad(buf_pool_mutex_own(buf_pool));
1719 
1720  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
1721  ut_d(watch->in_page_hash = FALSE);
1722  watch->buf_fix_count = 0;
1723  watch->state = BUF_BLOCK_POOL_WATCH;
1724 }
1725 
1726 /****************************************************************/
1729 UNIV_INTERN
1730 void
1731 buf_pool_watch_unset(
1732 /*=================*/
1733  ulint space,
1734  ulint offset)
1735 {
1736  buf_page_t* bpage;
1737  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1738  ulint fold = buf_page_address_fold(space, offset);
1739  rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool,
1740  fold);
1741 
1742  /* We only need the buf_pool mutex in the case where we end
1743  up calling buf_pool_watch_remove, but to obey the latching
1744  order we acquire it here before acquiring the hash_lock. This
1745  should not cause too much grief as this function is only ever
1746  called from the purge thread. */
1747  buf_pool_mutex_enter(buf_pool);
1748 
1749  rw_lock_x_lock(hash_lock);
1750 
1751  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1752  /* The page must exist because buf_pool_watch_set()
1753  increments buf_fix_count. */
1754  ut_a(bpage);
1755 
1756  if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
1757  ib_mutex_t* mutex = buf_page_get_mutex(bpage);
1758 
1759  mutex_enter(mutex);
1760  ut_a(bpage->buf_fix_count > 0);
1761  bpage->buf_fix_count--;
1762  mutex_exit(mutex);
1763  } else {
1764  ut_a(bpage->buf_fix_count > 0);
1765 
1766  if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
1767  buf_pool_watch_remove(buf_pool, fold, bpage);
1768  }
1769  }
1770 
1771  buf_pool_mutex_exit(buf_pool);
1772  rw_lock_x_unlock(hash_lock);
1773 }
1774 
1775 /****************************************************************/
1780 UNIV_INTERN
1781 ibool
1782 buf_pool_watch_occurred(
1783 /*====================*/
1784  ulint space,
1785  ulint offset)
1786 {
1787  ibool ret;
1788  buf_page_t* bpage;
1789  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1790  ulint fold = buf_page_address_fold(space, offset);
1791  rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool,
1792  fold);
1793 
1794  rw_lock_s_lock(hash_lock);
1795 
1796  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1797  /* The page must exist because buf_pool_watch_set()
1798  increments buf_fix_count. */
1799  ut_a(bpage);
1800  ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
1801  rw_lock_s_unlock(hash_lock);
1802 
1803  return(ret);
1804 }
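/* Usage sketch (illustrative): the purge thread combines the watch
functions roughly as follows:

	buf_pool_watch_set(space, offset, fold);
	// ... release latches; do work that may page the block in ...
	if (!buf_pool_watch_occurred(space, offset)) {
		// nobody read the page in meanwhile: the record can
		// be purged without reading the page
	}
	buf_pool_watch_unset(space, offset);
*/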
1805 
1806 /********************************************************************/
1810 UNIV_INTERN
1811 void
1812 buf_page_make_young(
1813 /*================*/
1814  buf_page_t* bpage)
1815 {
1816  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1817 
1818  buf_pool_mutex_enter(buf_pool);
1819 
1820  ut_a(buf_page_in_file(bpage));
1821 
1822  buf_LRU_make_block_young(bpage);
1823 
1824  buf_pool_mutex_exit(buf_pool);
1825 }
1826 
1827 /********************************************************************/
1831 static
1832 void
1833 buf_page_make_young_if_needed(
1834 /*==========================*/
1835  buf_page_t* bpage)
1837 {
1838 #ifdef UNIV_DEBUG
1839  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1840  ut_ad(!buf_pool_mutex_own(buf_pool));
1841 #endif /* UNIV_DEBUG */
1842  ut_a(buf_page_in_file(bpage));
1843 
1844  if (buf_page_peek_if_too_old(bpage)) {
1845  buf_page_make_young(bpage);
1846  }
1847 }
1848 
1849 /********************************************************************/
1852 UNIV_INTERN
1853 void
1854 buf_reset_check_index_page_at_flush(
1855 /*================================*/
1856  ulint space,
1857  ulint offset)
1858 {
1859  buf_block_t* block;
1860  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1861 
1862  buf_pool_mutex_enter(buf_pool);
1863 
1864  block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1865 
1866  if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
1867  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1868  block->check_index_page_at_flush = FALSE;
1869  }
1870 
1871  buf_pool_mutex_exit(buf_pool);
1872 }
1873 
1874 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
1875 /********************************************************************/
1881 UNIV_INTERN
1882 buf_page_t*
1883 buf_page_set_file_page_was_freed(
1884 /*=============================*/
1885  ulint space,
1886  ulint offset)
1887 {
1888  buf_page_t* bpage;
1889  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1890  rw_lock_t* hash_lock;
1891 
1892  bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
1893  &hash_lock);
1894 
1895  if (bpage) {
1896  ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
1897  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1898  mutex_enter(block_mutex);
1899  rw_lock_s_unlock(hash_lock);
1900  /* bpage->file_page_was_freed can already be set
1901  when this code is invoked from dict_drop_index_tree() */
1902  bpage->file_page_was_freed = TRUE;
1903  mutex_exit(block_mutex);
1904  }
1905 
1906  return(bpage);
1907 }
1908 
1909 /********************************************************************/
1915 UNIV_INTERN
1916 buf_page_t*
1917 buf_page_reset_file_page_was_freed(
1918 /*===============================*/
1919  ulint space,
1920  ulint offset)
1921 {
1922  buf_page_t* bpage;
1923  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1924  rw_lock_t* hash_lock;
1925 
1926  bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
1927  &hash_lock);
1928  if (bpage) {
1929  ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
1930  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1931  mutex_enter(block_mutex);
1932  rw_lock_s_unlock(hash_lock);
1933  bpage->file_page_was_freed = FALSE;
1934  mutex_exit(block_mutex);
1935  }
1936 
1937  return(bpage);
1938 }
1939 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1940 
1941 /********************************************************************/
1945 static
1946 void
1947 buf_block_try_discard_uncompressed(
1948 /*===============================*/
1949  ulint space,
1950  ulint offset)
1951 {
1952  buf_page_t* bpage;
1953  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1954 
1955  /* Since we need to acquire the buf_pool mutex to discard
1956  the uncompressed frame, and because the page_hash mutex resides
1957  below the buf_pool mutex in the sync ordering, we must
1958  first release the page_hash mutex. This means that the
1959  block in question can move out of page_hash. Therefore
1960  we need to check again whether the block is still in page_hash. */
1961  buf_pool_mutex_enter(buf_pool);
1962 
1963  bpage = buf_page_hash_get(buf_pool, space, offset);
1964 
1965  if (bpage) {
1966  buf_LRU_free_page(bpage, false);
1967  }
1968 
1969  buf_pool_mutex_exit(buf_pool);
1970 }
1971 
1972 /********************************************************************/
1981 UNIV_INTERN
1982 buf_page_t*
1983 buf_page_get_zip(
1984 /*=============*/
1985  ulint space,
1986  ulint zip_size,
1987  ulint offset)
1988 {
1989  buf_page_t* bpage;
1990  ib_mutex_t* block_mutex;
1991  rw_lock_t* hash_lock;
1992  ibool discard_attempted = FALSE;
1993  ibool must_read;
1994  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1995 
1996  buf_pool->stat.n_page_gets++;
1997 
1998  for (;;) {
1999 lookup:
2000 
2001  /* The following call will also grab the page_hash
2002  mutex if the page is found. */
2003  bpage = buf_page_hash_get_s_locked(buf_pool, space,
2004  offset, &hash_lock);
2005  if (bpage) {
2006  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2007  break;
2008  }
2009 
2010  /* Page not in buf_pool: needs to be read from file */
2011 
2012  ut_ad(!hash_lock);
2013  buf_read_page(space, zip_size, offset);
2014 
2015 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2016  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2017 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2018  }
2019 
2020  ut_ad(buf_page_hash_lock_held_s(buf_pool, bpage));
2021 
2022  if (!bpage->zip.data) {
2023  /* There is no compressed page. */
2024 err_exit:
2025  rw_lock_s_unlock(hash_lock);
2026  return(NULL);
2027  }
2028 
2029  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2030 
2031  switch (buf_page_get_state(bpage)) {
2032  case BUF_BLOCK_POOL_WATCH:
2033  case BUF_BLOCK_NOT_USED:
2034  case BUF_BLOCK_READY_FOR_USE:
2035  case BUF_BLOCK_MEMORY:
2036  case BUF_BLOCK_REMOVE_HASH:
2037  break;
2038  case BUF_BLOCK_ZIP_PAGE:
2039  case BUF_BLOCK_ZIP_DIRTY:
2040  block_mutex = &buf_pool->zip_mutex;
2041  mutex_enter(block_mutex);
2042  bpage->buf_fix_count++;
2043  goto got_block;
2044  case BUF_BLOCK_FILE_PAGE:
2045  /* Discard the uncompressed page frame if possible. */
2046  if (!discard_attempted) {
2047  rw_lock_s_unlock(hash_lock);
2048  buf_block_try_discard_uncompressed(space,
2049  offset);
2050  discard_attempted = TRUE;
2051  goto lookup;
2052  }
2053 
2054  block_mutex = &((buf_block_t*) bpage)->mutex;
2055  mutex_enter(block_mutex);
2056  buf_block_buf_fix_inc((buf_block_t*) bpage,
2057  __FILE__, __LINE__);
2058  goto got_block;
2059  }
2060 
2061  ut_error;
2062  goto err_exit;
2063 
2064 got_block:
2065  must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
2066 
2067  rw_lock_s_unlock(hash_lock);
2068 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
2069  ut_a(!bpage->file_page_was_freed);
2070 #endif
2071 
2072  buf_page_set_accessed(bpage);
2073 
2074  mutex_exit(block_mutex);
2075 
2076  buf_page_make_young_if_needed(bpage);
2077 
2078 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2079  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2080  ut_a(bpage->buf_fix_count > 0);
2081  ut_a(buf_page_in_file(bpage));
2082 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2083 
2084  if (must_read) {
2085  /* Let us wait until the read operation
2086  completes */
2087 
2088  for (;;) {
2089  enum buf_io_fix io_fix;
2090 
2091  mutex_enter(block_mutex);
2092  io_fix = buf_page_get_io_fix(bpage);
2093  mutex_exit(block_mutex);
2094 
2095  if (io_fix == BUF_IO_READ) {
2096 
2097  os_thread_sleep(WAIT_FOR_READ);
2098  } else {
2099  break;
2100  }
2101  }
2102  }
2103 
2104 #ifdef UNIV_IBUF_COUNT_DEBUG
2105  ut_a(ibuf_count_get(buf_page_get_space(bpage),
2106  buf_page_get_page_no(bpage)) == 0);
2107 #endif
2108  return(bpage);
2109 }
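/* A minimal usage sketch for buf_page_get_zip() (hypothetical caller;
the variable names are illustrative). The returned control block is
buffer-fixed, and buf_page_release_zip() from buf0buf is assumed to be
the matching release call: */

static void
buf_page_read_zip_sketch(
/*=====================*/
	ulint	space,		/* in: space id */
	ulint	zip_size,	/* in: compressed page size */
	ulint	offset)		/* in: page number */
{
	buf_page_t*	bpage = buf_page_get_zip(space, zip_size, offset);

	if (bpage != NULL) {
		/* bpage->zip.data points to the compressed frame; any
		pending read has completed (see must_read above) */
		buf_page_release_zip(bpage);
	}
}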
2110 
2111 /********************************************************************/
2113 UNIV_INLINE
2114 void
2115 buf_block_init_low(
2116 /*===============*/
2117  buf_block_t* block)
2118 {
2119  block->check_index_page_at_flush = FALSE;
2120  block->index = NULL;
2121 
2122  block->n_hash_helps = 0;
2123  block->n_fields = 1;
2124  block->n_bytes = 0;
2125  block->left_side = TRUE;
2126 }
2127 #endif /* !UNIV_HOTBACKUP */
2128 
2129 /********************************************************************/
2132 UNIV_INTERN
2133 ibool
2134 buf_zip_decompress(
2135 /*===============*/
2136  buf_block_t* block,
2137  ibool check)
2138 {
2139  const byte* frame = block->page.zip.data;
2140  ulint size = page_zip_get_size(&block->page.zip);
2141 
2142  ut_ad(buf_block_get_zip_size(block));
2143  ut_a(buf_block_get_space(block) != 0);
2144 
2145  if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) {
2146 
2147  ut_print_timestamp(stderr);
2148  fprintf(stderr,
2149  " InnoDB: compressed page checksum mismatch"
2150  " (space %u page %u): stored: %lu, crc32: %lu "
2151  "innodb: %lu, none: %lu\n",
2152  block->page.space, block->page.offset,
2153  mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM),
2154  page_zip_calc_checksum(frame, size,
2155  SRV_CHECKSUM_ALGORITHM_CRC32),
2156  page_zip_calc_checksum(frame, size,
2157  SRV_CHECKSUM_ALGORITHM_INNODB),
2158  page_zip_calc_checksum(frame, size,
2159  SRV_CHECKSUM_ALGORITHM_NONE));
2160  return(FALSE);
2161  }
2162 
2163  switch (fil_page_get_type(frame)) {
2164  case FIL_PAGE_INDEX:
2165  if (page_zip_decompress(&block->page.zip,
2166  block->frame, TRUE)) {
2167  return(TRUE);
2168  }
2169 
2170  fprintf(stderr,
2171  "InnoDB: unable to decompress space %lu page %lu\n",
2172  (ulong) block->page.space,
2173  (ulong) block->page.offset);
2174  return(FALSE);
2175 
2176  case FIL_PAGE_TYPE_ALLOCATED:
2177  case FIL_PAGE_INODE:
2178  case FIL_PAGE_IBUF_BITMAP:
2179  case FIL_PAGE_TYPE_FSP_HDR:
2180  case FIL_PAGE_TYPE_XDES:
2181  case FIL_PAGE_TYPE_ZBLOB:
2182  case FIL_PAGE_TYPE_ZBLOB2:
2183  /* Copy to uncompressed storage. */
2184  memcpy(block->frame, frame,
2185  buf_block_get_zip_size(block));
2186  return(TRUE);
2187  }
2188 
2189  ut_print_timestamp(stderr);
2190  fprintf(stderr,
2191  " InnoDB: unknown compressed page"
2192  " type %lu\n",
2193  fil_page_get_type(frame));
2194  return(FALSE);
2195 }
2196 
2197 #ifndef UNIV_HOTBACKUP
2198 /*******************************************************************/
2202 UNIV_INTERN
2203 buf_block_t*
2204 buf_block_align_instance(
2205 /*=====================*/
2206  buf_pool_t* buf_pool,
2208  const byte* ptr)
2209 {
2210  buf_chunk_t* chunk;
2211  ulint i;
2212 
2213  /* TODO: protect buf_pool->chunks with a mutex (it will
2214  currently remain constant after buf_pool_init()) */
2215  for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
2216  ulint offs;
2217 
2218  if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
2219 
2220  continue;
2221  }
2222  /* else */
2223 
2224  offs = ptr - chunk->blocks->frame;
2225 
2226  offs >>= UNIV_PAGE_SIZE_SHIFT;
2227 
2228  if (UNIV_LIKELY(offs < chunk->size)) {
2229  buf_block_t* block = &chunk->blocks[offs];
2230 
2231  /* The function buf_chunk_init() invokes
2232  buf_block_init() so that block[n].frame ==
2233  block->frame + n * UNIV_PAGE_SIZE. Check it. */
2234  ut_ad(block->frame == page_align(ptr));
2235 #ifdef UNIV_DEBUG
2236  /* A thread that updates these fields must
2237  hold buf_pool->mutex and block->mutex. Acquire
2238  only the latter. */
2239  mutex_enter(&block->mutex);
2240 
2241  switch (buf_block_get_state(block)) {
2242  case BUF_BLOCK_POOL_WATCH:
2243  case BUF_BLOCK_ZIP_PAGE:
2244  case BUF_BLOCK_ZIP_DIRTY:
2245  /* These types should only be used in
2246  the compressed buffer pool, whose
2247  memory is allocated from
2248  buf_pool->chunks, in UNIV_PAGE_SIZE
2249  blocks flagged as BUF_BLOCK_MEMORY. */
2250  ut_error;
2251  break;
2252  case BUF_BLOCK_NOT_USED:
2253  case BUF_BLOCK_READY_FOR_USE:
2254  case BUF_BLOCK_MEMORY:
2255  /* Some data structures contain
2256  "guess" pointers to file pages. The
2257  file pages may have been freed and
2258  reused. Do not complain. */
2259  break;
2260  case BUF_BLOCK_REMOVE_HASH:
2261  /* buf_LRU_block_remove_hashed_page()
2262  will overwrite the FIL_PAGE_OFFSET and
2263  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
2264  0xff and set the state to
2265  BUF_BLOCK_REMOVE_HASH. */
2266  ut_ad(page_get_space_id(page_align(ptr))
2267  == 0xffffffff);
2268  ut_ad(page_get_page_no(page_align(ptr))
2269  == 0xffffffff);
2270  break;
2271  case BUF_BLOCK_FILE_PAGE:
2272  ut_ad(block->page.space
2273  == page_get_space_id(page_align(ptr)));
2274  ut_ad(block->page.offset
2275  == page_get_page_no(page_align(ptr)));
2276  break;
2277  }
2278 
2279  mutex_exit(&block->mutex);
2280 #endif /* UNIV_DEBUG */
2281 
2282  return(block);
2283  }
2284  }
2285 
2286  return(NULL);
2287 }
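/* A worked example of the offset arithmetic above, assuming 16KB
pages (UNIV_PAGE_SIZE == 16384, UNIV_PAGE_SIZE_SHIFT == 14) and a
pointer 100 bytes into the fourth frame of a chunk:

	offs = ptr - chunk->blocks->frame;	3 * 16384 + 100 == 49252
	offs >>= UNIV_PAGE_SIZE_SHIFT;		49252 >> 14     == 3
	block = &chunk->blocks[3];		the fourth control block

so the control block is recovered by pure pointer arithmetic, without
consulting page_hash. */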
2288 
2289 /*******************************************************************/
2292 UNIV_INTERN
2293 buf_block_t*
2294 buf_block_align(
2295 /*============*/
2296  const byte* ptr)
2297 {
2298  ulint i;
2299 
2300  for (i = 0; i < srv_buf_pool_instances; i++) {
2301  buf_block_t* block;
2302 
2303  block = buf_block_align_instance(
2304  buf_pool_from_array(i), ptr);
2305  if (block) {
2306  return(block);
2307  }
2308  }
2309 
2310  /* The block should always be found. */
2311  ut_error;
2312  return(NULL);
2313 }
2314 
2315 /********************************************************************/
2320 static
2321 ibool
2322 buf_pointer_is_block_field_instance(
2323 /*================================*/
2324  buf_pool_t* buf_pool,
2325  const void* ptr)
2326 {
2327  const buf_chunk_t* chunk = buf_pool->chunks;
2328  const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
2329 
2330  /* TODO: protect buf_pool->chunks with a mutex (it will
2331  currently remain constant after buf_pool_init()) */
2332  while (chunk < echunk) {
2333  if (ptr >= (void*) chunk->blocks
2334  && ptr < (void*) (chunk->blocks + chunk->size)) {
2335 
2336  return(TRUE);
2337  }
2338 
2339  chunk++;
2340  }
2341 
2342  return(FALSE);
2343 }
2344 
2345 /********************************************************************/
2349 UNIV_INTERN
2350 ibool
2351 buf_pointer_is_block_field(
2352 /*=======================*/
2353  const void* ptr)
2354 {
2355  ulint i;
2356 
2357  for (i = 0; i < srv_buf_pool_instances; i++) {
2358  ibool found;
2359 
2360  found = buf_pointer_is_block_field_instance(
2361  buf_pool_from_array(i), ptr);
2362  if (found) {
2363  return(TRUE);
2364  }
2365  }
2366 
2367  return(FALSE);
2368 }
2369 
2370 /********************************************************************/
2373 static
2374 ibool
2375 buf_block_is_uncompressed(
2376 /*======================*/
2377  buf_pool_t* buf_pool,
2378  const buf_block_t* block)
2380 {
2381  if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2382  /* The pointer should be aligned. */
2383  return(FALSE);
2384  }
2385 
2386  return(buf_pointer_is_block_field_instance(buf_pool, (void*) block));
2387 }
2388 
2389 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
2390 /********************************************************************/
2393 static
2394 bool
2395 buf_debug_execute_is_force_flush()
2396 /*==============================*/
2397 {
2398  DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); );
2399 
2400  /* This is used during quiesce testing; we want to ensure maximum
2401  buffering by the change buffer. */
2402 
2403  if (srv_ibuf_disable_background_merge) {
2404  return(true);
2405  }
2406 
2407  return(false);
2408 }
2409 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
2410 
2411 /********************************************************************/
2414 UNIV_INTERN
2415 buf_block_t*
2416 buf_page_get_gen(
2417 /*=============*/
2418  ulint space,
2419  ulint zip_size,
2421  ulint offset,
2422  ulint rw_latch,
2423  buf_block_t* guess,
2424  ulint mode,
2427  const char* file,
2428  ulint line,
2429  mtr_t* mtr)
2430 {
2431  buf_block_t* block;
2432  ulint fold;
2433  unsigned access_time;
2434  ulint fix_type;
2435  ibool must_read;
2436  rw_lock_t* hash_lock;
2437  ib_mutex_t* block_mutex;
2438  ulint retries = 0;
2439  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2440 
2441  ut_ad(mtr);
2442  ut_ad(mtr->state == MTR_ACTIVE);
2443  ut_ad((rw_latch == RW_S_LATCH)
2444  || (rw_latch == RW_X_LATCH)
2445  || (rw_latch == RW_NO_LATCH));
2446 #ifdef UNIV_DEBUG
2447  switch (mode) {
2448  case BUF_GET_NO_LATCH:
2449  ut_ad(rw_latch == RW_NO_LATCH);
2450  break;
2451  case BUF_GET:
2452  case BUF_GET_IF_IN_POOL:
2453  case BUF_PEEK_IF_IN_POOL:
2454  case BUF_GET_IF_IN_POOL_OR_WATCH:
2455  case BUF_GET_POSSIBLY_FREED:
2456  break;
2457  default:
2458  ut_error;
2459  }
2460 #endif /* UNIV_DEBUG */
2461  ut_ad(zip_size == fil_space_get_zip_size(space));
2462  ut_ad(ut_is_2pow(zip_size));
2463 #ifndef UNIV_LOG_DEBUG
2464  ut_ad(!ibuf_inside(mtr)
2465  || ibuf_page_low(space, zip_size, offset,
2466  FALSE, file, line, NULL));
2467 #endif
2468  buf_pool->stat.n_page_gets++;
2469  fold = buf_page_address_fold(space, offset);
2470  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
2471 loop:
2472  block = guess;
2473 
2474  rw_lock_s_lock(hash_lock);
2475  if (block) {
2476  /* If the guess is a compressed page descriptor that
2477  has been allocated by buf_page_alloc_descriptor(),
2478  it may have been freed by buf_relocate(). */
2479 
2480  if (!buf_block_is_uncompressed(buf_pool, block)
2481  || offset != block->page.offset
2482  || space != block->page.space
2483  || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2484 
2485  /* Our guess was bogus or things have changed
2486  since. */
2487  block = guess = NULL;
2488  } else {
2489  ut_ad(!block->page.in_zip_hash);
2490  }
2491  }
2492 
2493  if (block == NULL) {
2494  block = (buf_block_t*) buf_page_hash_get_low(
2495  buf_pool, space, offset, fold);
2496  }
2497 
2498  if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2499  rw_lock_s_unlock(hash_lock);
2500  block = NULL;
2501  }
2502 
2503  if (block == NULL) {
2504  /* Page not in buf_pool: needs to be read from file */
2505 
2506  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2507  rw_lock_x_lock(hash_lock);
2508  block = (buf_block_t*) buf_pool_watch_set(
2509  space, offset, fold);
2510 
2511  if (UNIV_LIKELY_NULL(block)) {
2512  /* We can release hash_lock after we
2513  acquire block_mutex to make sure that
2514  no state change takes place. */
2515  block_mutex = buf_page_get_mutex(&block->page);
2516  mutex_enter(block_mutex);
2517 
2518  /* Now safe to release page_hash mutex */
2519  rw_lock_x_unlock(hash_lock);
2520  goto got_block;
2521  }
2522 
2523  rw_lock_x_unlock(hash_lock);
2524  }
2525 
2526  if (mode == BUF_GET_IF_IN_POOL
2527  || mode == BUF_PEEK_IF_IN_POOL
2528  || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2529 #ifdef UNIV_SYNC_DEBUG
2530  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
2531  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
2532 #endif /* UNIV_SYNC_DEBUG */
2533  return(NULL);
2534  }
2535 
2536  if (buf_read_page(space, zip_size, offset)) {
2537  buf_read_ahead_random(space, zip_size, offset,
2538  ibuf_inside(mtr));
2539 
2540  retries = 0;
2541  } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2542  ++retries;
2543  DBUG_EXECUTE_IF(
2544  "innodb_page_corruption_retries",
2545  retries = BUF_PAGE_READ_MAX_RETRIES;
2546  );
2547  } else {
2548  fprintf(stderr, "InnoDB: Error: Unable"
2549  " to read tablespace %lu page no"
2550  " %lu into the buffer pool after"
2551  " %lu attempts\n"
2552  "InnoDB: The most probable cause"
2553  " of this error is that the"
2554  " table has been corrupted.\n"
2555  "InnoDB: You can try to fix this"
2556  " problem by using"
2557  " innodb_force_recovery.\n"
2558  "InnoDB: Please see reference manual"
2559  " for more details.\n"
2560  "InnoDB: Aborting...\n",
2561  space, offset,
2562  BUF_PAGE_READ_MAX_RETRIES);
2563 
2564  ut_error;
2565  }
2566 
2567 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2568  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2569 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2570  goto loop;
2571  }
2572 
2573 
2574  /* We can release hash_lock after we acquire block_mutex to
2575  make sure that no state change takes place. */
2576  block_mutex = buf_page_get_mutex(&block->page);
2577  mutex_enter(block_mutex);
2578 
2579  /* Now safe to release page_hash mutex */
2580  rw_lock_s_unlock(hash_lock);
2581 
2582 got_block:
2583  ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2584  ut_ad(mutex_own(block_mutex));
2585 
2586  must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2587 
2588  if (must_read && (mode == BUF_GET_IF_IN_POOL
2589  || mode == BUF_PEEK_IF_IN_POOL)) {
2590 
2591  /* The page is being read to buffer pool,
2592  but we cannot wait around for the read to
2593  complete. */
2594 null_exit:
2595  mutex_exit(block_mutex);
2596 
2597  return(NULL);
2598  }
2599 
2600  switch (buf_block_get_state(block)) {
2601  buf_page_t* bpage;
2602 
2603  case BUF_BLOCK_FILE_PAGE:
2604  break;
2605 
2606  case BUF_BLOCK_ZIP_PAGE:
2607  case BUF_BLOCK_ZIP_DIRTY:
2608  if (mode == BUF_PEEK_IF_IN_POOL) {
2609  /* This mode is only used for dropping an
2610  adaptive hash index. There cannot be an
2611  adaptive hash index for a compressed-only
2612  page, so do not bother decompressing the page. */
2613  goto null_exit;
2614  }
2615 
2616  bpage = &block->page;
2617 
2618  if (bpage->buf_fix_count
2619  || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2620  /* This condition often occurs when the buffer
2621  is not buffer-fixed, but I/O-fixed by
2622  buf_page_init_for_read(). */
2623  mutex_exit(block_mutex);
2624 wait_until_unfixed:
2625  /* The block is buffer-fixed or I/O-fixed.
2626  Try again later. */
2627  os_thread_sleep(WAIT_FOR_READ);
2628 
2629  goto loop;
2630  }
2631 
2632  /* Buffer-fix the block so that it cannot be evicted
2633  or relocated while we are attempting to allocate an
2634  uncompressed page. */
2635  bpage->buf_fix_count++;
2636 
2637  /* Allocate an uncompressed page. */
2638  mutex_exit(block_mutex);
2639  block = buf_LRU_get_free_block(buf_pool);
2640  ut_a(block);
2641 
2642  buf_pool_mutex_enter(buf_pool);
2643 
2644  rw_lock_x_lock(hash_lock);
2645  /* Buffer-fixing prevents the page_hash from changing. */
2646  ut_ad(bpage == buf_page_hash_get_low(
2647  buf_pool, space, offset, fold));
2648 
2649  mutex_enter(&block->mutex);
2650  mutex_enter(&buf_pool->zip_mutex);
2651 
2652  if (--bpage->buf_fix_count
2653  || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2654 
2655  mutex_exit(&buf_pool->zip_mutex);
2656  /* The block was buffer-fixed or I/O-fixed while
2657  buf_pool->mutex was not held by this thread.
2658  Free the block that was allocated and retry.
2659  This should be extremely unlikely, for example,
2660  if buf_page_get_zip() was invoked. */
2661 
2662  buf_LRU_block_free_non_file_page(block);
2663  buf_pool_mutex_exit(buf_pool);
2664  rw_lock_x_unlock(hash_lock);
2665  mutex_exit(&block->mutex);
2666 
2667  goto wait_until_unfixed;
2668  }
2669 
2670  /* Move the compressed page from bpage to block,
2671  and uncompress it. */
2672 
2673  buf_relocate(bpage, &block->page);
2674  buf_block_init_low(block);
2675  block->lock_hash_val = lock_rec_hash(space, offset);
2676 
2677  UNIV_MEM_DESC(&block->page.zip.data,
2678  page_zip_get_size(&block->page.zip));
2679 
2680  if (buf_page_get_state(&block->page)
2681  == BUF_BLOCK_ZIP_PAGE) {
2682 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2683  UT_LIST_REMOVE(list, buf_pool->zip_clean,
2684  &block->page);
2685 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2686  ut_ad(!block->page.in_flush_list);
2687  } else {
2688  /* Relocate buf_pool->flush_list. */
2689  buf_flush_relocate_on_flush_list(bpage,
2690  &block->page);
2691  }
2692 
2693  /* Buffer-fix, I/O-fix, and X-latch the block
2694  for the duration of the decompression.
2695  Also add the block to the unzip_LRU list. */
2696  block->page.state = BUF_BLOCK_FILE_PAGE;
2697 
2698  /* Insert at the front of unzip_LRU list */
2699  buf_unzip_LRU_add_block(block, FALSE);
2700 
2701  block->page.buf_fix_count = 1;
2702  buf_block_set_io_fix(block, BUF_IO_READ);
2703  rw_lock_x_lock_inline(&block->lock, 0, file, line);
2704 
2705  UNIV_MEM_INVALID(bpage, sizeof *bpage);
2706 
2707  rw_lock_x_unlock(hash_lock);
2708 
2709  buf_pool->n_pend_unzip++;
2710  buf_pool_mutex_exit(buf_pool);
2711 
2712  access_time = buf_page_is_accessed(&block->page);
2713  mutex_exit(&block->mutex);
2714  mutex_exit(&buf_pool->zip_mutex);
2715 
2716  buf_page_free_descriptor(bpage);
2717 
2718  /* Decompress the page while not holding
2719  buf_pool->mutex or block->mutex. */
2720 
2721  /* Page checksum verification is already done when
2722  the page is read from disk. Hence page checksum
2723  verification is not necessary when decompressing the page. */
2724  ut_a(buf_zip_decompress(block, FALSE));
2725 
2726  if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2727  if (access_time) {
2728 #ifdef UNIV_IBUF_COUNT_DEBUG
2729  ut_a(ibuf_count_get(space, offset) == 0);
2730 #endif /* UNIV_IBUF_COUNT_DEBUG */
2731  } else {
2732  ibuf_merge_or_delete_for_page(
2733  block, space, offset, zip_size, TRUE);
2734  }
2735  }
2736 
2737  /* Unfix and unlatch the block. */
2738  buf_pool_mutex_enter(buf_pool);
2739  mutex_enter(&block->mutex);
2740  block->page.buf_fix_count--;
2741  buf_block_set_io_fix(block, BUF_IO_NONE);
2742  buf_pool->n_pend_unzip--;
2743  buf_pool_mutex_exit(buf_pool);
2744  rw_lock_x_unlock(&block->lock);
2745 
2746  break;
2747 
2748  case BUF_BLOCK_POOL_WATCH:
2749  case BUF_BLOCK_NOT_USED:
2750  case BUF_BLOCK_READY_FOR_USE:
2751  case BUF_BLOCK_MEMORY:
2752  case BUF_BLOCK_REMOVE_HASH:
2753  ut_error;
2754  break;
2755  }
2756 
2757 #ifdef UNIV_SYNC_DEBUG
2758  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
2759  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
2760 #endif /* UNIV_SYNC_DEBUG */
2761 
2762  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2763 
2764 #if UNIV_WORD_SIZE == 4
2765  /* On 32-bit systems, there is no padding in buf_page_t. On
2766  other systems, Valgrind could complain about uninitialized pad
2767  bytes. */
2768  UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
2769 #endif
2770 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
2771 
2772  if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
2773  && (ibuf_debug || buf_debug_execute_is_force_flush())) {
2774  /* Try to evict the block from the buffer pool, to use the
2775  insert buffer (change buffer) as much as possible. */
2776 
2777  /* To obey the latching order, release the
2778  block->mutex before acquiring buf_pool->mutex. Protect
2779  the block from changes by temporarily buffer-fixing it
2780  for the time we are not holding block->mutex. */
2781  buf_block_buf_fix_inc(block, file, line);
2782  mutex_exit(&block->mutex);
2783  buf_pool_mutex_enter(buf_pool);
2784  mutex_enter(&block->mutex);
2785  buf_block_buf_fix_dec(block);
2786  mutex_exit(&block->mutex);
2787 
2788  /* Now we are only holding the buf_pool->mutex,
2789  not block->mutex or hash_lock. Blocks cannot be
2790  relocated or enter or exit the buf_pool while we
2791  are holding the buf_pool->mutex. */
2792 
2793  if (buf_LRU_free_page(&block->page, true)) {
2794  buf_pool_mutex_exit(buf_pool);
2795  rw_lock_x_lock(hash_lock);
2796 
2797  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2798  /* Set the watch, as it would have
2799  been set if the page were not in the
2800  buffer pool in the first place. */
2801  block = (buf_block_t*) buf_pool_watch_set(
2802  space, offset, fold);
2803  } else {
2804  block = (buf_block_t*) buf_page_hash_get_low(
2805  buf_pool, space, offset, fold);
2806  }
2807 
2808  rw_lock_x_unlock(hash_lock);
2809 
2810  if (UNIV_LIKELY_NULL(block)) {
2811  /* Either the page has been read in or
2812  a watch was set on it in the window
2813  where we released the buf_pool::mutex
2814  and before we acquired the hash_lock
2815  above. Try again. */
2816  guess = block;
2817  goto loop;
2818  }
2819 
2820  fprintf(stderr,
2821  "innodb_change_buffering_debug evict %u %u\n",
2822  (unsigned) space, (unsigned) offset);
2823  return(NULL);
2824  }
2825 
2826  mutex_enter(&block->mutex);
2827 
2828  if (buf_flush_page_try(buf_pool, block)) {
2829  fprintf(stderr,
2830  "innodb_change_buffering_debug flush %u %u\n",
2831  (unsigned) space, (unsigned) offset);
2832  guess = block;
2833  goto loop;
2834  }
2835 
2836  /* Failed to evict the page; change it directly */
2837 
2838  buf_pool_mutex_exit(buf_pool);
2839  }
2840 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
2841 
2842  buf_block_buf_fix_inc(block, file, line);
2843 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
2844  ut_a(mode == BUF_GET_POSSIBLY_FREED
2845  || !block->page.file_page_was_freed);
2846 #endif
2847  /* Check if this is the first access to the page */
2848  access_time = buf_page_is_accessed(&block->page);
2849 
2850  buf_page_set_accessed(&block->page);
2851 
2852  mutex_exit(&block->mutex);
2853 
2854  if (mode != BUF_PEEK_IF_IN_POOL) {
2855  buf_page_make_young_if_needed(&block->page);
2856  }
2857 
2858 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2859  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2860  ut_a(block->page.buf_fix_count > 0);
2861  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2862 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2863 
2864  switch (rw_latch) {
2865  case RW_NO_LATCH:
2866  if (must_read) {
2867  /* Let us wait until the read operation
2868  completes */
2869 
2870  for (;;) {
2871  enum buf_io_fix io_fix;
2872 
2873  mutex_enter(&block->mutex);
2874  io_fix = buf_block_get_io_fix(block);
2875  mutex_exit(&block->mutex);
2876 
2877  if (io_fix == BUF_IO_READ) {
2878  /* wait by temporarily s-latching */
2879  rw_lock_s_lock(&(block->lock));
2880  rw_lock_s_unlock(&(block->lock));
2881  } else {
2882  break;
2883  }
2884  }
2885  }
2886 
2887  fix_type = MTR_MEMO_BUF_FIX;
2888  break;
2889 
2890  case RW_S_LATCH:
2891  rw_lock_s_lock_inline(&(block->lock), 0, file, line);
2892 
2893  fix_type = MTR_MEMO_PAGE_S_FIX;
2894  break;
2895 
2896  default:
2897  ut_ad(rw_latch == RW_X_LATCH);
2898  rw_lock_x_lock_inline(&(block->lock), 0, file, line);
2899 
2900  fix_type = MTR_MEMO_PAGE_X_FIX;
2901  break;
2902  }
2903 
2904  mtr_memo_push(mtr, block, fix_type);
2905 
2906  if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
2907  /* In the case of a first access, try to apply linear
2908  read-ahead */
2909 
2910  buf_read_ahead_linear(space, zip_size, offset,
2911  ibuf_inside(mtr));
2912  }
2913 
2914 #ifdef UNIV_IBUF_COUNT_DEBUG
2915  ut_a(ibuf_count_get(buf_block_get_space(block),
2916  buf_block_get_page_no(block)) == 0);
2917 #endif
2918 #ifdef UNIV_SYNC_DEBUG
2919  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
2920  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
2921 #endif /* UNIV_SYNC_DEBUG */
2922  return(block);
2923 }
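/* A minimal caller sketch for buf_page_get_gen(). Callers in this
codebase normally go through the buf_page_get() macro from buf0buf.h,
which supplies guess == NULL, mode == BUF_GET and the caller's file
and line; the sketch below is hypothetical and simplified: */

static void
buf_page_read_sketch(
/*=================*/
	ulint	space,		/* in: space id */
	ulint	zip_size,	/* in: compressed size, or 0 */
	ulint	offset)		/* in: page number */
{
	mtr_t		mtr;
	buf_block_t*	block;

	mtr_start(&mtr);

	block = buf_page_get(space, zip_size, offset, RW_S_LATCH, &mtr);
	/* equivalent to buf_page_get_gen(space, zip_size, offset,
	RW_S_LATCH, NULL, BUF_GET, __FILE__, __LINE__, &mtr) */

	ut_ad(block != NULL);	/* with BUF_GET a missing page is read in */

	/* read the page contents through block->frame here */

	mtr_commit(&mtr);	/* releases the buffer-fix and the latch */
}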
2924 
2925 /********************************************************************/
2929 UNIV_INTERN
2930 ibool
2931 buf_page_optimistic_get(
2932 /*====================*/
2933  ulint rw_latch,
2934  buf_block_t* block,
2935  ib_uint64_t modify_clock,
2936  const char* file,
2937  ulint line,
2938  mtr_t* mtr)
2939 {
2940  buf_pool_t* buf_pool;
2941  unsigned access_time;
2942  ibool success;
2943  ulint fix_type;
2944 
2945  ut_ad(block);
2946  ut_ad(mtr);
2947  ut_ad(mtr->state == MTR_ACTIVE);
2948  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
2949 
2950  mutex_enter(&block->mutex);
2951 
2952  if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
2953 
2954  mutex_exit(&block->mutex);
2955 
2956  return(FALSE);
2957  }
2958 
2959  buf_block_buf_fix_inc(block, file, line);
2960 
2961  access_time = buf_page_is_accessed(&block->page);
2962 
2963  buf_page_set_accessed(&block->page);
2964 
2965  mutex_exit(&block->mutex);
2966 
2967  buf_page_make_young_if_needed(&block->page);
2968 
2969  ut_ad(!ibuf_inside(mtr)
2970  || ibuf_page(buf_block_get_space(block),
2971  buf_block_get_zip_size(block),
2972  buf_block_get_page_no(block), NULL));
2973 
2974  if (rw_latch == RW_S_LATCH) {
2975  success = rw_lock_s_lock_nowait(&(block->lock),
2976  file, line);
2977  fix_type = MTR_MEMO_PAGE_S_FIX;
2978  } else {
2979  success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
2980  file, line);
2981  fix_type = MTR_MEMO_PAGE_X_FIX;
2982  }
2983 
2984  if (UNIV_UNLIKELY(!success)) {
2985  mutex_enter(&block->mutex);
2986  buf_block_buf_fix_dec(block);
2987  mutex_exit(&block->mutex);
2988 
2989  return(FALSE);
2990  }
2991 
2992  if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
2993  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
2994 
2995  if (rw_latch == RW_S_LATCH) {
2996  rw_lock_s_unlock(&(block->lock));
2997  } else {
2998  rw_lock_x_unlock(&(block->lock));
2999  }
3000 
3001  mutex_enter(&block->mutex);
3002  buf_block_buf_fix_dec(block);
3003  mutex_exit(&block->mutex);
3004 
3005  return(FALSE);
3006  }
3007 
3008  mtr_memo_push(mtr, block, fix_type);
3009 
3010 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3011  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3012  ut_a(block->page.buf_fix_count > 0);
3013  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3014 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3015 
3016 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3017  mutex_enter(&block->mutex);
3018  ut_a(!block->page.file_page_was_freed);
3019  mutex_exit(&block->mutex);
3020 #endif
3021 
3022  if (!access_time) {
3023  /* In the case of a first access, try to apply linear
3024  read-ahead */
3025 
3026  buf_read_ahead_linear(buf_block_get_space(block),
3027  buf_block_get_zip_size(block),
3028  buf_block_get_page_no(block),
3029  ibuf_inside(mtr));
3030  }
3031 
3032 #ifdef UNIV_IBUF_COUNT_DEBUG
3033  ut_a(ibuf_count_get(buf_block_get_space(block),
3034  buf_block_get_page_no(block)) == 0);
3035 #endif
3036  buf_pool = buf_pool_from_block(block);
3037  buf_pool->stat.n_page_gets++;
3038 
3039  return(TRUE);
3040 }
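/* The intended calling pattern for buf_page_optimistic_get(), as a
hedged sketch (the cursor save/restore around it is simplified away):
the caller records the modify clock, via buf_block_get_modify_clock(),
while it still holds a latch on the page, releases the page, and later
revalidates instead of doing a full pessimistic lookup: */

static ibool
buf_page_revisit_sketch(
/*====================*/
	buf_block_t*	block,	/* in: block that was latched earlier */
	ib_uint64_t	modify_clock,
				/* in: block->modify_clock value saved
				while the latch was still held */
	mtr_t*		mtr)	/* in/out: mini-transaction */
{
	/* succeeds only if the block was not modified or evicted in
	the meantime; on failure the caller falls back to a normal
	pessimistic buf_page_get() */
	return(buf_page_optimistic_get(RW_S_LATCH, block, modify_clock,
				       __FILE__, __LINE__, mtr));
}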
3041 
3042 /********************************************************************/
3047 UNIV_INTERN
3048 ibool
3049 buf_page_get_known_nowait(
3050 /*======================*/
3051  ulint rw_latch,
3052  buf_block_t* block,
3053  ulint mode,
3054  const char* file,
3055  ulint line,
3056  mtr_t* mtr)
3057 {
3058  buf_pool_t* buf_pool;
3059  ibool success;
3060  ulint fix_type;
3061 
3062  ut_ad(mtr);
3063  ut_ad(mtr->state == MTR_ACTIVE);
3064  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3065 
3066  mutex_enter(&block->mutex);
3067 
3068  if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
3069  /* Another thread is just freeing the block from the LRU list
3070  of the buffer pool: do not try to access this page; this
3071  attempt to access the page can only come through the hash
3072  index because when the buffer block state is ..._REMOVE_HASH,
3073  we have already removed it from the page address hash table
3074  of the buffer pool. */
3075 
3076  mutex_exit(&block->mutex);
3077 
3078  return(FALSE);
3079  }
3080 
3081  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3082 
3083  buf_block_buf_fix_inc(block, file, line);
3084 
3085  buf_page_set_accessed(&block->page);
3086 
3087  mutex_exit(&block->mutex);
3088 
3089  buf_pool = buf_pool_from_block(block);
3090 
3091  if (mode == BUF_MAKE_YOUNG) {
3092  buf_page_make_young_if_needed(&block->page);
3093  }
3094 
3095  ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
3096 
3097  if (rw_latch == RW_S_LATCH) {
3098  success = rw_lock_s_lock_nowait(&(block->lock),
3099  file, line);
3100  fix_type = MTR_MEMO_PAGE_S_FIX;
3101  } else {
3102  success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
3103  file, line);
3104  fix_type = MTR_MEMO_PAGE_X_FIX;
3105  }
3106 
3107  if (!success) {
3108  mutex_enter(&block->mutex);
3109  buf_block_buf_fix_dec(block);
3110  mutex_exit(&block->mutex);
3111 
3112  return(FALSE);
3113  }
3114 
3115  mtr_memo_push(mtr, block, fix_type);
3116 
3117 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3118  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3119  ut_a(block->page.buf_fix_count > 0);
3120  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3121 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3122 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3123  if (mode != BUF_KEEP_OLD) {
3124  /* If mode == BUF_KEEP_OLD, we are executing an I/O
3125  completion routine. Avoid a bogus assertion failure
3126  when ibuf_merge_or_delete_for_page() is processing a
3127  page that was just freed due to DROP INDEX, or
3128  deleting a record from SYS_INDEXES. This check will be
3129  skipped in recv_recover_page() as well. */
3130 
3131  mutex_enter(&block->mutex);
3132  ut_a(!block->page.file_page_was_freed);
3133  mutex_exit(&block->mutex);
3134  }
3135 #endif
3136 
3137 #ifdef UNIV_IBUF_COUNT_DEBUG
3138  ut_a((mode == BUF_KEEP_OLD)
3139  || (ibuf_count_get(buf_block_get_space(block),
3140  buf_block_get_page_no(block)) == 0));
3141 #endif
3142  buf_pool->stat.n_page_gets++;
3143 
3144  return(TRUE);
3145 }
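/* A caller sketch for buf_page_get_known_nowait(): it serves callers
that already hold a block pointer known to be valid, for example a
persistent cursor restoring its position, so no page_hash lookup is
made (hypothetical wrapper): */

static ibool
buf_page_relatch_sketch(
/*====================*/
	buf_block_t*	block,	/* in: block known to be valid */
	mtr_t*		mtr)	/* in/out: mini-transaction */
{
	if (!buf_page_get_known_nowait(RW_S_LATCH, block, BUF_MAKE_YOUNG,
				       __FILE__, __LINE__, mtr)) {
		/* the latch could not be taken without waiting */
		return(FALSE);
	}

	/* the block is buffer-fixed and s-latched; both are released
	at mtr_commit() */
	return(TRUE);
}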
3146 
3147 /*******************************************************************/
3152 UNIV_INTERN
3153 const buf_block_t*
3154 buf_page_try_get_func(
3155 /*==================*/
3156  ulint space_id,
3157  ulint page_no,
3158  const char* file,
3159  ulint line,
3160  mtr_t* mtr)
3161 {
3162  buf_block_t* block;
3163  ibool success;
3164  ulint fix_type;
3165  buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
3166  rw_lock_t* hash_lock;
3167 
3168  ut_ad(mtr);
3169  ut_ad(mtr->state == MTR_ACTIVE);
3170 
3171  block = buf_block_hash_get_s_locked(buf_pool, space_id,
3172  page_no, &hash_lock);
3173 
3174  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
3175  if (block) {
3176  rw_lock_s_unlock(hash_lock);
3177  }
3178  return(NULL);
3179  }
3180 
3181  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
3182 
3183  mutex_enter(&block->mutex);
3184  rw_lock_s_unlock(hash_lock);
3185 
3186 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3187  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3188  ut_a(buf_block_get_space(block) == space_id);
3189  ut_a(buf_block_get_page_no(block) == page_no);
3190 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3191 
3192  buf_block_buf_fix_inc(block, file, line);
3193  mutex_exit(&block->mutex);
3194 
3195  fix_type = MTR_MEMO_PAGE_S_FIX;
3196  success = rw_lock_s_lock_nowait(&block->lock, file, line);
3197 
3198  if (!success) {
3199  /* Let us try to get an X-latch. If the current thread
3200  is holding an X-latch on the page, we cannot get an
3201  S-latch. */
3202 
3203  fix_type = MTR_MEMO_PAGE_X_FIX;
3204  success = rw_lock_x_lock_func_nowait_inline(&block->lock,
3205  file, line);
3206  }
3207 
3208  if (!success) {
3209  mutex_enter(&block->mutex);
3210  buf_block_buf_fix_dec(block);
3211  mutex_exit(&block->mutex);
3212 
3213  return(NULL);
3214  }
3215 
3216  mtr_memo_push(mtr, block, fix_type);
3217 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3218  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3219  ut_a(block->page.buf_fix_count > 0);
3220  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3221 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3222 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3223  mutex_enter(&block->mutex);
3224  ut_a(!block->page.file_page_was_freed);
3225  mutex_exit(&block->mutex);
3226 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3227  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3228 
3229  buf_pool->stat.n_page_gets++;
3230 
3231 #ifdef UNIV_IBUF_COUNT_DEBUG
3232  ut_a(ibuf_count_get(buf_block_get_space(block),
3233  buf_block_get_page_no(block)) == 0);
3234 #endif
3235 
3236  return(block);
3237 }
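/* Callers normally use the buf_page_try_get() macro from buf0buf.h,
which supplies __FILE__ and __LINE__. A sketch for code that must not
wait on any latch (names are illustrative): */

static void
buf_page_peek_sketch(
/*=================*/
	ulint	space_id,	/* in: space id */
	ulint	page_no,	/* in: page number */
	mtr_t*	mtr)		/* in/out: mini-transaction */
{
	const buf_block_t*	block;

	block = buf_page_try_get(space_id, page_no, mtr);

	if (block == NULL) {
		/* the page is not resident, or latching it would
		have required waiting; the caller simply goes on */
		return;
	}

	/* inspect the page through block->frame; the fix and latch
	are released at mtr_commit() */
}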
3238 
3239 /********************************************************************/
3241 UNIV_INLINE
3242 void
3243 buf_page_init_low(
3244 /*==============*/
3245  buf_page_t* bpage)
3246 {
3247  bpage->flush_type = BUF_FLUSH_LRU;
3248  bpage->io_fix = BUF_IO_NONE;
3249  bpage->buf_fix_count = 0;
3250  bpage->freed_page_clock = 0;
3251  bpage->access_time = 0;
3252  bpage->newest_modification = 0;
3253  bpage->oldest_modification = 0;
3254  HASH_INVALIDATE(bpage, hash);
3255 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3256  bpage->file_page_was_freed = FALSE;
3257 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3258 }
3259 
3260 /********************************************************************/
3262 static __attribute__((nonnull))
3263 void
3264 buf_page_init(
3265 /*==========*/
3266  buf_pool_t* buf_pool,
3267  ulint space,
3268  ulint offset,
3270  ulint fold,
3271  ulint zip_size,
3272  buf_block_t* block)
3273 {
3274  buf_page_t* hash_page;
3275 
3276  ut_ad(buf_pool == buf_pool_get(space, offset));
3277  ut_ad(buf_pool_mutex_own(buf_pool));
3278 
3279  ut_ad(mutex_own(&(block->mutex)));
3280  ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3281 
3282 #ifdef UNIV_SYNC_DEBUG
3283  ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold),
3284  RW_LOCK_EX));
3285 #endif /* UNIV_SYNC_DEBUG */
3286 
3287  /* Set the state of the block */
3288  buf_block_set_file_page(block, space, offset);
3289 
3290 #ifdef UNIV_DEBUG_VALGRIND
3291  if (!space) {
3292  /* Silence valid Valgrind warnings about uninitialized
3293  data being written to data files. There are some unused
3294  bytes on some pages that InnoDB does not initialize. */
3295  UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
3296  }
3297 #endif /* UNIV_DEBUG_VALGRIND */
3298 
3299  buf_block_init_low(block);
3300 
3301  block->lock_hash_val = lock_rec_hash(space, offset);
3302 
3303  buf_page_init_low(&block->page);
3304 
3305  /* Insert into the hash table of file pages */
3306 
3307  hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3308 
3309  if (UNIV_LIKELY(!hash_page)) {
3310  } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
3311  /* Preserve the reference count. */
3312  ulint buf_fix_count = hash_page->buf_fix_count;
3313 
3314  ut_a(buf_fix_count > 0);
3315  block->page.buf_fix_count += buf_fix_count;
3316  buf_pool_watch_remove(buf_pool, fold, hash_page);
3317  } else {
3318  fprintf(stderr,
3319  "InnoDB: Error: page %lu %lu already found"
3320  " in the hash table: %p, %p\n",
3321  (ulong) space,
3322  (ulong) offset,
3323  (const void*) hash_page, (const void*) block);
3324 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3325  mutex_exit(&block->mutex);
3326  buf_pool_mutex_exit(buf_pool);
3327  buf_print();
3328  buf_LRU_print();
3329  buf_validate();
3330  buf_LRU_validate();
3331 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3332  ut_error;
3333  }
3334 
3335  ut_ad(!block->page.in_zip_hash);
3336  ut_ad(!block->page.in_page_hash);
3337  ut_d(block->page.in_page_hash = TRUE);
3338  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
3339  fold, &block->page);
3340  if (zip_size) {
3341  page_zip_set_size(&block->page.zip, zip_size);
3342  }
3343 }
3344 
3345 /********************************************************************/
3355 UNIV_INTERN
3356 buf_page_t*
3357 buf_page_init_for_read(
3358 /*===================*/
3359  dberr_t* err,
3360  ulint mode,
3361  ulint space,
3362  ulint zip_size,
3363  ibool unzip,
3364  ib_int64_t tablespace_version,
3368  ulint offset)
3369 {
3370  buf_block_t* block;
3371  buf_page_t* bpage = NULL;
3372  buf_page_t* watch_page;
3373  rw_lock_t* hash_lock;
3374  mtr_t mtr;
3375  ulint fold;
3376  ibool lru = FALSE;
3377  void* data;
3378  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3379 
3380  ut_ad(buf_pool);
3381 
3382  *err = DB_SUCCESS;
3383 
3384  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3385  /* It is a read-ahead within an ibuf routine */
3386 
3387  ut_ad(!ibuf_bitmap_page(zip_size, offset));
3388 
3389  ibuf_mtr_start(&mtr);
3390 
3391  if (!recv_no_ibuf_operations
3392  && !ibuf_page(space, zip_size, offset, &mtr)) {
3393 
3394  ibuf_mtr_commit(&mtr);
3395 
3396  return(NULL);
3397  }
3398  } else {
3399  ut_ad(mode == BUF_READ_ANY_PAGE);
3400  }
3401 
3402  if (zip_size && !unzip && !recv_recovery_is_on()) {
3403  block = NULL;
3404  } else {
3405  block = buf_LRU_get_free_block(buf_pool);
3406  ut_ad(block);
3407  ut_ad(buf_pool_from_block(block) == buf_pool);
3408  }
3409 
3410  fold = buf_page_address_fold(space, offset);
3411  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
3412 
3413  buf_pool_mutex_enter(buf_pool);
3414  rw_lock_x_lock(hash_lock);
3415 
3416  watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3417  if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
3418  /* The page is already in the buffer pool. */
3419  watch_page = NULL;
3420 err_exit:
3421  rw_lock_x_unlock(hash_lock);
3422  if (block) {
3423  mutex_enter(&block->mutex);
3424  buf_LRU_block_free_non_file_page(block);
3425  mutex_exit(&block->mutex);
3426  }
3427 
3428  bpage = NULL;
3429  goto func_exit;
3430  }
3431  }
3432  if (fil_tablespace_deleted_or_being_deleted_in_mem(
3433  space, tablespace_version)) {
3434  /* The page belongs to a space which has been
3435  deleted or is being deleted. */
3436  *err = DB_TABLESPACE_DELETED;
3437 
3438  goto err_exit;
3439  }
3440 
3441  if (block) {
3442  bpage = &block->page;
3443 
3444  mutex_enter(&block->mutex);
3445 
3446  ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
3447 
3448  buf_page_init(buf_pool, space, offset, fold, zip_size, block);
3449  rw_lock_x_unlock(hash_lock);
3450 
3451  /* The block must be put to the LRU list, to the old blocks */
3452  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3453 
3454  /* We set a pass-type x-lock on the frame because then
3455  the same thread which called for the read operation
3456  (and is running now at this point of code) can wait
3457  for the read to complete by waiting for the x-lock on
3458  the frame; if the x-lock were recursive, the same
3459  thread would illegally get the x-lock before the page
3460  read is completed. The x-lock is cleared by the
3461  io-handler thread. */
3462 
3463  rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
3464  buf_page_set_io_fix(bpage, BUF_IO_READ);
3465 
3466  if (zip_size) {
3467  /* buf_pool->mutex may be released and
3468  reacquired by buf_buddy_alloc(). Thus, we
3469  must release block->mutex in order not to
3470  break the latching order in the reacquisition
3471  of buf_pool->mutex. We also must defer this
3472  operation until after the block descriptor has
3473  been added to buf_pool->LRU and
3474  buf_pool->page_hash. */
3475  mutex_exit(&block->mutex);
3476  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3477  mutex_enter(&block->mutex);
3478  block->page.zip.data = (page_zip_t*) data;
3479 
3480  /* To maintain the invariant
3481  block->in_unzip_LRU_list
3482  == buf_page_belongs_to_unzip_LRU(&block->page)
3483  we have to add this block to unzip_LRU
3484  after block->page.zip.data is set. */
3485  ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
3486  buf_unzip_LRU_add_block(block, TRUE);
3487  }
3488 
3489  mutex_exit(&block->mutex);
3490  } else {
3491  rw_lock_x_unlock(hash_lock);
3492 
3493  /* The compressed page must be allocated before the
3494  control block (bpage), in order to avoid the
3495  invocation of buf_buddy_relocate_block() on
3496  uninitialized data. */
3497  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3498 
3499  rw_lock_x_lock(hash_lock);
3500 
3501  /* If buf_buddy_alloc() allocated storage from the LRU list,
3502  it released and reacquired buf_pool->mutex. Thus, we must
3503  check the page_hash again, as it may have been modified. */
3504  if (UNIV_UNLIKELY(lru)) {
3505 
3506  watch_page = buf_page_hash_get_low(
3507  buf_pool, space, offset, fold);
3508 
3509  if (UNIV_UNLIKELY(watch_page
3510  && !buf_pool_watch_is_sentinel(buf_pool,
3511  watch_page))) {
3512 
3513  /* The block was added by some other thread. */
3514  rw_lock_x_unlock(hash_lock);
3515  watch_page = NULL;
3516  buf_buddy_free(buf_pool, data, zip_size);
3517 
3518  bpage = NULL;
3519  goto func_exit;
3520  }
3521  }
3522 
3523  bpage = buf_page_alloc_descriptor();
3524 
3525  /* Initialize the buf_pool pointer. */
3526  bpage->buf_pool_index = buf_pool_index(buf_pool);
3527 
3528  page_zip_des_init(&bpage->zip);
3529  page_zip_set_size(&bpage->zip, zip_size);
3530  bpage->zip.data = (page_zip_t*) data;
3531 
3532  mutex_enter(&buf_pool->zip_mutex);
3533  UNIV_MEM_DESC(bpage->zip.data,
3534  page_zip_get_size(&bpage->zip));
3535 
3536  buf_page_init_low(bpage);
3537 
3538  bpage->state = BUF_BLOCK_ZIP_PAGE;
3539  bpage->space = space;
3540  bpage->offset = offset;
3541 
3542 #ifdef UNIV_DEBUG
3543  bpage->in_page_hash = FALSE;
3544  bpage->in_zip_hash = FALSE;
3545  bpage->in_flush_list = FALSE;
3546  bpage->in_free_list = FALSE;
3547  bpage->in_LRU_list = FALSE;
3548 #endif /* UNIV_DEBUG */
3549 
3550  ut_d(bpage->in_page_hash = TRUE);
3551 
3552  if (UNIV_LIKELY_NULL(watch_page)) {
3553 
3554  /* Preserve the reference count. */
3555  ulint buf_fix_count = watch_page->buf_fix_count;
3556  ut_a(buf_fix_count > 0);
3557  bpage->buf_fix_count += buf_fix_count;
3558  ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
3559  buf_pool_watch_remove(buf_pool, fold, watch_page);
3560  }
3561 
3562  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
3563  bpage);
3564 
3565  rw_lock_x_unlock(hash_lock);
3566 
3567  /* The block must be put to the LRU list, to the old blocks.
3568  The zip_size is already set into the page zip */
3569  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3570 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3571  buf_LRU_insert_zip_clean(bpage);
3572 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3573 
3575 
3576  mutex_exit(&buf_pool->zip_mutex);
3577  }
3578 
3579  buf_pool->n_pend_reads++;
3580 func_exit:
3581  buf_pool_mutex_exit(buf_pool);
3582 
3583  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3584 
3585  ibuf_mtr_commit(&mtr);
3586  }
3587 
3588 
3589 #ifdef UNIV_SYNC_DEBUG
3590  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
3591  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
3592 #endif /* UNIV_SYNC_DEBUG */
3593 
3594  ut_ad(!bpage || buf_page_in_file(bpage));
3595  return(bpage);
3596 }
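/* For orientation, a simplified sketch of the surrounding read path
(the real caller is buf_read_page_low() in buf0rea.cc; error handling
and the compressed-only case are omitted, so treat this as a sketch,
not the actual call sequence): */

static void
buf_read_page_sketch(
/*=================*/
	ulint		space,		/* in: space id */
	ulint		zip_size,	/* in: compressed size, or 0 */
	ulint		offset,		/* in: page number */
	ib_int64_t	tablespace_version)
					/* in: tablespace version */
{
	dberr_t		err;
	buf_page_t*	bpage;

	bpage = buf_page_init_for_read(&err, BUF_READ_ANY_PAGE, space,
				       zip_size, FALSE,
				       tablespace_version, offset);
	if (bpage == NULL) {
		/* already in the pool, or the tablespace is gone */
		return;
	}

	/* issue a synchronous read into the uncompressed frame */
	fil_io(OS_FILE_READ, true, space, zip_size, offset, 0,
	       UNIV_PAGE_SIZE, ((buf_block_t*) bpage)->frame, bpage);

	/* finish the i/o; for asynchronous reads this is done by an
	i/o handler thread instead */
	buf_page_io_complete(bpage);
}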
3597 
3598 /********************************************************************/
3604 UNIV_INTERN
3605 buf_block_t*
3606 buf_page_create(
3607 /*============*/
3608  ulint space,
3609  ulint offset,
3611  ulint zip_size,
3612  mtr_t* mtr)
3613 {
3614  buf_frame_t* frame;
3615  buf_block_t* block;
3616  ulint fold;
3617  buf_block_t* free_block = NULL;
3618  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3619  rw_lock_t* hash_lock;
3620 
3621  ut_ad(mtr);
3622  ut_ad(mtr->state == MTR_ACTIVE);
3623  ut_ad(space || !zip_size);
3624 
3625  free_block = buf_LRU_get_free_block(buf_pool);
3626 
3627  fold = buf_page_address_fold(space, offset);
3628  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
3629 
3630  buf_pool_mutex_enter(buf_pool);
3631  rw_lock_x_lock(hash_lock);
3632 
3633  block = (buf_block_t*) buf_page_hash_get_low(
3634  buf_pool, space, offset, fold);
3635 
3636  if (block
3637  && buf_page_in_file(&block->page)
3638  && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
3639 #ifdef UNIV_IBUF_COUNT_DEBUG
3640  ut_a(ibuf_count_get(space, offset) == 0);
3641 #endif
3642 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3643  block->page.file_page_was_freed = FALSE;
3644 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3645 
3646  /* Page can be found in buf_pool */
3647  buf_pool_mutex_exit(buf_pool);
3648  rw_lock_x_unlock(hash_lock);
3649 
3650  buf_block_free(free_block);
3651 
3652  return(buf_page_get_with_no_latch(space, zip_size,
3653  offset, mtr));
3654  }
3655 
3656  /* If we get here, the page was not in buf_pool: init it there */
3657 
3658 #ifdef UNIV_DEBUG
3659  if (buf_debug_prints) {
3660  fprintf(stderr, "Creating space %lu page %lu to buffer\n",
3661  (ulong) space, (ulong) offset);
3662  }
3663 #endif /* UNIV_DEBUG */
3664 
3665  block = free_block;
3666 
3667  mutex_enter(&block->mutex);
3668 
3669  buf_page_init(buf_pool, space, offset, fold, zip_size, block);
3670 
3671  rw_lock_x_unlock(hash_lock);
3672 
3673  /* The block must be put to the LRU list */
3674  buf_LRU_add_block(&block->page, FALSE);
3675 
3676  buf_block_buf_fix_inc(block, __FILE__, __LINE__);
3677  buf_pool->stat.n_pages_created++;
3678 
3679  if (zip_size) {
3680  void* data;
3681  ibool lru;
3682 
3683  /* Prevent race conditions during buf_buddy_alloc(),
3684  which may release and reacquire buf_pool->mutex,
3685  by IO-fixing and X-latching the block. */
3686 
3687  buf_page_set_io_fix(&block->page, BUF_IO_READ);
3688  rw_lock_x_lock(&block->lock);
3689 
3690  mutex_exit(&block->mutex);
3691  /* buf_pool->mutex may be released and reacquired by
3692  buf_buddy_alloc(). Thus, we must release block->mutex
3693  in order not to break the latching order in
3694  the reacquisition of buf_pool->mutex. We also must
3695  defer this operation until after the block descriptor
3696  has been added to buf_pool->LRU and buf_pool->page_hash. */
3697  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3698  mutex_enter(&block->mutex);
3699  block->page.zip.data = (page_zip_t*) data;
3700 
3701  /* To maintain the invariant
3702  block->in_unzip_LRU_list
3703  == buf_page_belongs_to_unzip_LRU(&block->page)
3704  we have to add this block to unzip_LRU after
3705  block->page.zip.data is set. */
3707  buf_unzip_LRU_add_block(block, FALSE);
3708 
3710  rw_lock_x_unlock(&block->lock);
3711  }
3712 
3713  buf_pool_mutex_exit(buf_pool);
3714 
3715  mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
3716 
3717  buf_page_set_accessed(&block->page);
3718 
3719  mutex_exit(&block->mutex);
3720 
3721  /* Delete possible entries for the page from the insert buffer:
3722  such can exist if the page belonged to an index which was dropped */
3723 
3724  ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
3725 
3726  frame = block->frame;
3727 
3728  memset(frame + FIL_PAGE_PREV, 0xff, 4);
3729  memset(frame + FIL_PAGE_NEXT, 0xff, 4);
3730  mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
3731 
3732  /* Reset to zero the file flush lsn field in the page; if the first
3733  page of an ibdata file is 'created' in this function into the buffer
3734  pool then we lose the original contents of the file flush lsn stamp.
3735  Then InnoDB could in a crash recovery print a big, false, corruption
3736  warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
3737 
3738  memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
3739 
3740 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3741  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3742 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3743 #ifdef UNIV_IBUF_COUNT_DEBUG
3744  ut_a(ibuf_count_get(buf_block_get_space(block),
3745  buf_block_get_page_no(block)) == 0);
3746 #endif
3747  return(block);
3748 }
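/* A usage sketch for buf_page_create(), simplified from the way the
tablespace management code allocates a brand-new page: the block comes
back buffer-fixed with RW_NO_LATCH, so the caller x-latches it before
initializing the frame (hypothetical wrapper): */

static buf_block_t*
buf_page_create_sketch(
/*===================*/
	ulint	space,		/* in: space id */
	ulint	page_no,	/* in: page number */
	ulint	zip_size,	/* in: compressed size, or 0 */
	mtr_t*	mtr)		/* in/out: mini-transaction */
{
	buf_block_t*	block;

	block = buf_page_create(space, page_no, zip_size, mtr);

	/* x-latch the frame for the duration of the mtr before
	writing the new page's contents */
	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);

	return(block);
}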
3749 
3750 /********************************************************************/
3754 static
3755 void
3756 buf_page_monitor(
3757 /*=============*/
3758  const buf_page_t* bpage,
3759  enum buf_io_fix io_type)
3760 {
3761  const byte* frame;
3762  monitor_id_t counter;
3763 
3764  /* If the counter module is not turned on, just return */
3765  if (!MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)) {
3766  return;
3767  }
3768 
3769  ut_a(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3770 
3771  frame = bpage->zip.data
3772  ? bpage->zip.data
3773  : ((buf_block_t*) bpage)->frame;
3774 
3775  switch (fil_page_get_type(frame)) {
3776  ulint level;
3777 
3778  case FIL_PAGE_INDEX:
3779  level = btr_page_get_level_low(frame);
3780 
3781  /* Check if it is an index page for insert buffer */
3782  if (btr_page_get_index_id(frame)
3783  == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
3784  if (level == 0) {
3785  counter = MONITOR_RW_COUNTER(
3786  io_type, MONITOR_INDEX_IBUF_LEAF_PAGE);
3787  } else {
3788  counter = MONITOR_RW_COUNTER(
3789  io_type,
3790  MONITOR_INDEX_IBUF_NON_LEAF_PAGE);
3791  }
3792  } else {
3793  if (level == 0) {
3794  counter = MONITOR_RW_COUNTER(
3795  io_type, MONITOR_INDEX_LEAF_PAGE);
3796  } else {
3797  counter = MONITOR_RW_COUNTER(
3798  io_type, MONITOR_INDEX_NON_LEAF_PAGE);
3799  }
3800  }
3801  break;
3802 
3803  case FIL_PAGE_UNDO_LOG:
3804  counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE);
3805  break;
3806 
3807  case FIL_PAGE_INODE:
3808  counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE);
3809  break;
3810 
3811  case FIL_PAGE_IBUF_FREE_LIST:
3812  counter = MONITOR_RW_COUNTER(io_type,
3813  MONITOR_IBUF_FREELIST_PAGE);
3814  break;
3815 
3816  case FIL_PAGE_IBUF_BITMAP:
3817  counter = MONITOR_RW_COUNTER(io_type,
3818  MONITOR_IBUF_BITMAP_PAGE);
3819  break;
3820 
3821  case FIL_PAGE_TYPE_SYS:
3822  counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE);
3823  break;
3824 
3825  case FIL_PAGE_TYPE_TRX_SYS:
3826  counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE);
3827  break;
3828 
3829  case FIL_PAGE_TYPE_FSP_HDR:
3830  counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE);
3831  break;
3832 
3833  case FIL_PAGE_TYPE_XDES:
3834  counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE);
3835  break;
3836 
3837  case FIL_PAGE_TYPE_BLOB:
3838  counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE);
3839  break;
3840 
3841  case FIL_PAGE_TYPE_ZBLOB:
3842  counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE);
3843  break;
3844 
3845  case FIL_PAGE_TYPE_ZBLOB2:
3846  counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE);
3847  break;
3848 
3849  default:
3850  counter = MONITOR_RW_COUNTER(io_type, MONITOR_OTHER_PAGE);
3851  }
3852 
3853  MONITOR_INC_NOCHECK(counter);
3854 }
3855 
3856 /********************************************************************/
3860 static
3861 ibool
3862 buf_mark_space_corrupt(
3863 /*===================*/
3864  buf_page_t* bpage)
3865 {
3866  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3867  const ibool uncompressed = (buf_page_get_state(bpage)
3868  == BUF_BLOCK_FILE_PAGE);
3869  ulint space = bpage->space;
3870  ibool ret = TRUE;
3871 
3872  /* First unfix and release lock on the bpage */
3873  buf_pool_mutex_enter(buf_pool);
3874  mutex_enter(buf_page_get_mutex(bpage));
3875  ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
3876  ut_ad(bpage->buf_fix_count == 0);
3877 
3878  /* Set BUF_IO_NONE before we remove the block from LRU list */
3879  buf_page_set_io_fix(bpage, BUF_IO_NONE);
3880 
3881  if (uncompressed) {
3882  rw_lock_x_unlock_gen(
3883  &((buf_block_t*) bpage)->lock,
3884  BUF_IO_READ);
3885  }
3886 
3887  mutex_exit(buf_page_get_mutex(bpage));
3888 
3889  /* Find the table with specified space id, and mark it corrupted */
3890  if (dict_set_corrupted_by_space(space)) {
3891  buf_LRU_free_one_page(bpage);
3892  } else {
3893  ret = FALSE;
3894  }
3895 
3896  ut_ad(buf_pool->n_pend_reads > 0);
3897  buf_pool->n_pend_reads--;
3898 
3899  buf_pool_mutex_exit(buf_pool);
3900 
3901  return(ret);
3902 }
3903 
3904 /********************************************************************/
3908 UNIV_INTERN
3909 bool
3910 buf_page_io_complete(
3911 /*=================*/
3912  buf_page_t* bpage)
3913 {
3914  enum buf_io_fix io_type;
3915  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3916  const ibool uncompressed = (buf_page_get_state(bpage)
3917  == BUF_BLOCK_FILE_PAGE);
3918 
3919  ut_a(buf_page_in_file(bpage));
3920 
3921  /* We do not need to protect io_fix here by mutex to read
3922  it because this is the only function where we can change the value
3923  from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
3924  ensures that this is the only thread that handles the i/o for this
3925  block. */
3926 
3927  io_type = buf_page_get_io_fix(bpage);
3928  ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3929 
3930  if (io_type == BUF_IO_READ) {
3931  ulint read_page_no;
3932  ulint read_space_id;
3933  byte* frame;
3934 
3935  if (buf_page_get_zip_size(bpage)) {
3936  frame = bpage->zip.data;
3937  buf_pool->n_pend_unzip++;
3938  if (uncompressed
3939  && !buf_zip_decompress((buf_block_t*) bpage,
3940  FALSE)) {
3941 
3942  buf_pool->n_pend_unzip--;
3943  goto corrupt;
3944  }
3945  buf_pool->n_pend_unzip--;
3946  } else {
3947  ut_a(uncompressed);
3948  frame = ((buf_block_t*) bpage)->frame;
3949  }
3950 
3951  /* If this page is not uninitialized and not in the
3952  doublewrite buffer, then the page number and space id
3953  should be the same as in block. */
3954  read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
3955  read_space_id = mach_read_from_4(
3956  frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
3957 
3958  if (bpage->space == TRX_SYS_SPACE
3959  && buf_dblwr_page_inside(bpage->offset)) {
3960 
3961  ut_print_timestamp(stderr);
3962  fprintf(stderr,
3963  " InnoDB: Error: reading page %lu\n"
3964  "InnoDB: which is in the"
3965  " doublewrite buffer!\n",
3966  (ulong) bpage->offset);
3967  } else if (!read_space_id && !read_page_no) {
3968  /* This is likely an uninitialized page. */
3969  } else if ((bpage->space
3970  && bpage->space != read_space_id)
3971  || bpage->offset != read_page_no) {
3972  /* We did not compare space_id to read_space_id
3973  if bpage->space == 0, because the field on the
3974  page may contain garbage in MySQL < 4.1.1,
3975  which only supported bpage->space == 0. */
3976 
3977  ut_print_timestamp(stderr);
3978  fprintf(stderr,
3979  " InnoDB: Error: space id and page n:o"
3980  " stored in the page\n"
3981  "InnoDB: read in are %lu:%lu,"
3982  " should be %lu:%lu!\n",
3983  (ulong) read_space_id, (ulong) read_page_no,
3984  (ulong) bpage->space,
3985  (ulong) bpage->offset);
3986  }
3987 
3988  /* From version 3.23.38 up we store the page checksum
3989  to the first 4 bytes of the page end lsn field */
3990 
3991  if (buf_page_is_corrupted(true, frame,
3992  buf_page_get_zip_size(bpage))) {
3993 
3994  /* Not a real corruption if it was triggered by
3995  error injection */
3996  DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
3997  if (bpage->space > TRX_SYS_SPACE
3998  && buf_mark_space_corrupt(bpage)) {
3999  ib_logf(IB_LOG_LEVEL_INFO,
4000  "Simulated page corruption");
4001  return(true);
4002  }
4003  goto page_not_corrupt;
4004  ;);
4005 corrupt:
4006  fprintf(stderr,
4007  "InnoDB: Database page corruption on disk"
4008  " or a failed\n"
4009  "InnoDB: file read of page %lu.\n"
4010  "InnoDB: You may have to recover"
4011  " from a backup.\n",
4012  (ulong) bpage->offset);
4013  buf_page_print(frame, buf_page_get_zip_size(bpage),
4014  BUF_PAGE_PRINT_NO_CRASH);
4015  fprintf(stderr,
4016  "InnoDB: Database page corruption on disk"
4017  " or a failed\n"
4018  "InnoDB: file read of page %lu.\n"
4019  "InnoDB: You may have to recover"
4020  " from a backup.\n",
4021  (ulong) bpage->offset);
4022  fputs("InnoDB: It is also possible that"
4023  " your operating\n"
4024  "InnoDB: system has corrupted its"
4025  " own file cache\n"
4026  "InnoDB: and rebooting your computer"
4027  " removes the\n"
4028  "InnoDB: error.\n"
4029  "InnoDB: If the corrupt page is an index page\n"
4030  "InnoDB: you can also try to"
4031  " fix the corruption\n"
4032  "InnoDB: by dumping, dropping,"
4033  " and reimporting\n"
4034  "InnoDB: the corrupt table."
4035  " You can use CHECK\n"
4036  "InnoDB: TABLE to scan your"
4037  " table for corruption.\n"
4038  "InnoDB: See also "
4039  REFMAN "forcing-innodb-recovery.html\n"
4040  "InnoDB: about forcing recovery.\n", stderr);
4041 
4042  if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
4043  /* If page space id is larger than TRX_SYS_SPACE
4044  (0), we will attempt to mark the corresponding
4045  table as corrupted instead of crashing server */
4046  if (bpage->space > TRX_SYS_SPACE
4047  && buf_mark_space_corrupt(bpage)) {
4048  return(false);
4049  } else {
4050  fputs("InnoDB: Ending processing"
4051  " because of"
4052  " a corrupt database page.\n",
4053  stderr);
4054  ut_error;
4055  }
4056  }
4057  }
4058 
4059  DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
4060  page_not_corrupt: bpage = bpage; );
4061 
4062  if (recv_recovery_is_on()) {
4063  /* Pages must be uncompressed for crash recovery. */
4064  ut_a(uncompressed);
4065  recv_recover_page(TRUE, (buf_block_t*) bpage);
4066  }
4067 
4068  if (uncompressed && !recv_no_ibuf_operations) {
4069  ibuf_merge_or_delete_for_page(
4070  (buf_block_t*) bpage, bpage->space,
4071  bpage->offset, buf_page_get_zip_size(bpage),
4072  TRUE);
4073  }
4074  }
4075 
4076  buf_pool_mutex_enter(buf_pool);
4077  mutex_enter(buf_page_get_mutex(bpage));
4078 
4079 #ifdef UNIV_IBUF_COUNT_DEBUG
4080  if (io_type == BUF_IO_WRITE || uncompressed) {
4081  /* For BUF_IO_READ of compressed-only blocks, the
4082  buffered operations will be merged by buf_page_get_gen()
4083  after the block has been uncompressed. */
4084  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
4085  }
4086 #endif
4087  /* Because this thread which does the unlocking is not the same that
4088  did the locking, we use a pass value != 0 in unlock, which simply
4089  removes the newest lock debug record, without checking the thread
4090  id. */
4091 
4092  buf_page_set_io_fix(bpage, BUF_IO_NONE);
4093 
4094  switch (io_type) {
4095  case BUF_IO_READ:
4096  /* NOTE that the call to ibuf may have moved the ownership of
4097  the x-latch to this OS thread: do not let this confuse you in
4098  debugging! */
4099 
4100  ut_ad(buf_pool->n_pend_reads > 0);
4101  buf_pool->n_pend_reads--;
4102  buf_pool->stat.n_pages_read++;
4103 
4104  if (uncompressed) {
4105  rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
4106  BUF_IO_READ);
4107  }
4108 
4109  break;
4110 
4111  case BUF_IO_WRITE:
4112  /* Write means a flush operation: call the completion
4113  routine in the flush system */
4114 
4115  buf_flush_write_complete(bpage);
4116 
4117  if (uncompressed) {
4118  rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
4119  BUF_IO_WRITE);
4120  }
4121 
4122  buf_pool->stat.n_pages_written++;
4123 
4124  break;
4125 
4126  default:
4127  ut_error;
4128  }
4129 
4130  buf_page_monitor(bpage, io_type);
4131 
4132 #ifdef UNIV_DEBUG
4133  if (buf_debug_prints) {
4134  fprintf(stderr, "Has %s page space %lu page no %lu\n",
4135  io_type == BUF_IO_READ ? "read" : "written",
4136  (ulong) buf_page_get_space(bpage),
4137  (ulong) buf_page_get_page_no(bpage));
4138  }
4139 #endif /* UNIV_DEBUG */
4140 
4141  mutex_exit(buf_page_get_mutex(bpage));
4142  buf_pool_mutex_exit(buf_pool);
4143 
4144  return(true);
4145 }
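
/* Added example (illustration only, not part of the original file):
buf_page_io_complete() is normally driven from the async I/O helper
threads. A minimal sketch of such a thread, assuming the srv0start.cc-style
loop around fil_aio_wait(), which in turn invokes this completion routine: */
#if 0
extern "C" os_thread_ret_t
io_handler_thread_sketch(
	void*	arg)	/* in: pointer to this thread's segment number */
{
	ulint	segment = *((ulint*) arg);

	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
		/* Waits for one async read/write on this AIO segment
		to finish, then calls buf_page_io_complete() on the
		block that owns the frame. */
		fil_aio_wait(segment);
	}

	OS_THREAD_DUMMY_RETURN;
}
#endif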
4146 
4147 /*********************************************************************//**
4148 Asserts that all file pages in the buffer are in a replaceable state.
4149 @return TRUE */
4150 static
4151 ibool
4152 buf_all_freed_instance(
4153 /*===================*/
4154  buf_pool_t* buf_pool)
4155 {
4156  ulint i;
4157  buf_chunk_t* chunk;
4158 
4159  ut_ad(buf_pool);
4160 
4161  buf_pool_mutex_enter(buf_pool);
4162 
4163  chunk = buf_pool->chunks;
4164 
4165  for (i = buf_pool->n_chunks; i--; chunk++) {
4166 
4167  const buf_block_t* block = buf_chunk_not_freed(chunk);
4168 
4169  if (UNIV_LIKELY_NULL(block)) {
4170  fprintf(stderr,
4171  "Page %lu %lu still fixed or dirty\n",
4172  (ulong) block->page.space,
4173  (ulong) block->page.offset);
4174  ut_error;
4175  }
4176  }
4177 
4178  buf_pool_mutex_exit(buf_pool);
4179 
4180  return(TRUE);
4181 }
4182 
4183 /*********************************************************************//**
4184 Invalidates file pages in one buffer pool instance */
4185 static
4186 void
4187 buf_pool_invalidate_instance(
4188 /*=========================*/
4189  buf_pool_t* buf_pool)
4190 {
4191  ulint i;
4192 
4193  buf_pool_mutex_enter(buf_pool);
4194 
4195  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4196 
4197  /* As this function is called during startup and
4198  during the redo-application phase of recovery, InnoDB
4199  is single threaded (apart from IO helper threads) at
4200  this stage. No new write batch can be in the
4201  initialization stage at this point. */
4202  ut_ad(buf_pool->init_flush[i] == FALSE);
4203 
4204  /* However, it is possible that a write batch that has
4205  been posted earlier is still not complete. For buffer
4206  pool invalidation to proceed we must ensure there is NO
4207  write activity happening. */
4208  if (buf_pool->n_flush[i] > 0) {
4209  buf_flush_t type = static_cast<buf_flush_t>(i);
4210 
4211  buf_pool_mutex_exit(buf_pool);
4212  buf_flush_wait_batch_end(buf_pool, type);
4213  buf_pool_mutex_enter(buf_pool);
4214  }
4215  }
4216 
4217  buf_pool_mutex_exit(buf_pool);
4218 
4219  ut_ad(buf_all_freed_instance(buf_pool));
4220 
4221  buf_pool_mutex_enter(buf_pool);
4222 
4223  while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) {
4224  }
4225 
4226  ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
4227  ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
4228 
4229  buf_pool->freed_page_clock = 0;
4230  buf_pool->LRU_old = NULL;
4231  buf_pool->LRU_old_len = 0;
4232 
4233  memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4234  buf_refresh_io_stats(buf_pool);
4235 
4236  buf_pool_mutex_exit(buf_pool);
4237 }
4238 
4239 /*********************************************************************//**
4240 Invalidates the file pages in the buffer pool when an archive recovery is
4241 completed. All the file pages buffered must be in a replaceable state when
4242 this function is called: not latched and not modified. */
4243 UNIV_INTERN
4244 void
4245 buf_pool_invalidate(void)
4246 /*=====================*/
4247 {
4248  ulint i;
4249 
4250  for (i = 0; i < srv_buf_pool_instances; i++) {
4251  buf_pool_invalidate_instance(buf_pool_from_array(i));
4252  }
4253 }
4254 
4255 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4256 /*********************************************************************//**
4257 Validates data in one buffer pool instance
4258 @return TRUE */
4259 static
4260 ibool
4261 buf_pool_validate_instance(
4262 /*=======================*/
4263  buf_pool_t* buf_pool)
4264 {
4265  buf_page_t* b;
4266  buf_chunk_t* chunk;
4267  ulint i;
4268  ulint n_lru_flush = 0;
4269  ulint n_page_flush = 0;
4270  ulint n_list_flush = 0;
4271  ulint n_lru = 0;
4272  ulint n_flush = 0;
4273  ulint n_free = 0;
4274  ulint n_zip = 0;
4275  ulint fold = 0;
4276  ulint space = 0;
4277  ulint offset = 0;
4278 
4279  ut_ad(buf_pool);
4280 
4281  buf_pool_mutex_enter(buf_pool);
4282  hash_lock_x_all(buf_pool->page_hash);
4283 
4284  chunk = buf_pool->chunks;
4285 
4286  /* Check the uncompressed blocks. */
4287 
4288  for (i = buf_pool->n_chunks; i--; chunk++) {
4289 
4290  ulint j;
4291  buf_block_t* block = chunk->blocks;
4292 
4293  for (j = chunk->size; j--; block++) {
4294 
4295  mutex_enter(&block->mutex);
4296 
4297  switch (buf_block_get_state(block)) {
4298  case BUF_BLOCK_POOL_WATCH:
4299  case BUF_BLOCK_ZIP_PAGE:
4300  case BUF_BLOCK_ZIP_DIRTY:
4301  /* These should only occur on
4302  zip_clean, zip_free[], or flush_list. */
4303  ut_error;
4304  break;
4305 
4306  case BUF_BLOCK_FILE_PAGE:
4307  space = buf_block_get_space(block);
4308  offset = buf_block_get_page_no(block);
4309  fold = buf_page_address_fold(space, offset);
4310  ut_a(buf_page_hash_get_low(buf_pool,
4311  space,
4312  offset,
4313  fold)
4314  == &block->page);
4315 
4316 #ifdef UNIV_IBUF_COUNT_DEBUG
4317  ut_a(buf_page_get_io_fix(&block->page)
4318  == BUF_IO_READ
4319  || !ibuf_count_get(buf_block_get_space(
4320  block),
4321  buf_block_get_page_no(
4322  block)));
4323 #endif
4324  switch (buf_page_get_io_fix(&block->page)) {
4325  case BUF_IO_NONE:
4326  break;
4327 
4328  case BUF_IO_WRITE:
4329  switch (buf_page_get_flush_type(
4330  &block->page)) {
4331  case BUF_FLUSH_LRU:
4332  n_lru_flush++;
4333  goto assert_s_latched;
4334  case BUF_FLUSH_SINGLE_PAGE:
4335  n_page_flush++;
4336 assert_s_latched:
4337  ut_a(rw_lock_is_locked(
4338  &block->lock,
4339  RW_LOCK_SHARED));
4340  break;
4341  case BUF_FLUSH_LIST:
4342  n_list_flush++;
4343  break;
4344  default:
4345  ut_error;
4346  }
4347 
4348  break;
4349 
4350  case BUF_IO_READ:
4351 
4352  ut_a(rw_lock_is_locked(&block->lock,
4353  RW_LOCK_EX));
4354  break;
4355 
4356  case BUF_IO_PIN:
4357  break;
4358  }
4359 
4360  n_lru++;
4361  break;
4362 
4363  case BUF_BLOCK_NOT_USED:
4364  n_free++;
4365  break;
4366 
4367  case BUF_BLOCK_READY_FOR_USE:
4368  case BUF_BLOCK_MEMORY:
4369  case BUF_BLOCK_REMOVE_HASH:
4370  /* do nothing */
4371  break;
4372  }
4373 
4374  mutex_exit(&block->mutex);
4375  }
4376  }
4377 
4378  mutex_enter(&buf_pool->zip_mutex);
4379 
4380  /* Check clean compressed-only blocks. */
4381 
4382  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4383  b = UT_LIST_GET_NEXT(list, b)) {
4384  ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4385  switch (buf_page_get_io_fix(b)) {
4386  case BUF_IO_NONE:
4387  case BUF_IO_PIN:
4388  /* All clean blocks should be I/O-unfixed. */
4389  break;
4390  case BUF_IO_READ:
4391  /* In buf_LRU_free_page(), we temporarily set
4392  b->io_fix = BUF_IO_READ for a newly allocated
4393  control block in order to prevent
4394  buf_page_get_gen() from decompressing the block. */
4395  break;
4396  default:
4397  ut_error;
4398  break;
4399  }
4400 
4401  /* It is OK to read oldest_modification here because
4402  we have acquired buf_pool->zip_mutex above which acts
4403  as the 'block->mutex' for these bpages. */
4404  ut_a(!b->oldest_modification);
4405  fold = buf_page_address_fold(b->space, b->offset);
4406  ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
4407  fold) == b);
4408  n_lru++;
4409  n_zip++;
4410  }
4411 
4412  /* Check dirty blocks. */
4413 
4414  buf_flush_list_mutex_enter(buf_pool);
4415  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4416  b = UT_LIST_GET_NEXT(list, b)) {
4417  ut_ad(b->in_flush_list);
4418  ut_a(b->oldest_modification);
4419  n_flush++;
4420 
4421  switch (buf_page_get_state(b)) {
4422  case BUF_BLOCK_ZIP_DIRTY:
4423  n_lru++;
4424  n_zip++;
4425  switch (buf_page_get_io_fix(b)) {
4426  case BUF_IO_NONE:
4427  case BUF_IO_READ:
4428  case BUF_IO_PIN:
4429  break;
4430  case BUF_IO_WRITE:
4431  switch (buf_page_get_flush_type(b)) {
4432  case BUF_FLUSH_LRU:
4433  n_lru_flush++;
4434  break;
4435  case BUF_FLUSH_SINGLE_PAGE:
4436  n_page_flush++;
4437  break;
4438  case BUF_FLUSH_LIST:
4439  n_list_flush++;
4440  break;
4441  default:
4442  ut_error;
4443  }
4444  break;
4445  }
4446  break;
4447  case BUF_BLOCK_FILE_PAGE:
4448  /* uncompressed page */
4449  break;
4450  case BUF_BLOCK_POOL_WATCH:
4451  case BUF_BLOCK_ZIP_PAGE:
4452  case BUF_BLOCK_NOT_USED:
4453  case BUF_BLOCK_READY_FOR_USE:
4454  case BUF_BLOCK_MEMORY:
4455  case BUF_BLOCK_REMOVE_HASH:
4456  ut_error;
4457  break;
4458  }
4459  fold = buf_page_address_fold(b->space, b->offset);
4460  ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
4461  fold) == b);
4462  }
4463 
4464  ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4465 
4466  hash_unlock_x_all(buf_pool->page_hash);
4467  buf_flush_list_mutex_exit(buf_pool);
4468 
4469  mutex_exit(&buf_pool->zip_mutex);
4470 
4471  if (n_lru + n_free > buf_pool->curr_size + n_zip) {
4472  fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
4473  (ulong) n_lru, (ulong) n_free,
4474  (ulong) buf_pool->curr_size, (ulong) n_zip);
4475  ut_error;
4476  }
4477 
4478  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
4479  if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
4480  fprintf(stderr, "Free list len %lu, free blocks %lu\n",
4481  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4482  (ulong) n_free);
4483  ut_error;
4484  }
4485 
4486  ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4487  ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4488  ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush);
4489 
4490  buf_pool_mutex_exit(buf_pool);
4491 
4492  ut_a(buf_LRU_validate());
4493  ut_a(buf_flush_validate(buf_pool));
4494 
4495  return(TRUE);
4496 }
4497 
4498 /*********************************************************************//**
4499 Validates the buffer buf_pool data structure.
4500 @return TRUE */
4501 UNIV_INTERN
4502 ibool
4503 buf_validate(void)
4504 /*==============*/
4505 {
4506  ulint i;
4507 
4508  for (i = 0; i < srv_buf_pool_instances; i++) {
4509  buf_pool_t* buf_pool;
4510 
4511  buf_pool = buf_pool_from_array(i);
4512 
4513  buf_pool_validate_instance(buf_pool);
4514  }
4515  return(TRUE);
4516 }
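
/* Added example (illustration only): buf_validate() is a debug-build
consistency check; callers typically wrap it in a debug assertion so that
release builds pay no cost, e.g.: */
#if 0
	ut_ad(buf_validate());	/* compiled away unless UNIV_DEBUG is set */
#endif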
4517 
4518 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4519 
4520 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4521 /*********************************************************************//**
4522 Prints info of the buffer buf_pool data structure for one instance. */
4523 static
4524 void
4525 buf_print_instance(
4526 /*===============*/
4527  buf_pool_t* buf_pool)
4528 {
4529  index_id_t* index_ids;
4530  ulint* counts;
4531  ulint size;
4532  ulint i;
4533  ulint j;
4534  index_id_t id;
4535  ulint n_found;
4536  buf_chunk_t* chunk;
4537  dict_index_t* index;
4538 
4539  ut_ad(buf_pool);
4540 
4541  size = buf_pool->curr_size;
4542 
4543  index_ids = static_cast<index_id_t*>(
4544  mem_alloc(size * sizeof *index_ids));
4545 
4546  counts = static_cast<ulint*>(mem_alloc(sizeof(ulint) * size));
4547 
4548  buf_pool_mutex_enter(buf_pool);
4549  buf_flush_list_mutex_enter(buf_pool);
4550 
4551  fprintf(stderr,
4552  "buf_pool size %lu\n"
4553  "database pages %lu\n"
4554  "free pages %lu\n"
4555  "modified database pages %lu\n"
4556  "n pending decompressions %lu\n"
4557  "n pending reads %lu\n"
4558  "n pending flush LRU %lu list %lu single page %lu\n"
4559  "pages made young %lu, not young %lu\n"
4560  "pages read %lu, created %lu, written %lu\n",
4561  (ulong) size,
4562  (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4563  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4564  (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4565  (ulong) buf_pool->n_pend_unzip,
4566  (ulong) buf_pool->n_pend_reads,
4567  (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
4568  (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
4569  (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
4570  (ulong) buf_pool->stat.n_pages_made_young,
4571  (ulong) buf_pool->stat.n_pages_not_made_young,
4572  (ulong) buf_pool->stat.n_pages_read,
4573  (ulong) buf_pool->stat.n_pages_created,
4574  (ulong) buf_pool->stat.n_pages_written);
4575 
4576  buf_flush_list_mutex_exit(buf_pool);
4577 
4578  /* Count the number of blocks belonging to each index in the buffer */
4579 
4580  n_found = 0;
4581 
4582  chunk = buf_pool->chunks;
4583 
4584  for (i = buf_pool->n_chunks; i--; chunk++) {
4585  buf_block_t* block = chunk->blocks;
4586  ulint n_blocks = chunk->size;
4587 
4588  for (; n_blocks--; block++) {
4589  const buf_frame_t* frame = block->frame;
4590 
4591  if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
4592 
4593  id = btr_page_get_index_id(frame);
4594 
4595  /* Look for the id in the index_ids array */
4596  j = 0;
4597 
4598  while (j < n_found) {
4599 
4600  if (index_ids[j] == id) {
4601  counts[j]++;
4602 
4603  break;
4604  }
4605  j++;
4606  }
4607 
4608  if (j == n_found) {
4609  n_found++;
4610  index_ids[j] = id;
4611  counts[j] = 1;
4612  }
4613  }
4614  }
4615  }
4616 
4617  buf_pool_mutex_exit(buf_pool);
4618 
4619  for (i = 0; i < n_found; i++) {
4620  index = dict_index_get_if_in_cache(index_ids[i]);
4621 
4622  fprintf(stderr,
4623  "Block count for index %llu in buffer is about %lu",
4624  (ullint) index_ids[i],
4625  (ulong) counts[i]);
4626 
4627  if (index) {
4628  putc(' ', stderr);
4629  dict_index_name_print(stderr, NULL, index);
4630  }
4631 
4632  putc('\n', stderr);
4633  }
4634 
4635  mem_free(index_ids);
4636  mem_free(counts);
4637 
4638  ut_a(buf_pool_validate_instance(buf_pool));
4639 }
4640 
4641 /*********************************************************************//**
4642 Prints info of the buffer buf_pool data structure. */
4643 UNIV_INTERN
4644 void
4645 buf_print(void)
4646 /*===========*/
4647 {
4648  ulint i;
4649 
4650  for (i = 0; i < srv_buf_pool_instances; i++) {
4651  buf_pool_t* buf_pool;
4652 
4653  buf_pool = buf_pool_from_array(i);
4654  buf_print_instance(buf_pool);
4655  }
4656 }
4657 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4658 
4659 #ifdef UNIV_DEBUG
4660 /*********************************************************************//**
4661 Returns the number of latched pages in the buffer pool.
4662 @return number of latched pages */
4663 UNIV_INTERN
4664 ulint
4665 buf_get_latched_pages_number_instance(
4666 /*==================================*/
4667  buf_pool_t* buf_pool)
4668 {
4669  buf_page_t* b;
4670  ulint i;
4671  buf_chunk_t* chunk;
4672  ulint fixed_pages_number = 0;
4673 
4674  buf_pool_mutex_enter(buf_pool);
4675 
4676  chunk = buf_pool->chunks;
4677 
4678  for (i = buf_pool->n_chunks; i--; chunk++) {
4679  buf_block_t* block;
4680  ulint j;
4681 
4682  block = chunk->blocks;
4683 
4684  for (j = chunk->size; j--; block++) {
4685  if (buf_block_get_state(block)
4686  != BUF_BLOCK_FILE_PAGE) {
4687 
4688  continue;
4689  }
4690 
4691  mutex_enter(&block->mutex);
4692 
4693  if (block->page.buf_fix_count != 0
4694  || buf_page_get_io_fix(&block->page)
4695  != BUF_IO_NONE) {
4696  fixed_pages_number++;
4697  }
4698 
4699  mutex_exit(&block->mutex);
4700  }
4701  }
4702 
4703  mutex_enter(&buf_pool->zip_mutex);
4704 
4705  /* Traverse the lists of clean and dirty compressed-only blocks. */
4706 
4707  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4708  b = UT_LIST_GET_NEXT(list, b)) {
4709  ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4710  ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
4711 
4712  if (b->buf_fix_count != 0
4713  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4714  fixed_pages_number++;
4715  }
4716  }
4717 
4718  buf_flush_list_mutex_enter(buf_pool);
4719  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4720  b = UT_LIST_GET_NEXT(list, b)) {
4721  ut_ad(b->in_flush_list);
4722 
4723  switch (buf_page_get_state(b)) {
4724  case BUF_BLOCK_ZIP_DIRTY:
4725  if (b->buf_fix_count != 0
4726  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4727  fixed_pages_number++;
4728  }
4729  break;
4730  case BUF_BLOCK_FILE_PAGE:
4731  /* uncompressed page */
4732  break;
4733  case BUF_BLOCK_POOL_WATCH:
4734  case BUF_BLOCK_ZIP_PAGE:
4735  case BUF_BLOCK_NOT_USED:
4736  case BUF_BLOCK_READY_FOR_USE:
4737  case BUF_BLOCK_MEMORY:
4738  case BUF_BLOCK_REMOVE_HASH:
4739  ut_error;
4740  break;
4741  }
4742  }
4743 
4744  buf_flush_list_mutex_exit(buf_pool);
4745  mutex_exit(&buf_pool->zip_mutex);
4746  buf_pool_mutex_exit(buf_pool);
4747 
4748  return(fixed_pages_number);
4749 }
4750 
4751 /*********************************************************************//**
4752 Returns the number of latched pages in all the buffer pools.
4753 @return number of latched pages */
4754 UNIV_INTERN
4755 ulint
4756 buf_get_latched_pages_number(void)
4757 /*==============================*/
4758 {
4759  ulint i;
4760  ulint total_latched_pages = 0;
4761 
4762  for (i = 0; i < srv_buf_pool_instances; i++) {
4763  buf_pool_t* buf_pool;
4764 
4765  buf_pool = buf_pool_from_array(i);
4766 
4767  total_latched_pages += buf_get_latched_pages_number_instance(
4768  buf_pool);
4769  }
4770 
4771  return(total_latched_pages);
4772 }
4773 
4774 #endif /* UNIV_DEBUG */
4775 
4776 /*********************************************************************//**
4777 Returns the number of pending buf pool read ios.
4778 @return number of pending read I/O operations */
4779 UNIV_INTERN
4780 ulint
4781 buf_get_n_pending_read_ios(void)
4782 /*============================*/
4783 {
4784  ulint i;
4785  ulint pend_ios = 0;
4786 
4787  for (i = 0; i < srv_buf_pool_instances; i++) {
4788  pend_ios += buf_pool_from_array(i)->n_pend_reads;
4789  }
4790 
4791  return(pend_ios);
4792 }
4793 
4794 /*********************************************************************//**
4795 Returns the ratio in percents of modified pages in the buffer pool /
4796 database pages in the buffer pool.
4797 @return modified page percentage ratio */
4798 UNIV_INTERN
4799 ulint
4800 buf_get_modified_ratio_pct(void)
4801 /*============================*/
4802 {
4803  ulint ratio;
4804  ulint lru_len = 0;
4805  ulint free_len = 0;
4806  ulint flush_list_len = 0;
4807 
4808  buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4809 
4810  ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4811 
4812  /* The "1 +" is there to avoid division by zero */
4813 
4814  return(ratio);
4815 }
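
/* Added worked example (illustration only): with lru_len = 1000,
free_len = 200 and flush_list_len = 120 the function computes
(100 * 120) / (1 + 1000 + 200) = 12000 / 1201 = 9 by integer division,
slightly under the exact 10% dirty ratio because of the "+ 1" guard
and truncation. */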
4816 
4817 /*******************************************************************//**
4818 Aggregates one pool's stats information into the total buffer pool stats */
4819 static
4820 void
4821 buf_stats_aggregate_pool_info(
4822 /*==========================*/
4823  buf_pool_info_t* total_info, /*!< in/out: the buffer pool
4824  info to store aggregated
4825  result */
4826  const buf_pool_info_t* pool_info) /*!< in: individual buffer pool
4827  stats info */
4828 {
4829  ut_a(total_info && pool_info);
4830 
4831  /* Nothing to copy if total_info is the same as pool_info */
4832  if (total_info == pool_info) {
4833  return;
4834  }
4835 
4836  total_info->pool_size += pool_info->pool_size;
4837  total_info->lru_len += pool_info->lru_len;
4838  total_info->old_lru_len += pool_info->old_lru_len;
4839  total_info->free_list_len += pool_info->free_list_len;
4840  total_info->flush_list_len += pool_info->flush_list_len;
4841  total_info->n_pend_unzip += pool_info->n_pend_unzip;
4842  total_info->n_pend_reads += pool_info->n_pend_reads;
4843  total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru;
4844  total_info->n_pending_flush_list += pool_info->n_pending_flush_list;
4845  total_info->n_pages_made_young += pool_info->n_pages_made_young;
4846  total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young;
4847  total_info->n_pages_read += pool_info->n_pages_read;
4848  total_info->n_pages_created += pool_info->n_pages_created;
4849  total_info->n_pages_written += pool_info->n_pages_written;
4850  total_info->n_page_gets += pool_info->n_page_gets;
4851  total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
4852  total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
4853  total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
4854  total_info->page_made_young_rate += pool_info->page_made_young_rate;
4855  total_info->page_not_made_young_rate +=
4856  pool_info->page_not_made_young_rate;
4857  total_info->pages_read_rate += pool_info->pages_read_rate;
4858  total_info->pages_created_rate += pool_info->pages_created_rate;
4859  total_info->pages_written_rate += pool_info->pages_written_rate;
4860  total_info->n_page_get_delta += pool_info->n_page_get_delta;
4861  total_info->page_read_delta += pool_info->page_read_delta;
4862  total_info->young_making_delta += pool_info->young_making_delta;
4863  total_info->not_young_making_delta += pool_info->not_young_making_delta;
4864  total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
4865  total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
4866  total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
4867  total_info->unzip_lru_len += pool_info->unzip_lru_len;
4868  total_info->io_sum += pool_info->io_sum;
4869  total_info->io_cur += pool_info->io_cur;
4870  total_info->unzip_sum += pool_info->unzip_sum;
4871  total_info->unzip_cur += pool_info->unzip_cur;
4872 }
4873 /*******************************************************************//**
4874 Collect buffer pool stats information for a buffer pool. Also
4875 record aggregated stats if there are more than one buffer pool
4876 in the server */
4877 UNIV_INTERN
4878 void
4879 buf_stats_get_pool_info(
4880 /*====================*/
4881  buf_pool_t* buf_pool,
4882  ulint pool_id,
4883  buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info
4884  to fill */
4885 {
4886  buf_pool_info_t* pool_info;
4887  time_t current_time;
4888  double time_elapsed;
4889 
4890  /* Find appropriate pool_info to store stats for this buffer pool */
4891  pool_info = &all_pool_info[pool_id];
4892 
4893  buf_pool_mutex_enter(buf_pool);
4894  buf_flush_list_mutex_enter(buf_pool);
4895 
4896  pool_info->pool_unique_id = pool_id;
4897 
4898  pool_info->pool_size = buf_pool->curr_size;
4899 
4900  pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
4901 
4902  pool_info->old_lru_len = buf_pool->LRU_old_len;
4903 
4904  pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free);
4905 
4906  pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list);
4907 
4908  pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
4909 
4910  pool_info->n_pend_reads = buf_pool->n_pend_reads;
4911 
4912  pool_info->n_pending_flush_lru =
4913  (buf_pool->n_flush[BUF_FLUSH_LRU]
4914  + buf_pool->init_flush[BUF_FLUSH_LRU]);
4915 
4916  pool_info->n_pending_flush_list =
4917  (buf_pool->n_flush[BUF_FLUSH_LIST]
4918  + buf_pool->init_flush[BUF_FLUSH_LIST]);
4919 
4920  pool_info->n_pending_flush_single_page =
4921  (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]
4922  + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]);
4923 
4924  buf_flush_list_mutex_exit(buf_pool);
4925 
4926  current_time = time(NULL);
4927  time_elapsed = 0.001 + difftime(current_time,
4928  buf_pool->last_printout_time);
4929 
4930  pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young;
4931 
4932  pool_info->n_pages_not_made_young =
4933  buf_pool->stat.n_pages_not_made_young;
4934 
4935  pool_info->n_pages_read = buf_pool->stat.n_pages_read;
4936 
4937  pool_info->n_pages_created = buf_pool->stat.n_pages_created;
4938 
4939  pool_info->n_pages_written = buf_pool->stat.n_pages_written;
4940 
4941  pool_info->n_page_gets = buf_pool->stat.n_page_gets;
4942 
4943  pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
4944  pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
4945 
4946  pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
4947 
4948  pool_info->page_made_young_rate =
4949  (buf_pool->stat.n_pages_made_young
4950  - buf_pool->old_stat.n_pages_made_young) / time_elapsed;
4951 
4952  pool_info->page_not_made_young_rate =
4953  (buf_pool->stat.n_pages_not_made_young
4954  - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed;
4955 
4956  pool_info->pages_read_rate =
4957  (buf_pool->stat.n_pages_read
4958  - buf_pool->old_stat.n_pages_read) / time_elapsed;
4959 
4960  pool_info->pages_created_rate =
4961  (buf_pool->stat.n_pages_created
4962  - buf_pool->old_stat.n_pages_created) / time_elapsed;
4963 
4964  pool_info->pages_written_rate =
4965  (buf_pool->stat.n_pages_written
4966  - buf_pool->old_stat.n_pages_written) / time_elapsed;
4967 
4968  pool_info->n_page_get_delta = buf_pool->stat.n_page_gets
4969  - buf_pool->old_stat.n_page_gets;
4970 
4971  if (pool_info->n_page_get_delta) {
4972  pool_info->page_read_delta = buf_pool->stat.n_pages_read
4973  - buf_pool->old_stat.n_pages_read;
4974 
4975  pool_info->young_making_delta =
4976  buf_pool->stat.n_pages_made_young
4977  - buf_pool->old_stat.n_pages_made_young;
4978 
4979  pool_info->not_young_making_delta =
4980  buf_pool->stat.n_pages_not_made_young
4981  - buf_pool->old_stat.n_pages_not_made_young;
4982  }
4983  pool_info->pages_readahead_rnd_rate =
4984  (buf_pool->stat.n_ra_pages_read_rnd
4985  - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
4986 
4987 
4988  pool_info->pages_readahead_rate =
4989  (buf_pool->stat.n_ra_pages_read
4990  - buf_pool->old_stat.n_ra_pages_read) / time_elapsed;
4991 
4992  pool_info->pages_evicted_rate =
4993  (buf_pool->stat.n_ra_pages_evicted
4994  - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed;
4995 
4996  pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
4997 
4998  pool_info->io_sum = buf_LRU_stat_sum.io;
4999 
5000  pool_info->io_cur = buf_LRU_stat_cur.io;
5001 
5002  pool_info->unzip_sum = buf_LRU_stat_sum.unzip;
5003 
5004  pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
5005 
5006  buf_refresh_io_stats(buf_pool);
5007  buf_pool_mutex_exit(buf_pool);
5008 }
5009 
5010 /*********************************************************************//**
5011 Prints info of the buffer i/o. */
5012 UNIV_INTERN
5013 void
5014 buf_print_io_instance(
5015 /*==================*/
5016  buf_pool_info_t*pool_info,
5017  FILE* file)
5018 {
5019  ut_ad(pool_info);
5020 
5021  fprintf(file,
5022  "Buffer pool size %lu\n"
5023  "Free buffers %lu\n"
5024  "Database pages %lu\n"
5025  "Old database pages %lu\n"
5026  "Modified db pages %lu\n"
5027  "Pending reads %lu\n"
5028  "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
5029  pool_info->pool_size,
5030  pool_info->free_list_len,
5031  pool_info->lru_len,
5032  pool_info->old_lru_len,
5033  pool_info->flush_list_len,
5034  pool_info->n_pend_reads,
5035  pool_info->n_pending_flush_lru,
5036  pool_info->n_pending_flush_list,
5037  pool_info->n_pending_flush_single_page);
5038 
5039  fprintf(file,
5040  "Pages made young %lu, not young %lu\n"
5041  "%.2f youngs/s, %.2f non-youngs/s\n"
5042  "Pages read %lu, created %lu, written %lu\n"
5043  "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
5044  pool_info->n_pages_made_young,
5045  pool_info->n_pages_not_made_young,
5046  pool_info->page_made_young_rate,
5047  pool_info->page_not_made_young_rate,
5048  pool_info->n_pages_read,
5049  pool_info->n_pages_created,
5050  pool_info->n_pages_written,
5051  pool_info->pages_read_rate,
5052  pool_info->pages_created_rate,
5053  pool_info->pages_written_rate);
5054 
5055  if (pool_info->n_page_get_delta) {
5056  fprintf(file,
5057  "Buffer pool hit rate %lu / 1000,"
5058  " young-making rate %lu / 1000 not %lu / 1000\n",
5059  (ulong) (1000 - (1000 * pool_info->page_read_delta
5060  / pool_info->n_page_get_delta)),
5061  (ulong) (1000 * pool_info->young_making_delta
5062  / pool_info->n_page_get_delta),
5063  (ulong) (1000 * pool_info->not_young_making_delta
5064  / pool_info->n_page_get_delta));
5065  } else {
5066  fputs("No buffer pool page gets since the last printout\n",
5067  file);
5068  }
5069 
5070  /* Statistics about read ahead algorithm */
5071  fprintf(file, "Pages read ahead %.2f/s,"
5072  " evicted without access %.2f/s,"
5073  " Random read ahead %.2f/s\n",
5074 
5075  pool_info->pages_readahead_rate,
5076  pool_info->pages_evicted_rate,
5077  pool_info->pages_readahead_rnd_rate);
5078 
5079  /* Print some values to help us with visualizing what is
5080  happening with LRU eviction. */
5081  fprintf(file,
5082  "LRU len: %lu, unzip_LRU len: %lu\n"
5083  "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
5084  pool_info->lru_len, pool_info->unzip_lru_len,
5085  pool_info->io_sum, pool_info->io_cur,
5086  pool_info->unzip_sum, pool_info->unzip_cur);
5087 }
5088 
5089 /*********************************************************************//**
5090 Prints info of the buffer i/o. */
5091 UNIV_INTERN
5092 void
5093 buf_print_io(
5094 /*=========*/
5095  FILE* file)
5096 {
5097  ulint i;
5098  buf_pool_info_t* pool_info;
5099  buf_pool_info_t* pool_info_total;
5100 
5101  /* If srv_buf_pool_instances is greater than 1, allocate
5102  one extra buf_pool_info_t; the last one stores
5103  aggregated/total values from all pools */
5104  if (srv_buf_pool_instances > 1) {
5105  pool_info = (buf_pool_info_t*) mem_zalloc((
5106  srv_buf_pool_instances + 1) * sizeof *pool_info);
5107 
5108  pool_info_total = &pool_info[srv_buf_pool_instances];
5109  } else {
5110  ut_a(srv_buf_pool_instances == 1);
5111 
5112  pool_info_total = pool_info =
5113  static_cast<buf_pool_info_t*>(
5114  mem_zalloc(sizeof *pool_info));
5115  }
5116 
5117  for (i = 0; i < srv_buf_pool_instances; i++) {
5118  buf_pool_t* buf_pool;
5119 
5120  buf_pool = buf_pool_from_array(i);
5121 
5122  /* Fetch individual buffer pool info and calculate
5123  aggregated stats along the way */
5124  buf_stats_get_pool_info(buf_pool, i, pool_info);
5125 
5126  /* If we have more than one buffer pool, store
5127  the aggregated stats */
5128  if (srv_buf_pool_instances > 1) {
5129  buf_stats_aggregate_pool_info(pool_info_total,
5130  &pool_info[i]);
5131  }
5132  }
5133 
5134  /* Print the aggregate buffer pool info */
5135  buf_print_io_instance(pool_info_total, file);
5136 
5137  /* If there is more than one buffer pool, print each
5138  individual pool's info */
5139  if (srv_buf_pool_instances > 1) {
5140  fputs("----------------------\n"
5141  "INDIVIDUAL BUFFER POOL INFO\n"
5142  "----------------------\n", file);
5143 
5144  for (i = 0; i < srv_buf_pool_instances; i++) {
5145  fprintf(file, "---BUFFER POOL %lu\n", i);
5146  buf_print_io_instance(&pool_info[i], file);
5147  }
5148  }
5149 
5150  mem_free(pool_info);
5151 }
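
/* Added example (illustrative, not from the original file): this routine
produces the buffer pool section of SHOW ENGINE INNODB STATUS output;
a caller only needs to supply a FILE handle: */
#if 0
	buf_print_io(stderr);	/* dump buffer pool I/O stats to stderr */
#endif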
5152 
5153 /**********************************************************************//**
5154 Refreshes the statistics used to print per-second averages. */
5155 UNIV_INTERN
5156 void
5157 buf_refresh_io_stats(
5158 /*=================*/
5159  buf_pool_t* buf_pool)
5160 {
5161  buf_pool->last_printout_time = ut_time();
5162  buf_pool->old_stat = buf_pool->stat;
5163 }
5164 
5165 /**********************************************************************//**
5166 Refreshes the statistics of all buffer pool instances. */
5167 UNIV_INTERN
5168 void
5169 buf_refresh_io_stats_all(void)
5170 /*==========================*/
5171 {
5172  for (ulint i = 0; i < srv_buf_pool_instances; i++) {
5173  buf_pool_t* buf_pool;
5174 
5175  buf_pool = buf_pool_from_array(i);
5176 
5177  buf_refresh_io_stats(buf_pool);
5178  }
5179 }
5180 
5181 /**********************************************************************//**
5182 Checks that all file pages in all buffer pools are in a replaceable state.
5183 @return TRUE if all freed */
5184 UNIV_INTERN
5185 ibool
5186 buf_all_freed(void)
5187 /*===============*/
5188 {
5189  for (ulint i = 0; i < srv_buf_pool_instances; i++) {
5190  buf_pool_t* buf_pool;
5191 
5192  buf_pool = buf_pool_from_array(i);
5193 
5194  if (!buf_all_freed_instance(buf_pool)) {
5195  return(FALSE);
5196  }
5197  }
5198 
5199  return(TRUE);
5200 }
5201 
5202 /*********************************************************************//**
5203 Checks that there currently are no pending i/o-operations for the
5204 buffer pool.
5205 @return number of pending i/o */
5206 UNIV_INTERN
5207 ulint
5208 buf_pool_check_no_pending_io(void)
5209 /*==============================*/
5210 {
5211  ulint i;
5212  ulint pending_io = 0;
5213 
5214  buf_pool_mutex_enter_all();
5215 
5216  for (i = 0; i < srv_buf_pool_instances; i++) {
5217  const buf_pool_t* buf_pool;
5218 
5219  buf_pool = buf_pool_from_array(i);
5220 
5221  pending_io += buf_pool->n_pend_reads
5222  + buf_pool->n_flush[BUF_FLUSH_LRU]
5223  + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]
5224  + buf_pool->n_flush[BUF_FLUSH_LIST];
5225 
5226  }
5227 
5228  buf_pool_mutex_exit_all();
5229 
5230  return(pending_io);
5231 }
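
/* Added example (illustration only): shutdown code can poll this function
until all reads and flushes have drained. A minimal sketch of such a wait
loop, using os_thread_sleep() from this codebase: */
#if 0
	while (buf_pool_check_no_pending_io() > 0) {
		os_thread_sleep(100000);	/* sleep 100 ms, then retry */
	}
#endif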
5232 
5233 #if 0
5234 Code currently not used
5235 /*********************************************************************//**
5236 Gets the current length of the free list of buffer blocks.
5237 @return length of the free list */
5238 UNIV_INTERN
5239 ulint
5240 buf_get_free_list_len(void)
5241 /*=======================*/
5242 {
5243  ulint len;
5244 
5245  buf_pool_mutex_enter(buf_pool);
5246 
5247  len = UT_LIST_GET_LEN(buf_pool->free);
5248 
5249  buf_pool_mutex_exit(buf_pool);
5250 
5251  return(len);
5252 }
5253 #endif
5254 
5255 #else /* !UNIV_HOTBACKUP */
5256 /********************************************************************//**
5257 Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
5258 UNIV_INTERN
5259 void
5260 buf_page_init_for_backup_restore(
5261 /*=============================*/
5262  ulint space, /*!< in: space id */
5263  ulint offset, /*!< in: offset of the page within space
5264  in units of a page */
5265  ulint zip_size, /*!< in: compressed page size in bytes
5266  or 0 for uncompressed pages */
5267  buf_block_t* block) /*!< in: block to init */
5268 {
5269  block->page.state = BUF_BLOCK_FILE_PAGE;
5270  block->page.space = space;
5271  block->page.offset = offset;
5272 
5273  page_zip_des_init(&block->page.zip);
5274 
5275  /* We assume that block->page.data has been allocated
5276  with zip_size == UNIV_PAGE_SIZE. */
5277  ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
5278  ut_ad(ut_is_2pow(zip_size));
5279  page_zip_set_size(&block->page.zip, zip_size);
5280  if (zip_size) {
5281  block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
5282  }
5283 }
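
/* Added example (illustration only): how a restore tool might set up a
block before copying a page image into it. The layout assumed here follows
the comment above: for compressed tablespaces the zip data is expected
right after the uncompressed frame. */
#if 0
static void
restore_page_sketch(
	buf_block_t*	block)	/* in: preallocated block whose buffer has
				room for the frame plus the zip data */
{
	/* Initialize the block to hold page 7 of space 4, stored in a
	tablespace that uses 8 KiB compressed pages. */
	buf_page_init_for_backup_restore(4, 7, 8192, block);
}
#endif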
5284 #endif /* !UNIV_HOTBACKUP */