MySQL 5.6.14 Source Code Document
buf0lru.cc
1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************//**
20 @file buf/buf0lru.cc
21 The database buffer replacement algorithm
22 
23 Created 11/5/1995 Heikki Tuuri
24 *******************************************************/
25 
26 #include "buf0lru.h"
27 
28 #ifndef UNIV_HOTBACKUP
29 #ifdef UNIV_NONINL
30 #include "buf0lru.ic"
31 #endif
32 
33 #include "ut0byte.h"
34 #include "ut0lst.h"
35 #include "ut0rnd.h"
36 #include "sync0sync.h"
37 #include "sync0rw.h"
38 #include "hash0hash.h"
39 #include "os0sync.h"
40 #include "fil0fil.h"
41 #include "btr0btr.h"
42 #include "buf0buddy.h"
43 #include "buf0buf.h"
44 #include "buf0dblwr.h"
45 #include "buf0flu.h"
46 #include "buf0rea.h"
47 #include "btr0sea.h"
48 #include "ibuf0ibuf.h"
49 #include "os0file.h"
50 #include "page0zip.h"
51 #include "log0recv.h"
52 #include "srv0srv.h"
53 #include "srv0mon.h"
54 #include "lock0lock.h"
55 
56 #include "ha_prototypes.h"
57 
65 #define BUF_LRU_OLD_TOLERANCE 20
66 
70 #define BUF_LRU_NON_OLD_MIN_LEN 5
71 #if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
72 # error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
73 #endif
74 
82 #define BUF_LRU_DROP_SEARCH_SIZE 1024
83 
86 static ibool buf_lru_switched_on_innodb_mon = FALSE;
87 
88 /******************************************************************/
97 /* @{ */
98 
102 #define BUF_LRU_STAT_N_INTERVAL 50
103 
106 #define BUF_LRU_IO_TO_UNZIP_FACTOR 50
107 
110 static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
111 
113 static ulint buf_LRU_stat_arr_ind;
114 
117 UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
118 
121 UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
122 
123 /* @} */
124 
128 UNIV_INTERN uint buf_LRU_old_threshold_ms;
129 /* @} */
130 
131 /******************************************************************/
145 static __attribute__((nonnull, warn_unused_result))
146 bool
147 buf_LRU_block_remove_hashed(
148 /*========================*/
149  buf_page_t* bpage,
152  bool zip);
154 /******************************************************************/
156 static
157 void
158 buf_LRU_block_free_hashed_page(
159 /*===========================*/
160  buf_block_t* block);
163 /******************************************************************/
166 static inline
167 void
168 incr_LRU_size_in_bytes(
169 /*===================*/
170  buf_page_t* bpage,
171  buf_pool_t* buf_pool)
172 {
173  ut_ad(buf_pool_mutex_own(buf_pool));
174  ulint zip_size = page_zip_get_size(&bpage->zip);
175  buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
176  ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
177 }
178 
179 /******************************************************************/
183 UNIV_INTERN
184 ibool
185 buf_LRU_evict_from_unzip_LRU(
186 /*=========================*/
187  buf_pool_t* buf_pool)
188 {
189  ulint io_avg;
190  ulint unzip_avg;
191 
192  ut_ad(buf_pool_mutex_own(buf_pool));
193 
194  /* If the unzip_LRU list is empty, we can only use the LRU. */
195  if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
196  return(FALSE);
197  }
198 
199  /* If unzip_LRU is at most 10% of the size of the LRU list,
200  then use the LRU. This slack allows us to keep hot
201  decompressed pages in the buffer pool. */
202  if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
203  <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
204  return(FALSE);
205  }
206 
207  /* If eviction hasn't started yet, we assume by default
208  that a workload is disk bound. */
209  if (buf_pool->freed_page_clock == 0) {
210  return(TRUE);
211  }
212 
213  /* Calculate the average over past intervals, and add the values
214  of the current interval. */
215  io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
216  + buf_LRU_stat_cur.io;
217  unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
218  + buf_LRU_stat_cur.unzip;
219 
220  /* Decide based on our formula. If the load is I/O bound
221  (unzip_avg is smaller than the weighted io_avg), evict an
222  uncompressed frame from unzip_LRU. Otherwise we assume that
223  the load is CPU bound and evict from the regular LRU. */
224  return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
225 }
226 
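As an aside, the decision rule above can be restated over plain integers. The following standalone sketch is illustrative only: the struct, field names and sample numbers are made up and are not the InnoDB API.

#include <cstdio>

/* Toy snapshot of the quantities the heuristic looks at. */
struct pool_sample {
	unsigned long	unzip_lru_len;		/* decompressed copies of compressed pages */
	unsigned long	lru_len;		/* length of the whole LRU list */
	unsigned long	freed_page_clock;	/* 0 until the first eviction happens */
	unsigned long	io_avg;			/* weighted I/O counter (past + current interval) */
	unsigned long	unzip_avg;		/* weighted decompression counter */
};

static const unsigned long IO_TO_UNZIP_FACTOR = 50;	/* cf. BUF_LRU_IO_TO_UNZIP_FACTOR */

/* true: evict an uncompressed frame from unzip_LRU (workload looks I/O bound);
false: evict from the regular LRU (workload looks CPU bound or unzip_LRU is small). */
static bool evict_from_unzip_lru(const pool_sample& s)
{
	if (s.unzip_lru_len == 0) {
		return false;			/* nothing there to evict */
	}
	if (s.unzip_lru_len <= s.lru_len / 10) {
		return false;			/* keep hot decompressed pages */
	}
	if (s.freed_page_clock == 0) {
		return true;			/* no eviction yet: assume disk bound */
	}
	return s.unzip_avg <= s.io_avg * IO_TO_UNZIP_FACTOR;
}

int main()
{
	pool_sample cpu_bound = {2000, 10000, 1, 10, 5000};
	pool_sample io_bound  = {2000, 10000, 1, 10,  100};

	std::printf("CPU-bound sample evicts from: %s\n",
		    evict_from_unzip_lru(cpu_bound) ? "unzip_LRU" : "LRU");
	std::printf("I/O-bound sample evicts from: %s\n",
		    evict_from_unzip_lru(io_bound) ? "unzip_LRU" : "LRU");
	return 0;
}
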
227 /******************************************************************/
230 static
231 void
232 buf_LRU_drop_page_hash_batch(
233 /*=========================*/
234  ulint space_id,
235  ulint zip_size,
237  const ulint* arr,
238  ulint count)
239 {
240  ulint i;
241 
242  ut_ad(arr != NULL);
244 
245  for (i = 0; i < count; ++i) {
246  btr_search_drop_page_hash_when_freed(space_id, zip_size,
247  arr[i]);
248  }
249 }
250 
251 /******************************************************************/
256 static
257 void
258 buf_LRU_drop_page_hash_for_tablespace(
259 /*==================================*/
260  buf_pool_t* buf_pool,
261  ulint id)
262 {
263  buf_page_t* bpage;
264  ulint* page_arr;
265  ulint num_entries;
266  ulint zip_size;
267 
268  zip_size = fil_space_get_zip_size(id);
269 
270  if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
271  /* Somehow, the tablespace does not exist. Nothing to drop. */
272  ut_ad(0);
273  return;
274  }
275 
276  page_arr = static_cast<ulint*>(ut_malloc(
277  sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
278 
279  buf_pool_mutex_enter(buf_pool);
280  num_entries = 0;
281 
282 scan_again:
283  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
284 
285  while (bpage != NULL) {
286  buf_page_t* prev_bpage;
287  ibool is_fixed;
288 
289  prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
290 
291  ut_a(buf_page_in_file(bpage));
292 
292 
293  if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
294  || bpage->space != id
295  || bpage->io_fix != BUF_IO_NONE) {
296  /* Compressed pages are never hashed.
297  Skip blocks of other tablespaces.
298  Skip I/O-fixed blocks (to be dealt with later). */
299 next_page:
300  bpage = prev_bpage;
301  continue;
302  }
303 
304  mutex_enter(&((buf_block_t*) bpage)->mutex);
305  is_fixed = bpage->buf_fix_count > 0
306  || !((buf_block_t*) bpage)->index;
307  mutex_exit(&((buf_block_t*) bpage)->mutex);
308 
309  if (is_fixed) {
310  goto next_page;
311  }
312 
313  /* Store the page number so that we can drop the hash
314  index in a batch later. */
315  page_arr[num_entries] = bpage->offset;
316  ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
317  ++num_entries;
318 
319  if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
320  goto next_page;
321  }
322 
323  /* Array full. We release the buf_pool->mutex to obey
324  the latching order. */
325  buf_pool_mutex_exit(buf_pool);
326 
327  buf_LRU_drop_page_hash_batch(
328  id, zip_size, page_arr, num_entries);
329 
330  num_entries = 0;
331 
332  buf_pool_mutex_enter(buf_pool);
333 
334  /* Note that we released the buf_pool mutex above
335  after reading the prev_bpage during processing of a
336  page_hash_batch (i.e.: when the array was full).
337  Because prev_bpage could belong to a compressed-only
338  block, it may have been relocated, and thus the
339  pointer cannot be trusted. Because bpage is of type
340  buf_block_t, it is safe to dereference.
341 
342  bpage can change in the LRU list. This is OK because
343  this function is a 'best effort' to drop as many
344  search hash entries as possible and it does not
345  guarantee that ALL such entries will be dropped. */
346 
347  /* If, however, bpage has been removed from LRU list
348  to the free list then we should restart the scan.
349  bpage->state is protected by buf_pool mutex. */
350  if (bpage
351  && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
352  goto scan_again;
353  }
354  }
355 
356  buf_pool_mutex_exit(buf_pool);
357 
358  /* Drop any remaining batch of search hashed pages. */
359  buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
360  ut_free(page_arr);
361 }
362 
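The function above is an instance of a general batching pattern: collect keys into a fixed-size array while the hot mutex is held, then release the mutex to do the per-page work. A minimal standalone sketch of that pattern follows; std::mutex and the invented names below stand in for the InnoDB types and are not the real API.

#include <cstddef>
#include <cstdio>
#include <mutex>
#include <vector>

static const std::size_t DROP_BATCH_SIZE = 1024;	/* cf. BUF_LRU_DROP_SEARCH_SIZE */

/* Stand-in for btr_search_drop_page_hash_when_freed(): must run without
the list mutex, which is why the batch is handed over first. */
static void drop_hash_entries(const std::vector<unsigned long>& pages, std::size_t count)
{
	for (std::size_t i = 0; i < count; ++i) {
		std::printf("dropping hash entries for page %lu\n", pages[i]);
	}
}

static void drop_for_tablespace(std::mutex& list_mutex,
				const std::vector<unsigned long>& lru_pages)
{
	std::vector<unsigned long> batch(DROP_BATCH_SIZE);
	std::size_t n = 0;

	std::unique_lock<std::mutex> lock(list_mutex);

	for (unsigned long page_no : lru_pages) {
		batch[n++] = page_no;

		if (n < DROP_BATCH_SIZE) {
			continue;
		}

		/* Batch full: release the mutex to obey the latching order,
		process the batch, then re-acquire.  The real code must also
		re-validate its scan position after re-locking. */
		lock.unlock();
		drop_hash_entries(batch, n);
		n = 0;
		lock.lock();
	}

	lock.unlock();
	drop_hash_entries(batch, n);	/* remaining partial batch */
}

int main()
{
	std::mutex list_mutex;
	std::vector<unsigned long> pages = {10, 11, 12, 13, 14};

	drop_for_tablespace(list_mutex, pages);
	return 0;
}
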
363 /******************************************************************/
369 static __attribute__((nonnull))
370 void
371 buf_flush_yield(
372 /*============*/
373  buf_pool_t* buf_pool,
374  buf_page_t* bpage)
375 {
376  ib_mutex_t* block_mutex;
377 
378  ut_ad(buf_pool_mutex_own(buf_pool));
379  ut_ad(buf_page_in_file(bpage));
380 
381  block_mutex = buf_page_get_mutex(bpage);
382 
383  mutex_enter(block_mutex);
384  /* "Fix" the block so that the position cannot be
385  changed after we release the buffer pool and
386  block mutexes. */
387  buf_page_set_sticky(bpage);
388 
389  /* Now it is safe to release the buf_pool->mutex. */
390  buf_pool_mutex_exit(buf_pool);
391 
392  mutex_exit(block_mutex);
393  /* Try and force a context switch. */
394  os_thread_yield();
395 
396  buf_pool_mutex_enter(buf_pool);
397 
398  mutex_enter(block_mutex);
399  /* "Unfix" the block now that we have both the
400  buffer pool and block mutex again. */
401  buf_page_unset_sticky(bpage);
402  mutex_exit(block_mutex);
403 }
404 
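A minimal sketch of the fix/yield/unfix dance above, assuming simplified types: std::mutex and a plain bool take the place of buf_pool->mutex, the block mutex and the sticky I/O-fix state.

#include <mutex>
#include <thread>

struct page_desc {
	std::mutex	block_mutex;
	bool		sticky = false;	/* stand-in for buf_page_set_sticky() */
};

/* Caller holds pool_mutex, just as buf_flush_yield() is entered with buf_pool->mutex. */
static void flush_yield(std::mutex& pool_mutex, page_desc& page)
{
	page.block_mutex.lock();
	page.sticky = true;		/* pin the page so it cannot move or be evicted */
	pool_mutex.unlock();
	page.block_mutex.unlock();

	std::this_thread::yield();	/* cf. os_thread_yield() */

	pool_mutex.lock();
	page.block_mutex.lock();
	page.sticky = false;		/* both mutexes held again: safe to unpin */
	page.block_mutex.unlock();
}

int main()
{
	std::mutex	pool_mutex;
	page_desc	page;

	pool_mutex.lock();
	flush_yield(pool_mutex, page);
	pool_mutex.unlock();
	return 0;
}
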
405 /******************************************************************/
410 static __attribute__((nonnull(1), warn_unused_result))
411 bool
412 buf_flush_try_yield(
413 /*================*/
414  buf_pool_t* buf_pool,
415  buf_page_t* bpage,
416  ulint processed)
417 {
418  /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
419  loop we release buf_pool->mutex to let other threads
420  do their job but only if the block is not IO fixed. This
421  ensures that the block stays in its position in the
422  flush_list. */
423 
424  if (bpage != NULL
425  && processed >= BUF_LRU_DROP_SEARCH_SIZE
426  && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
427 
428  buf_flush_list_mutex_exit(buf_pool);
429 
430  /* Release the buffer pool and block mutex
431  to give the other threads a go. */
432 
433  buf_flush_yield(buf_pool, bpage);
434 
435  buf_flush_list_mutex_enter(buf_pool);
436 
437  /* Should not have been removed from the flush
438  list during the yield. However, this check is
439  not sufficient to catch a remove -> add. */
440 
441  ut_ad(bpage->in_flush_list);
442 
443  return(true);
444  }
445 
446  return(false);
447 }
448 
449 /******************************************************************/
453 static __attribute__((nonnull, warn_unused_result))
454 bool
455 buf_flush_or_remove_page(
456 /*=====================*/
457  buf_pool_t* buf_pool,
458  buf_page_t* bpage,
459  bool flush)
462 {
463  ut_ad(buf_pool_mutex_own(buf_pool));
464  ut_ad(buf_flush_list_mutex_own(buf_pool));
465 
466  /* bpage->space and bpage->io_fix are protected by
467  buf_pool->mutex and block_mutex. It is safe to check
468  them while holding buf_pool->mutex only. */
469 
470  if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
471 
472  /* We cannot remove this page during this scan
473  yet; maybe the system is currently reading it
474  in, or flushing the modifications to the file */
475  return(false);
476 
477  }
478 
479  ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
480  bool processed = false;
481 
482  /* We have to release the flush_list_mutex to obey the
483  latching order. We are however guaranteed that the page
484  will stay in the flush_list and won't be relocated because
485  buf_flush_remove() and buf_flush_relocate_on_flush_list()
486  need buf_pool->mutex as well. */
487 
488  buf_flush_list_mutex_exit(buf_pool);
489 
490  mutex_enter(block_mutex);
491 
492  ut_ad(bpage->oldest_modification != 0);
493 
494  if (!flush) {
495 
496  buf_flush_remove(bpage);
497 
498  mutex_exit(block_mutex);
499 
500  processed = true;
501 
502  } else if (buf_flush_ready_for_flush(bpage,
503  BUF_FLUSH_SINGLE_PAGE)) {
504 
505  /* The following call will release the buffer pool
506  and block mutex. */
507  buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false);
508  ut_ad(!mutex_own(block_mutex));
509 
510  /* Wake possible simulated aio thread to actually
511  post the writes to the operating system */
512  os_aio_simulated_wake_handler_threads();
513 
514  buf_pool_mutex_enter(buf_pool);
515 
516  processed = true;
517  } else {
518  /* Not ready for flush. It can't be IO fixed because we
519  checked for that at the start of the function. It must
520  be buffer fixed. */
521  ut_ad(bpage->buf_fix_count > 0);
522  mutex_exit(block_mutex);
523  }
524 
525  buf_flush_list_mutex_enter(buf_pool);
526 
527  ut_ad(!mutex_own(block_mutex));
528  ut_ad(buf_pool_mutex_own(buf_pool));
529 
530  return(processed);
531 }
532 
533 /******************************************************************/
541 static __attribute__((nonnull(1), warn_unused_result))
542 dberr_t
543 buf_flush_or_remove_pages(
544 /*======================*/
545  buf_pool_t* buf_pool,
546  ulint id,
548  bool flush,
551  const trx_t* trx)
553 {
554  buf_page_t* prev;
555  buf_page_t* bpage;
556  ulint processed = 0;
557 
558  buf_flush_list_mutex_enter(buf_pool);
559 
560 rescan:
561  bool all_freed = true;
562 
563  for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
564  bpage != NULL;
565  bpage = prev) {
566 
567  ut_a(buf_page_in_file(bpage));
568 
569  /* Save the previous link because once we free the
570  page we can't rely on the links. */
571 
572  prev = UT_LIST_GET_PREV(list, bpage);
573 
574  if (buf_page_get_space(bpage) != id) {
575 
576  /* Skip this block, as it does not belong to
577  the target space. */
578 
579  } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) {
580 
581  /* Remove was unsuccessful, we have to try again
582  by scanning the entire list from the end.
583  This also means that we never released the
584  buf_pool mutex. Therefore we can trust the prev
585  pointer.
586  buf_flush_or_remove_page() released the
587  flush list mutex but not the buf_pool mutex.
588  Therefore it is possible that a new page was
589  added to the flush list. For example, in case
590  where we are at the head of the flush list and
591  prev == NULL. That is OK because we have the
592  tablespace quiesced and no new pages for this
593  space-id should enter flush_list. This is
594  because the only callers of this function are
595  DROP TABLE and FLUSH TABLE FOR EXPORT.
596  We know that we'll have to do at least one more
597  scan but we don't break out of loop here and
598  try to do as much work as we can in this
599  iteration. */
600 
601  all_freed = false;
602  } else if (flush) {
603 
604  /* The processing was successful. And during the
605  processing we have released the buf_pool mutex
606  when calling buf_flush_page(). We cannot trust
607  prev pointer. */
608  goto rescan;
609  }
610 
611  ++processed;
612 
613  /* Yield if we have hogged the CPU and mutexes for too long. */
614  if (buf_flush_try_yield(buf_pool, prev, processed)) {
615 
616  /* Reset the batch size counter if we had to yield. */
617 
618  processed = 0;
619  }
620 
621 #ifdef DBUG_OFF
622  if (flush) {
623  DBUG_EXECUTE_IF("ib_export_flush_crash",
624  static ulint n_pages;
625  if (++n_pages == 4) {DBUG_SUICIDE();});
626  }
627 #endif /* DBUG_OFF */
628 
629  /* The check for whether trx has been interrupted is
630  expensive; we only want to perform it every N iterations. */
631  if (!processed && trx && trx_is_interrupted(trx)) {
632  buf_flush_list_mutex_exit(buf_pool);
633  return(DB_INTERRUPTED);
634  }
635  }
636 
637  buf_flush_list_mutex_exit(buf_pool);
638 
639  return(all_freed ? DB_SUCCESS : DB_FAIL);
640 }
641 
642 /******************************************************************/
647 static __attribute__((nonnull(1)))
648 void
649 buf_flush_dirty_pages(
650 /*==================*/
651  buf_pool_t* buf_pool,
652  ulint id,
653  bool flush,
655  const trx_t* trx)
657 {
658  dberr_t err;
659 
660  do {
661  buf_pool_mutex_enter(buf_pool);
662 
663  err = buf_flush_or_remove_pages(buf_pool, id, flush, trx);
664 
665  buf_pool_mutex_exit(buf_pool);
666 
667  ut_ad(buf_flush_validate(buf_pool));
668 
669  if (err == DB_FAIL) {
670  os_thread_sleep(2000);
671  }
672 
673  /* DB_FAIL is a soft error, it means that the task wasn't
674  completed, needs to be retried. */
675 
676  ut_ad(buf_flush_validate(buf_pool));
677 
678  } while (err == DB_FAIL);
679 
680  ut_ad(err == DB_INTERRUPTED
681  || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
682 }
683 
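The retry loop above treats DB_FAIL as a soft error. Below is a standalone sketch of the same control flow, with a toy status enum and a fake scan that succeeds on its third pass; every name in it is invented for illustration.

#include <chrono>
#include <cstdio>
#include <thread>

enum status { SUCCESS, FAIL, INTERRUPTED };

/* Stand-in for buf_flush_or_remove_pages(): pretend the first two passes
leave I/O-fixed pages behind and therefore cannot finish. */
static status flush_or_remove_pages_once()
{
	static int passes = 0;
	return (++passes < 3) ? FAIL : SUCCESS;
}

int main()
{
	status	err;

	do {
		err = flush_or_remove_pages_once();

		if (err == FAIL) {
			/* cf. os_thread_sleep(2000): back off ~2ms, then rescan. */
			std::this_thread::sleep_for(std::chrono::microseconds(2000));
		}
	} while (err == FAIL);

	std::printf("done: %s\n", err == SUCCESS ? "all pages handled" : "interrupted");
	return 0;
}
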
684 /******************************************************************/
687 static __attribute__((nonnull))
688 void
689 buf_LRU_remove_all_pages(
690 /*=====================*/
691  buf_pool_t* buf_pool,
692  ulint id)
693 {
694  buf_page_t* bpage;
695  ibool all_freed;
696 
697 scan_again:
698  buf_pool_mutex_enter(buf_pool);
699 
700  all_freed = TRUE;
701 
702  for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
703  bpage != NULL;
704  /* No op */) {
705 
706  rw_lock_t* hash_lock;
707  buf_page_t* prev_bpage;
708  ib_mutex_t* block_mutex = NULL;
709 
710  ut_a(buf_page_in_file(bpage));
711  ut_ad(bpage->in_LRU_list);
712 
713  prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
714 
715  /* bpage->space and bpage->io_fix are protected by
716  buf_pool->mutex and the block_mutex. It is safe to check
717  them while holding buf_pool->mutex only. */
718 
719  if (buf_page_get_space(bpage) != id) {
720  /* Skip this block, as it does not belong to
721  the space that is being invalidated. */
722  goto next_page;
723  } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
724  /* We cannot remove this page during this scan
725  yet; maybe the system is currently reading it
726  in, or flushing the modifications to the file */
727 
728  all_freed = FALSE;
729  goto next_page;
730  } else {
731  ulint fold = buf_page_address_fold(
732  bpage->space, bpage->offset);
733 
734  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
735 
736  rw_lock_x_lock(hash_lock);
737 
738  block_mutex = buf_page_get_mutex(bpage);
739  mutex_enter(block_mutex);
740 
741  if (bpage->buf_fix_count > 0) {
742 
743  mutex_exit(block_mutex);
744 
745  rw_lock_x_unlock(hash_lock);
746 
747  /* We cannot remove this page during
748  this scan yet; maybe the system is
749  currently reading it in, or flushing
750  the modifications to the file */
751 
752  all_freed = FALSE;
753 
754  goto next_page;
755  }
756  }
757 
758  ut_ad(mutex_own(block_mutex));
759 
760 #ifdef UNIV_DEBUG
761  if (buf_debug_prints) {
762  fprintf(stderr,
763  "Dropping space %lu page %lu\n",
764  (ulong) buf_page_get_space(bpage),
765  (ulong) buf_page_get_page_no(bpage));
766  }
767 #endif
768  if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
769  /* Do nothing, because the adaptive hash index
770  covers uncompressed pages only. */
771  } else if (((buf_block_t*) bpage)->index) {
772  ulint page_no;
773  ulint zip_size;
774 
775  buf_pool_mutex_exit(buf_pool);
776 
777  zip_size = buf_page_get_zip_size(bpage);
778  page_no = buf_page_get_page_no(bpage);
779 
780  rw_lock_x_unlock(hash_lock);
781 
782  mutex_exit(block_mutex);
783 
784  /* Note that the following call will acquire
785  and release block->lock X-latch. */
786 
787  btr_search_drop_page_hash_when_freed(
788  id, zip_size, page_no);
789 
790  goto scan_again;
791  }
792 
793  if (bpage->oldest_modification != 0) {
794 
795  buf_flush_remove(bpage);
796  }
797 
798  ut_ad(!bpage->in_flush_list);
799 
800  /* Remove from the LRU list. */
801 
802  if (buf_LRU_block_remove_hashed(bpage, true)) {
803  buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
804  } else {
805  ut_ad(block_mutex == &buf_pool->zip_mutex);
806  }
807 
808  ut_ad(!mutex_own(block_mutex));
809 
810 #ifdef UNIV_SYNC_DEBUG
811  /* buf_LRU_block_remove_hashed() releases the hash_lock */
812  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
813  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
814 #endif /* UNIV_SYNC_DEBUG */
815 
816 next_page:
817  bpage = prev_bpage;
818  }
819 
820  buf_pool_mutex_exit(buf_pool);
821 
822  if (!all_freed) {
823  os_thread_sleep(20000);
824 
825  goto scan_again;
826  }
827 }
828 
829 /******************************************************************/
835 static __attribute__((nonnull(1)))
836 void
837 buf_LRU_remove_pages(
838 /*=================*/
839  buf_pool_t* buf_pool,
840  ulint id,
841  buf_remove_t buf_remove,
842  const trx_t* trx)
844 {
845  switch (buf_remove) {
846  case BUF_REMOVE_ALL_NO_WRITE:
847  buf_LRU_remove_all_pages(buf_pool, id);
848  break;
849 
850  case BUF_REMOVE_FLUSH_NO_WRITE:
851  ut_a(trx == 0);
852  buf_flush_dirty_pages(buf_pool, id, false, NULL);
853  break;
854 
855  case BUF_REMOVE_FLUSH_WRITE:
856  ut_a(trx != 0);
857  buf_flush_dirty_pages(buf_pool, id, true, trx);
858  /* Ensure that all asynchronous IO is completed. */
859  os_aio_wait_until_no_pending_writes();
860  fil_flush(id);
861  break;
862  }
863 }
864 
865 /******************************************************************/
870 UNIV_INTERN
871 void
872 buf_LRU_flush_or_remove_pages(
873 /*==========================*/
874  ulint id,
875  buf_remove_t buf_remove,
876  const trx_t* trx)
878 {
879  ulint i;
880 
881  /* Before we attempt to drop pages one by one we first
882  attempt to drop page hash index entries in batches to make
883  it more efficient. The batching attempt is a best effort
884  attempt and does not guarantee that all pages hash entries
885  will be dropped. We get rid of remaining page hash entries
886  one by one below. */
887  for (i = 0; i < srv_buf_pool_instances; i++) {
888  buf_pool_t* buf_pool;
889 
890  buf_pool = buf_pool_from_array(i);
891 
892  switch (buf_remove) {
893  case BUF_REMOVE_ALL_NO_WRITE:
894  buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
895  break;
896 
897  case BUF_REMOVE_FLUSH_NO_WRITE:
898  /* It is a DROP TABLE for a single table
899  tablespace. No AHI entries exist because
900  we already dealt with them when freeing up
901  extents. */
902  case BUF_REMOVE_FLUSH_WRITE:
903  /* We allow read-only queries against the
904  table, there is no need to drop the AHI entries. */
905  break;
906  }
907 
908  buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
909  }
910 }
911 
912 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
913 /********************************************************************/
915 UNIV_INTERN
916 void
917 buf_LRU_insert_zip_clean(
918 /*=====================*/
919  buf_page_t* bpage)
920 {
921  buf_page_t* b;
922  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
923 
924  ut_ad(buf_pool_mutex_own(buf_pool));
926 
927  /* Find the first successor of bpage in the LRU list
928  that is in the zip_clean list. */
929  b = bpage;
930  do {
931  b = UT_LIST_GET_NEXT(LRU, b);
932  } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
933 
934  /* Insert bpage before b, i.e., after the predecessor of b. */
935  if (b) {
936  b = UT_LIST_GET_PREV(list, b);
937  }
938 
939  if (b) {
940  UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
941  } else {
942  UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
943  }
944 }
945 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
946 
947 /******************************************************************/
951 UNIV_INLINE
952 ibool
953 buf_LRU_free_from_unzip_LRU_list(
954 /*=============================*/
955  buf_pool_t* buf_pool,
956  ibool scan_all)
959 {
960  buf_block_t* block;
961  ibool freed;
962  ulint scanned;
963 
964  ut_ad(buf_pool_mutex_own(buf_pool));
965 
966  if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) {
967  return(FALSE);
968  }
969 
970  for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU),
971  scanned = 1, freed = FALSE;
972  block != NULL && !freed
973  && (scan_all || scanned < srv_LRU_scan_depth);
974  ++scanned) {
975 
976  buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU,
977  block);
978 
980  ut_ad(block->in_unzip_LRU_list);
981  ut_ad(block->page.in_LRU_list);
982 
983  freed = buf_LRU_free_page(&block->page, false);
984 
985  block = prev_block;
986  }
987 
988  MONITOR_INC_VALUE_CUMULATIVE(
989  MONITOR_LRU_UNZIP_SEARCH_SCANNED,
990  MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
991  MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
992  scanned);
993  return(freed);
994 }
995 
996 /******************************************************************/
999 UNIV_INLINE
1000 ibool
1001 buf_LRU_free_from_common_LRU_list(
1002 /*==============================*/
1003  buf_pool_t* buf_pool,
1004  ibool scan_all)
1007 {
1008  buf_page_t* bpage;
1009  ibool freed;
1010  ulint scanned;
1011 
1012  ut_ad(buf_pool_mutex_own(buf_pool));
1013 
1014  for (bpage = UT_LIST_GET_LAST(buf_pool->LRU),
1015  scanned = 1, freed = FALSE;
1016  bpage != NULL && !freed
1017  && (scan_all || scanned < srv_LRU_scan_depth);
1018  ++scanned) {
1019 
1020  unsigned accessed;
1021  buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU,
1022  bpage);
1023 
1024  ut_ad(buf_page_in_file(bpage));
1025  ut_ad(bpage->in_LRU_list);
1026 
1027  accessed = buf_page_is_accessed(bpage);
1028  freed = buf_LRU_free_page(bpage, true);
1029  if (freed && !accessed) {
1030  /* Keep track of pages that are evicted without
1031  ever being accessed. This gives us a measure of
1032  the effectiveness of readahead */
1033  ++buf_pool->stat.n_ra_pages_evicted;
1034  }
1035 
1036  bpage = prev_bpage;
1037  }
1038 
1039  MONITOR_INC_VALUE_CUMULATIVE(
1040  MONITOR_LRU_SEARCH_SCANNED,
1041  MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
1042  MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
1043  scanned);
1044 
1045  return(freed);
1046 }
1047 
1048 /******************************************************************/
1051 UNIV_INTERN
1052 ibool
1053 buf_LRU_scan_and_free_block(
1054 /*========================*/
1055  buf_pool_t* buf_pool,
1056  ibool scan_all)
1059 {
1060  ut_ad(buf_pool_mutex_own(buf_pool));
1061 
1062  return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all)
1063  || buf_LRU_free_from_common_LRU_list(
1064  buf_pool, scan_all));
1065 }
1066 
1067 /******************************************************************/
1072 UNIV_INTERN
1073 ibool
1074 buf_LRU_buf_pool_running_out(void)
1075 /*==============================*/
1076 {
1077  ulint i;
1078  ibool ret = FALSE;
1079 
1080  for (i = 0; i < srv_buf_pool_instances && !ret; i++) {
1081  buf_pool_t* buf_pool;
1082 
1083  buf_pool = buf_pool_from_array(i);
1084 
1085  buf_pool_mutex_enter(buf_pool);
1086 
1087  if (!recv_recovery_on
1088  && UT_LIST_GET_LEN(buf_pool->free)
1089  + UT_LIST_GET_LEN(buf_pool->LRU)
1090  < buf_pool->curr_size / 4) {
1091 
1092  ret = TRUE;
1093  }
1094 
1095  buf_pool_mutex_exit(buf_pool);
1096  }
1097 
1098  return(ret);
1099 }
1100 
1101 /******************************************************************/
1105 UNIV_INTERN
1106 buf_block_t*
1107 buf_LRU_get_free_only(
1108 /*==================*/
1109  buf_pool_t* buf_pool)
1110 {
1111  buf_block_t* block;
1112 
1113  ut_ad(buf_pool_mutex_own(buf_pool));
1114 
1115  block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
1116 
1117  if (block) {
1118 
1119  ut_ad(block->page.in_free_list);
1120  ut_d(block->page.in_free_list = FALSE);
1121  ut_ad(!block->page.in_flush_list);
1122  ut_ad(!block->page.in_LRU_list);
1123  ut_a(!buf_page_in_file(&block->page));
1124  UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
1125 
1126  mutex_enter(&block->mutex);
1127 
1128  buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
1129  UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
1130 
1131  ut_ad(buf_pool_from_block(block) == buf_pool);
1132 
1133  mutex_exit(&block->mutex);
1134  }
1135 
1136  return(block);
1137 }
1138 
1139 /******************************************************************/
1144 static
1145 void
1146 buf_LRU_check_size_of_non_data_objects(
1147 /*===================================*/
1148  const buf_pool_t* buf_pool)
1149 {
1150  ut_ad(buf_pool_mutex_own(buf_pool));
1151 
1152  if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
1153  + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
1154  ut_print_timestamp(stderr);
1155 
1156  fprintf(stderr,
1157  " InnoDB: ERROR: over 95 percent of the buffer pool"
1158  " is occupied by\n"
1159  "InnoDB: lock heaps or the adaptive hash index!"
1160  " Check that your\n"
1161  "InnoDB: transactions do not set too many row locks.\n"
1162  "InnoDB: Your buffer pool size is %lu MB."
1163  " Maybe you should make\n"
1164  "InnoDB: the buffer pool bigger?\n"
1165  "InnoDB: We intentionally generate a seg fault"
1166  " to print a stack trace\n"
1167  "InnoDB: on Linux!\n",
1168  (ulong) (buf_pool->curr_size
1169  / (1024 * 1024 / UNIV_PAGE_SIZE)));
1170 
1171  ut_error;
1172 
1173  } else if (!recv_recovery_on
1174  && (UT_LIST_GET_LEN(buf_pool->free)
1175  + UT_LIST_GET_LEN(buf_pool->LRU))
1176  < buf_pool->curr_size / 3) {
1177 
1178  if (!buf_lru_switched_on_innodb_mon) {
1179 
1180  /* Over 67 % of the buffer pool is occupied by lock
1181  heaps or the adaptive hash index. This may be a memory
1182  leak! */
1183 
1184  ut_print_timestamp(stderr);
1185  fprintf(stderr,
1186  " InnoDB: WARNING: over 67 percent of"
1187  " the buffer pool is occupied by\n"
1188  "InnoDB: lock heaps or the adaptive"
1189  " hash index! Check that your\n"
1190  "InnoDB: transactions do not set too many"
1191  " row locks.\n"
1192  "InnoDB: Your buffer pool size is %lu MB."
1193  " Maybe you should make\n"
1194  "InnoDB: the buffer pool bigger?\n"
1195  "InnoDB: Starting the InnoDB Monitor to print"
1196  " diagnostics, including\n"
1197  "InnoDB: lock heap and hash index sizes.\n",
1198  (ulong) (buf_pool->curr_size
1199  / (1024 * 1024 / UNIV_PAGE_SIZE)));
1200 
1201  buf_lru_switched_on_innodb_mon = TRUE;
1202  srv_print_innodb_monitor = TRUE;
1204  }
1205  } else if (buf_lru_switched_on_innodb_mon) {
1206 
1207  /* Switch off the InnoDB Monitor; this is a simple way
1208  to stop the monitor if the situation becomes less urgent,
1209  but may also surprise users if the user also switched on the
1210  monitor! */
1211 
1212  buf_lru_switched_on_innodb_mon = FALSE;
1213  srv_print_innodb_monitor = FALSE;
1214  }
1215 }
1216 
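To make the thresholds concrete: curr_size is measured in pages, so the error path fires when fewer than curr_size/20 pages (5%) remain for data, the warning path when fewer than curr_size/3 (roughly 33%) remain, and the messages convert pages to megabytes with curr_size / (1024 * 1024 / UNIV_PAGE_SIZE). A standalone worked example with made-up numbers:

#include <cstdio>

int main()
{
	const unsigned long page_size = 16384;	/* UNIV_PAGE_SIZE default */
	const unsigned long curr_size = 8192;	/* pool of 8192 pages = 128 MB */
	const unsigned long free_len  = 100;	/* pages on the free list */
	const unsigned long lru_len   = 200;	/* pages on the LRU list */

	const unsigned long data_pages = free_len + lru_len;

	if (data_pages < curr_size / 20) {
		/* Under 5% left for data: over 95% is lock heaps / AHI.  Fatal. */
		std::printf("fatal: pool is %lu MB, only %lu data pages left\n",
			    curr_size / (1024 * 1024 / page_size), data_pages);
	} else if (data_pages < curr_size / 3) {
		/* Under a third left for data: over 67% is non-data.  Warn. */
		std::printf("warning: possible leak in lock heaps or AHI\n");
	}
	return 0;
}
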
1217 /******************************************************************/
1243 UNIV_INTERN
1244 buf_block_t*
1245 buf_LRU_get_free_block(
1246 /*===================*/
1247  buf_pool_t* buf_pool)
1248 {
1249  buf_block_t* block = NULL;
1250  ibool freed = FALSE;
1251  ulint n_iterations = 0;
1252  ulint flush_failures = 0;
1253  ibool mon_value_was = FALSE;
1254  ibool started_monitor = FALSE;
1255 
1256  MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
1257 loop:
1258  buf_pool_mutex_enter(buf_pool);
1259 
1260  buf_LRU_check_size_of_non_data_objects(buf_pool);
1261 
1262  /* If there is a block in the free list, take it */
1263  block = buf_LRU_get_free_only(buf_pool);
1264 
1265  if (block) {
1266 
1267  buf_pool_mutex_exit(buf_pool);
1268  ut_ad(buf_pool_from_block(block) == buf_pool);
1269  memset(&block->page.zip, 0, sizeof block->page.zip);
1270 
1271  if (started_monitor) {
1272  srv_print_innodb_monitor = mon_value_was;
1273  }
1274 
1275  return(block);
1276  }
1277 
1278  if (buf_pool->init_flush[BUF_FLUSH_LRU]
1279  && srv_use_doublewrite_buf
1280  && buf_dblwr != NULL) {
1281 
1282  /* If there is an LRU flush happening in the background
1283  then we wait for it to end instead of trying a single
1284  page flush. If, however, we are not using doublewrite
1285  buffer then it is better to do our own single page
1286  flush instead of waiting for LRU flush to end. */
1287  buf_pool_mutex_exit(buf_pool);
1288  buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1289  goto loop;
1290  }
1291 
1292  freed = FALSE;
1293  if (buf_pool->try_LRU_scan || n_iterations > 0) {
1294  /* If no block was in the free list, search from the
1295  end of the LRU list and try to free a block there.
1296  If we are doing for the first time we'll scan only
1297  tail of the LRU list otherwise we scan the whole LRU
1298  list. */
1299  freed = buf_LRU_scan_and_free_block(buf_pool,
1300  n_iterations > 0);
1301 
1302  if (!freed && n_iterations == 0) {
1303  /* Tell other threads that there is no point
1304  in scanning the LRU list. This flag is set to
1305  TRUE again when we flush a batch from this
1306  buffer pool. */
1307  buf_pool->try_LRU_scan = FALSE;
1308  }
1309  }
1310 
1311  buf_pool_mutex_exit(buf_pool);
1312 
1313  if (freed) {
1314  goto loop;
1315 
1316  }
1317 
1318  if (n_iterations > 20) {
1319  ut_print_timestamp(stderr);
1320  fprintf(stderr,
1321  " InnoDB: Warning: difficult to find free blocks in\n"
1322  "InnoDB: the buffer pool (%lu search iterations)!\n"
1323  "InnoDB: %lu failed attempts to flush a page!"
1324  " Consider\n"
1325  "InnoDB: increasing the buffer pool size.\n"
1326  "InnoDB: It is also possible that"
1327  " in your Unix version\n"
1328  "InnoDB: fsync is very slow, or"
1329  " completely frozen inside\n"
1330  "InnoDB: the OS kernel. Then upgrading to"
1331  " a newer version\n"
1332  "InnoDB: of your operating system may help."
1333  " Look at the\n"
1334  "InnoDB: number of fsyncs in diagnostic info below.\n"
1335  "InnoDB: Pending flushes (fsync) log: %lu;"
1336  " buffer pool: %lu\n"
1337  "InnoDB: %lu OS file reads, %lu OS file writes,"
1338  " %lu OS fsyncs\n"
1339  "InnoDB: Starting InnoDB Monitor to print further\n"
1340  "InnoDB: diagnostics to the standard output.\n",
1341  (ulong) n_iterations,
1342  (ulong) flush_failures,
1343  (ulong) fil_n_pending_log_flushes,
1344  (ulong) fil_n_pending_tablespace_flushes,
1345  (ulong) os_n_file_reads, (ulong) os_n_file_writes,
1346  (ulong) os_n_fsyncs);
1347 
1348  mon_value_was = srv_print_innodb_monitor;
1349  started_monitor = TRUE;
1350  srv_print_innodb_monitor = TRUE;
1352  }
1353 
1354  /* If we have scanned the whole LRU and still are unable to
1355  find a free block then we should sleep here to let the
1356  page_cleaner do an LRU batch for us.
1357  TODO: It'd be better if we can signal the page_cleaner. Perhaps
1358  we should use timed wait for page_cleaner. */
1359  if (n_iterations > 1) {
1360 
1361  os_thread_sleep(100000);
1362  }
1363 
1364  /* No free block was found: try to flush the LRU list.
1365  This call will flush one page from the LRU and put it on the
1366  free list. That means that the free block is up for grabs for
1367  all user threads.
1368  TODO: A more elegant way would have been to return the freed
1369  up block to the caller here but the code that deals with
1370  removing the block from page_hash and LRU_list is fairly
1371  involved (particularly in case of compressed pages). We
1372  can do that in a separate patch sometime in future. */
1373  if (!buf_flush_single_page_from_LRU(buf_pool)) {
1374  MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
1375  ++flush_failures;
1376  }
1377 
1378  srv_stats.buf_pool_wait_free.add(n_iterations, 1);
1379 
1380  n_iterations++;
1381 
1382  goto loop;
1383 }
1384 
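Stripped of mutexes and monitoring, the loop above is an escalating search: free list first, then an LRU-tail scan (whole list after the first failure), then a single-page flush, with a sleep once the search drags on. A self-contained sketch with toy stand-ins (none of the helpers below are the real InnoDB functions):

#include <chrono>
#include <cstdio>
#include <thread>

struct block { int id; };

static int free_blocks = 0;			/* toy state shared by the stand-ins */

static block* free_list_pop()			/* cf. buf_LRU_get_free_only() */
{
	static block b;
	if (free_blocks > 0) {
		--free_blocks;
		return &b;
	}
	return nullptr;
}

static bool scan_and_free_block(bool scan_all)	/* cf. buf_LRU_scan_and_free_block() */
{
	(void) scan_all;
	return false;		/* pretend the LRU tail held nothing evictable */
}

static void flush_single_page_from_lru()	/* cf. buf_flush_single_page_from_LRU() */
{
	++free_blocks;		/* the flushed page ends up on the free list */
}

static block* get_free_block()
{
	for (unsigned long n_iterations = 0; ; ++n_iterations) {
		if (block* b = free_list_pop()) {
			return b;	/* common case: free list was not empty */
		}

		/* First pass scans only the LRU tail; later passes scan it all. */
		if (scan_and_free_block(n_iterations > 0)) {
			continue;
		}

		if (n_iterations > 1) {
			/* Back off and let the page cleaner run an LRU batch. */
			std::this_thread::sleep_for(std::chrono::milliseconds(100));
		}

		flush_single_page_from_lru();
	}
}

int main()
{
	std::printf("got a free block at %p\n", (void*) get_free_block());
	return 0;
}
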
1385 /*******************************************************************/
1388 UNIV_INLINE
1389 void
1390 buf_LRU_old_adjust_len(
1391 /*===================*/
1392  buf_pool_t* buf_pool)
1393 {
1394  ulint old_len;
1395  ulint new_len;
1396 
1397  ut_a(buf_pool->LRU_old);
1398  ut_ad(buf_pool_mutex_own(buf_pool));
1401 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
1402 # error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
1403 #endif
1404 #ifdef UNIV_LRU_DEBUG
1405  /* buf_pool->LRU_old must be the first item in the LRU list
1406  whose "old" flag is set. */
1407  ut_a(buf_pool->LRU_old->old);
1408  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1409  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1410  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1411  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1412 #endif /* UNIV_LRU_DEBUG */
1413 
1414  old_len = buf_pool->LRU_old_len;
1415  new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
1416  * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
1417  UT_LIST_GET_LEN(buf_pool->LRU)
1418  - (BUF_LRU_OLD_TOLERANCE
1419  + BUF_LRU_NON_OLD_MIN_LEN));
1420 
1421  for (;;) {
1422  buf_page_t* LRU_old = buf_pool->LRU_old;
1423 
1424  ut_a(LRU_old);
1425  ut_ad(LRU_old->in_LRU_list);
1426 #ifdef UNIV_LRU_DEBUG
1427  ut_a(LRU_old->old);
1428 #endif /* UNIV_LRU_DEBUG */
1429 
1430  /* Update the LRU_old pointer if necessary */
1431 
1432  if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
1433 
1434  buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
1435  LRU, LRU_old);
1436 #ifdef UNIV_LRU_DEBUG
1437  ut_a(!LRU_old->old);
1438 #endif /* UNIV_LRU_DEBUG */
1439  old_len = ++buf_pool->LRU_old_len;
1440  buf_page_set_old(LRU_old, TRUE);
1441 
1442  } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
1443 
1444  buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
1445  old_len = --buf_pool->LRU_old_len;
1446  buf_page_set_old(LRU_old, FALSE);
1447  } else {
1448  return;
1449  }
1450  }
1451 }
1452 
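A worked example of the target that this loop converges on: new_len is the smaller of LRU_len * LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV and LRU_len - (BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN). The list length and ratio below are made-up sample values (a ratio of 3/8 of BUF_LRU_OLD_RATIO_DIV, which buf0lru.h defines as 1024):

#include <algorithm>
#include <cstdio>

int main()
{
	const unsigned long OLD_RATIO_DIV   = 1024;	/* BUF_LRU_OLD_RATIO_DIV */
	const unsigned long OLD_TOLERANCE   = 20;	/* BUF_LRU_OLD_TOLERANCE, above */
	const unsigned long NON_OLD_MIN_LEN = 5;	/* BUF_LRU_NON_OLD_MIN_LEN, above */

	const unsigned long lru_len       = 10000;			/* sample list length */
	const unsigned long lru_old_ratio = 3 * OLD_RATIO_DIV / 8;	/* sample ratio: 3/8 */

	unsigned long new_len = std::min(
		lru_len * lru_old_ratio / OLD_RATIO_DIV,
		lru_len - (OLD_TOLERANCE + NON_OLD_MIN_LEN));

	/* 10000 * 384 / 1024 = 3750, well below 10000 - 25, so LRU_old_len is
	kept within BUF_LRU_OLD_TOLERANCE blocks of 3750. */
	std::printf("target LRU_old_len = %lu\n", new_len);
	return 0;
}
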
1453 /*******************************************************************/
1456 static
1457 void
1458 buf_LRU_old_init(
1459 /*=============*/
1460  buf_pool_t* buf_pool)
1461 {
1462  buf_page_t* bpage;
1463 
1464  ut_ad(buf_pool_mutex_own(buf_pool));
1465  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
1466 
1467  /* We first initialize all blocks in the LRU list as old and then use
1468  the adjust function to move the LRU_old pointer to the right
1469  position */
1470 
1471  for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL;
1472  bpage = UT_LIST_GET_PREV(LRU, bpage)) {
1473  ut_ad(bpage->in_LRU_list);
1474  ut_ad(buf_page_in_file(bpage));
1475  /* This loop temporarily violates the
1476  assertions of buf_page_set_old(). */
1477  bpage->old = TRUE;
1478  }
1479 
1480  buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
1481  buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
1482 
1483  buf_LRU_old_adjust_len(buf_pool);
1484 }
1485 
1486 /******************************************************************/
1488 static
1489 void
1490 buf_unzip_LRU_remove_block_if_needed(
1491 /*=================================*/
1492  buf_page_t* bpage)
1493 {
1494  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1495 
1496  ut_ad(buf_pool);
1497  ut_ad(bpage);
1498  ut_ad(buf_page_in_file(bpage));
1499  ut_ad(buf_pool_mutex_own(buf_pool));
1500 
1501  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1502  buf_block_t* block = (buf_block_t*) bpage;
1503 
1504  ut_ad(block->in_unzip_LRU_list);
1505  ut_d(block->in_unzip_LRU_list = FALSE);
1506 
1507  UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
1508  }
1509 }
1510 
1511 /******************************************************************/
1513 UNIV_INLINE
1514 void
1515 buf_LRU_remove_block(
1516 /*=================*/
1517  buf_page_t* bpage)
1518 {
1519  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1520  ulint zip_size;
1521 
1522  ut_ad(buf_pool);
1523  ut_ad(bpage);
1524  ut_ad(buf_pool_mutex_own(buf_pool));
1525 
1526  ut_a(buf_page_in_file(bpage));
1527 
1528  ut_ad(bpage->in_LRU_list);
1529 
1530  /* If the LRU_old pointer is defined and points to just this block,
1531  move it backward one step */
1532 
1533  if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
1534 
1535  /* Below: the previous block is guaranteed to exist,
1536  because the LRU_old pointer is only allowed to differ
1537  by BUF_LRU_OLD_TOLERANCE from strict
1538  buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
1539  list length. */
1540  buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
1541 
1542  ut_a(prev_bpage);
1543 #ifdef UNIV_LRU_DEBUG
1544  ut_a(!prev_bpage->old);
1545 #endif /* UNIV_LRU_DEBUG */
1546  buf_pool->LRU_old = prev_bpage;
1547  buf_page_set_old(prev_bpage, TRUE);
1548 
1549  buf_pool->LRU_old_len++;
1550  }
1551 
1552  /* Remove the block from the LRU list */
1553  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1554  ut_d(bpage->in_LRU_list = FALSE);
1555 
1556  zip_size = page_zip_get_size(&bpage->zip);
1557  buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
1558 
1559  buf_unzip_LRU_remove_block_if_needed(bpage);
1560 
1561  /* If the LRU list is so short that LRU_old is not defined,
1562  clear the "old" flags and return */
1563  if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
1564 
1565  for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
1566  bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
1567  /* This loop temporarily violates the
1568  assertions of buf_page_set_old(). */
1569  bpage->old = FALSE;
1570  }
1571 
1572  buf_pool->LRU_old = NULL;
1573  buf_pool->LRU_old_len = 0;
1574 
1575  return;
1576  }
1577 
1578  ut_ad(buf_pool->LRU_old);
1579 
1580  /* Update the LRU_old_len field if necessary */
1581  if (buf_page_is_old(bpage)) {
1582 
1583  buf_pool->LRU_old_len--;
1584  }
1585 
1586  /* Adjust the length of the old block list if necessary */
1587  buf_LRU_old_adjust_len(buf_pool);
1588 }
1589 
1590 /******************************************************************/
1592 UNIV_INTERN
1593 void
1594 buf_unzip_LRU_add_block(
1595 /*====================*/
1596  buf_block_t* block,
1597  ibool old)
1599 {
1600  buf_pool_t* buf_pool = buf_pool_from_block(block);
1601 
1602  ut_ad(buf_pool);
1603  ut_ad(block);
1604  ut_ad(buf_pool_mutex_own(buf_pool));
1605 
1607 
1608  ut_ad(!block->in_unzip_LRU_list);
1609  ut_d(block->in_unzip_LRU_list = TRUE);
1610 
1611  if (old) {
1612  UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
1613  } else {
1614  UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
1615  }
1616 }
1617 
1618 /******************************************************************/
1623 UNIV_INLINE
1624 void
1625 buf_LRU_add_block_to_end_low(
1626 /*=========================*/
1627  buf_page_t* bpage)
1628 {
1629  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1630 
1631  ut_ad(buf_pool);
1632  ut_ad(bpage);
1633  ut_ad(buf_pool_mutex_own(buf_pool));
1634 
1635  ut_a(buf_page_in_file(bpage));
1636 
1637  ut_ad(!bpage->in_LRU_list);
1638  UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
1639  ut_d(bpage->in_LRU_list = TRUE);
1640 
1641  incr_LRU_size_in_bytes(bpage, buf_pool);
1642 
1643  if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
1644 
1645  ut_ad(buf_pool->LRU_old);
1646 
1647  /* Adjust the length of the old block list if necessary */
1648 
1649  buf_page_set_old(bpage, TRUE);
1650  buf_pool->LRU_old_len++;
1651  buf_LRU_old_adjust_len(buf_pool);
1652 
1653  } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
1654 
1655  /* The LRU list is now long enough for LRU_old to become
1656  defined: init it */
1657 
1658  buf_LRU_old_init(buf_pool);
1659  } else {
1660  buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
1661  }
1662 
1663  /* If this is a zipped block with decompressed frame as well
1664  then put it on the unzip_LRU list */
1665  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1666  buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
1667  }
1668 }
1669 
1670 /******************************************************************/
1675 UNIV_INLINE
1676 void
1677 buf_LRU_add_block_low(
1678 /*==================*/
1679  buf_page_t* bpage,
1680  ibool old)
1684 {
1685  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1686 
1687  ut_ad(buf_pool);
1688  ut_ad(bpage);
1689  ut_ad(buf_pool_mutex_own(buf_pool));
1690 
1691  ut_a(buf_page_in_file(bpage));
1692  ut_ad(!bpage->in_LRU_list);
1693 
1694  if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
1695 
1696  UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
1697 
1698  bpage->freed_page_clock = buf_pool->freed_page_clock;
1699  } else {
1700 #ifdef UNIV_LRU_DEBUG
1701  /* buf_pool->LRU_old must be the first item in the LRU list
1702  whose "old" flag is set. */
1703  ut_a(buf_pool->LRU_old->old);
1704  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1705  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1706  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1707  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1708 #endif /* UNIV_LRU_DEBUG */
1709  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
1710  bpage);
1711  buf_pool->LRU_old_len++;
1712  }
1713 
1714  ut_d(bpage->in_LRU_list = TRUE);
1715 
1716  incr_LRU_size_in_bytes(bpage, buf_pool);
1717 
1718  if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
1719 
1720  ut_ad(buf_pool->LRU_old);
1721 
1722  /* Adjust the length of the old block list if necessary */
1723 
1724  buf_page_set_old(bpage, old);
1725  buf_LRU_old_adjust_len(buf_pool);
1726 
1727  } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
1728 
1729  /* The LRU list is now long enough for LRU_old to become
1730  defined: init it */
1731 
1732  buf_LRU_old_init(buf_pool);
1733  } else {
1734  buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
1735  }
1736 
1737  /* If this is a zipped block with decompressed frame as well
1738  then put it on the unzip_LRU list */
1739  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1740  buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
1741  }
1742 }
1743 
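The effect of the midpoint insertion above can be shown with an ordinary doubly linked list. This sketch uses std::list instead of the intrusive UT_LIST, ignores locking and the unzip_LRU, and the page names are invented.

#include <cstdio>
#include <iterator>
#include <list>
#include <string>

int main()
{
	/* Front = most recently used ("young"), back = eviction candidates ("old"). */
	std::list<std::string> lru = {"y1", "y2", "y3", "o1", "o2", "o3"};

	/* LRU_old points at the first block of the old sublist. */
	std::list<std::string>::iterator lru_old = std::next(lru.begin(), 3);	/* "o1" */

	/* cf. UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, bpage):
	a page read in as "old" is linked in just behind the midpoint block, so a
	table scan cannot push the hot young pages out; LRU_old_len would then be
	incremented and buf_LRU_old_adjust_len() called, as in the code above. */
	lru.insert(std::next(lru_old), "new-page");

	for (const std::string& p : lru) {
		std::printf("%s ", p.c_str());	/* y1 y2 y3 o1 new-page o2 o3 */
	}
	std::printf("\n");
	return 0;
}
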
1744 /******************************************************************/
1749 UNIV_INTERN
1750 void
1751 buf_LRU_add_block(
1752 /*==============*/
1753  buf_page_t* bpage,
1754  ibool old)
1759 {
1760  buf_LRU_add_block_low(bpage, old);
1761 }
1762 
1763 /******************************************************************/
1765 UNIV_INTERN
1766 void
1767 buf_LRU_make_block_young(
1768 /*=====================*/
1769  buf_page_t* bpage)
1770 {
1771  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1772 
1773  ut_ad(buf_pool_mutex_own(buf_pool));
1774 
1775  if (bpage->old) {
1776  buf_pool->stat.n_pages_made_young++;
1777  }
1778 
1779  buf_LRU_remove_block(bpage);
1780  buf_LRU_add_block_low(bpage, FALSE);
1781 }
1782 
1783 /******************************************************************/
1785 UNIV_INTERN
1786 void
1787 buf_LRU_make_block_old(
1788 /*===================*/
1789  buf_page_t* bpage)
1790 {
1791  buf_LRU_remove_block(bpage);
1792  buf_LRU_add_block_to_end_low(bpage);
1793 }
1794 
1795 /******************************************************************/
1806 UNIV_INTERN
1807 bool
1808 buf_LRU_free_page(
1809 /*===============*/
1810  buf_page_t* bpage,
1811  bool zip)
1813 {
1814  buf_page_t* b = NULL;
1815  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1816  const ulint fold = buf_page_address_fold(bpage->space,
1817  bpage->offset);
1818  rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
1819 
1820  ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
1821 
1822  ut_ad(buf_pool_mutex_own(buf_pool));
1823  ut_ad(buf_page_in_file(bpage));
1824  ut_ad(bpage->in_LRU_list);
1825 
1826  rw_lock_x_lock(hash_lock);
1827  mutex_enter(block_mutex);
1828 
1829 #if UNIV_WORD_SIZE == 4
1830  /* On 32-bit systems, there is no padding in buf_page_t. On
1831  other systems, Valgrind could complain about uninitialized pad
1832  bytes. */
1833  UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
1834 #endif
1835 
1836  if (!buf_page_can_relocate(bpage)) {
1837 
1838  /* Do not free buffer-fixed or I/O-fixed blocks. */
1839  goto func_exit;
1840  }
1841 
1842 #ifdef UNIV_IBUF_COUNT_DEBUG
1843  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
1844 #endif /* UNIV_IBUF_COUNT_DEBUG */
1845 
1846  if (zip || !bpage->zip.data) {
1847  /* This would completely free the block. */
1848  /* Do not completely free dirty blocks. */
1849 
1850  if (bpage->oldest_modification) {
1851  goto func_exit;
1852  }
1853  } else if ((bpage->oldest_modification)
1854  && (buf_page_get_state(bpage)
1855  != BUF_BLOCK_FILE_PAGE)) {
1856 
1857  ut_ad(buf_page_get_state(bpage)
1858  == BUF_BLOCK_ZIP_DIRTY);
1859 
1860 func_exit:
1861  rw_lock_x_unlock(hash_lock);
1862  mutex_exit(block_mutex);
1863  return(false);
1864 
1865  } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1866  b = buf_page_alloc_descriptor();
1867  ut_a(b);
1868  memcpy(b, bpage, sizeof *b);
1869  }
1870 
1871  ut_ad(buf_pool_mutex_own(buf_pool));
1872  ut_ad(buf_page_in_file(bpage));
1873  ut_ad(bpage->in_LRU_list);
1874  ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
1875 #if UNIV_WORD_SIZE == 4
1876  /* On 32-bit systems, there is no padding in buf_page_t. On
1877  other systems, Valgrind could complain about uninitialized pad
1878  bytes. */
1879  UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
1880 #endif
1881 
1882 #ifdef UNIV_DEBUG
1883  if (buf_debug_prints) {
1884  fprintf(stderr, "Putting space %lu page %lu to free list\n",
1885  (ulong) buf_page_get_space(bpage),
1886  (ulong) buf_page_get_page_no(bpage));
1887  }
1888 #endif /* UNIV_DEBUG */
1889 
1890 #ifdef UNIV_SYNC_DEBUG
1891  ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
1892 #endif /* UNIV_SYNC_DEBUG */
1893  ut_ad(buf_page_can_relocate(bpage));
1894 
1895  if (!buf_LRU_block_remove_hashed(bpage, zip)) {
1896  return(true);
1897  }
1898 
1899 #ifdef UNIV_SYNC_DEBUG
1900  /* buf_LRU_block_remove_hashed() releases the hash_lock */
1901  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
1902  && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
1903 #endif /* UNIV_SYNC_DEBUG */
1904 
1905  /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
1906  then it was a compressed page with an uncompressed frame and
1907  we are interested in freeing only the uncompressed frame.
1908  Therefore we have to reinsert the compressed page descriptor
1909  into the LRU and page_hash (and possibly flush_list).
1910  if b == NULL then it was a regular page that has been freed */
1911 
1912  if (b) {
1913  buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
1914 
1915  rw_lock_x_lock(hash_lock);
1916  mutex_enter(block_mutex);
1917 
1918  ut_a(!buf_page_hash_get_low(buf_pool,
1919  bpage->space,
1920  bpage->offset,
1921  fold));
1922 
1923  b->state = b->oldest_modification
1924  ? BUF_BLOCK_ZIP_DIRTY
1925  : BUF_BLOCK_ZIP_PAGE;
1926  UNIV_MEM_DESC(b->zip.data,
1927  page_zip_get_size(&b->zip));
1928 
1929  /* The fields in_page_hash and in_LRU_list of
1930  the to-be-freed block descriptor should have
1931  been cleared in
1932  buf_LRU_block_remove_hashed(), which
1933  invokes buf_LRU_remove_block(). */
1934  ut_ad(!bpage->in_page_hash);
1935  ut_ad(!bpage->in_LRU_list);
1936  /* bpage->state was BUF_BLOCK_FILE_PAGE because
1937  b != NULL. The type cast below is thus valid. */
1938  ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
1939 
1940  /* The fields of bpage were copied to b before
1941  buf_LRU_block_remove_hashed() was invoked. */
1942  ut_ad(!b->in_zip_hash);
1943  ut_ad(b->in_page_hash);
1944  ut_ad(b->in_LRU_list);
1945 
1946  HASH_INSERT(buf_page_t, hash,
1947  buf_pool->page_hash, fold, b);
1948 
1949  /* Insert b where bpage was in the LRU list. */
1950  if (UNIV_LIKELY(prev_b != NULL)) {
1951  ulint lru_len;
1952 
1953  ut_ad(prev_b->in_LRU_list);
1954  ut_ad(buf_page_in_file(prev_b));
1955 #if UNIV_WORD_SIZE == 4
1956  /* On 32-bit systems, there is no
1957  padding in buf_page_t. On other
1958  systems, Valgrind could complain about
1959  uninitialized pad bytes. */
1960  UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
1961 #endif
1962  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
1963  prev_b, b);
1964 
1965  incr_LRU_size_in_bytes(b, buf_pool);
1966 
1967  if (buf_page_is_old(b)) {
1968  buf_pool->LRU_old_len++;
1969  if (UNIV_UNLIKELY
1970  (buf_pool->LRU_old
1971  == UT_LIST_GET_NEXT(LRU, b))) {
1972 
1973  buf_pool->LRU_old = b;
1974  }
1975  }
1976 
1977  lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
1978 
1979  if (lru_len > BUF_LRU_OLD_MIN_LEN) {
1980  ut_ad(buf_pool->LRU_old);
1981  /* Adjust the length of the
1982  old block list if necessary */
1983  buf_LRU_old_adjust_len(buf_pool);
1984  } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
1985  /* The LRU list is now long
1986  enough for LRU_old to become
1987  defined: init it */
1988  buf_LRU_old_init(buf_pool);
1989  }
1990 #ifdef UNIV_LRU_DEBUG
1991  /* Check that the "old" flag is consistent
1992  in the block and its neighbours. */
1994 #endif /* UNIV_LRU_DEBUG */
1995  } else {
1996  ut_d(b->in_LRU_list = FALSE);
1997  buf_LRU_add_block_low(b, buf_page_is_old(b));
1998  }
1999 
2000  if (b->state == BUF_BLOCK_ZIP_PAGE) {
2001 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2002  buf_LRU_insert_zip_clean(b);
2003 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2004  } else {
2005  /* Relocate on buf_pool->flush_list. */
2006  buf_flush_relocate_on_flush_list(bpage, b);
2007  }
2008 
2009  bpage->zip.data = NULL;
2010  page_zip_set_size(&bpage->zip, 0);
2011  mutex_exit(block_mutex);
2012 
2013  /* Prevent buf_page_get_gen() from
2014  decompressing the block while we release
2015  buf_pool->mutex and block_mutex. */
2016  block_mutex = buf_page_get_mutex(b);
2017  mutex_enter(block_mutex);
2018  buf_page_set_io_fix(b, BUF_IO_READ);
2019  mutex_exit(block_mutex);
2020 
2021  rw_lock_x_unlock(hash_lock);
2022 
2023  } else {
2024 
2025  /* There can be multiple threads doing an LRU scan to
2026  free a block. The page_cleaner thread can be doing an
2027  LRU batch whereas user threads can potentially be doing
2028  multiple single page flushes. As we release
2029  buf_pool->mutex below we need to make sure that no one
2030  else considers this block as a victim for page
2031  replacement. This block is already out of page_hash
2032  and we are about to remove it from the LRU list and put
2033  it on the free list. */
2034  mutex_enter(block_mutex);
2035  buf_page_set_sticky(bpage);
2036  mutex_exit(block_mutex);
2037  }
2038 
2039  buf_pool_mutex_exit(buf_pool);
2040 
2041  /* Remove possible adaptive hash index on the page.
2042  The page was declared uninitialized by
2043  buf_LRU_block_remove_hashed(). We need to flag
2044  the contents of the page valid (which it still is) in
2045  order to avoid bogus Valgrind warnings.*/
2046 
2047  UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
2048  UNIV_PAGE_SIZE);
2049  btr_search_drop_page_hash_index((buf_block_t*) bpage);
2050  UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
2051  UNIV_PAGE_SIZE);
2052 
2053  if (b) {
2054  ib_uint32_t checksum;
2055  /* Compute and stamp the compressed page
2056  checksum while not holding any mutex. The
2057  block is already half-freed
2058  (BUF_BLOCK_REMOVE_HASH) and removed from
2059  buf_pool->page_hash, thus inaccessible by any
2060  other thread. */
2061 
2062  checksum = page_zip_calc_checksum(
2063  b->zip.data,
2064  page_zip_get_size(&b->zip),
2065  static_cast<srv_checksum_algorithm_t>(
2066  srv_checksum_algorithm));
2067 
2068  mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
2069  checksum);
2070  }
2071 
2072  buf_pool_mutex_enter(buf_pool);
2073 
2074  mutex_enter(block_mutex);
2075  buf_page_unset_sticky(b != NULL ? b : bpage);
2076  mutex_exit(block_mutex);
2077 
2078  buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2079  return(true);
2080 }
2081 
2082 /******************************************************************/
2084 UNIV_INTERN
2085 void
2086 buf_LRU_block_free_non_file_page(
2087 /*=============================*/
2088  buf_block_t* block)
2089 {
2090  void* data;
2091  buf_pool_t* buf_pool = buf_pool_from_block(block);
2092 
2093  ut_ad(block);
2094  ut_ad(buf_pool_mutex_own(buf_pool));
2095  ut_ad(mutex_own(&block->mutex));
2096 
2097  switch (buf_block_get_state(block)) {
2098  case BUF_BLOCK_MEMORY:
2099  case BUF_BLOCK_READY_FOR_USE:
2100  break;
2101  default:
2102  ut_error;
2103  }
2104 
2105 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
2106  ut_a(block->n_pointers == 0);
2107 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
2108  ut_ad(!block->page.in_free_list);
2109  ut_ad(!block->page.in_flush_list);
2110  ut_ad(!block->page.in_LRU_list);
2111 
2112  buf_block_set_state(block, BUF_BLOCK_NOT_USED);
2113 
2114  UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
2115 #ifdef UNIV_DEBUG
2116  /* Wipe contents of page to reveal possible stale pointers to it */
2117  memset(block->frame, '\0', UNIV_PAGE_SIZE);
2118 #else
2119  /* Wipe page_no and space_id */
2120  memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
2121  memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
2122 #endif
2123  data = block->page.zip.data;
2124 
2125  if (data) {
2126  block->page.zip.data = NULL;
2127  mutex_exit(&block->mutex);
2128  buf_pool_mutex_exit_forbid(buf_pool);
2129 
2130  buf_buddy_free(
2131  buf_pool, data, page_zip_get_size(&block->page.zip));
2132 
2133  buf_pool_mutex_exit_allow(buf_pool);
2134  mutex_enter(&block->mutex);
2135  page_zip_set_size(&block->page.zip, 0);
2136  }
2137 
2138  UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
2139  ut_d(block->page.in_free_list = TRUE);
2140 
2141  UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
2142 }
2143 
2144 /******************************************************************/
2158 static
2159 bool
2160 buf_LRU_block_remove_hashed(
2161 /*========================*/
2162  buf_page_t* bpage,
2165  bool zip)
2167 {
2168  ulint fold;
2169  const buf_page_t* hashed_bpage;
2170  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2171  rw_lock_t* hash_lock;
2172 
2173  ut_ad(bpage);
2174  ut_ad(buf_pool_mutex_own(buf_pool));
2175  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2176 
2177  fold = buf_page_address_fold(bpage->space, bpage->offset);
2178  hash_lock = buf_page_hash_lock_get(buf_pool, fold);
2179 #ifdef UNIV_SYNC_DEBUG
2180  ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
2181 #endif /* UNIV_SYNC_DEBUG */
2182 
2184  ut_a(bpage->buf_fix_count == 0);
2185 
2186 #if UNIV_WORD_SIZE == 4
2187  /* On 32-bit systems, there is no padding in
2188  buf_page_t. On other systems, Valgrind could complain
2189  about uninitialized pad bytes. */
2190  UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2191 #endif
2192 
2193  buf_LRU_remove_block(bpage);
2194 
2195  buf_pool->freed_page_clock += 1;
2196 
2197  switch (buf_page_get_state(bpage)) {
2198  case BUF_BLOCK_FILE_PAGE:
2199  UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
2200  UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
2201  UNIV_PAGE_SIZE);
2203  if (bpage->zip.data) {
2204  const page_t* page = ((buf_block_t*) bpage)->frame;
2205  const ulint zip_size
2206  = page_zip_get_size(&bpage->zip);
2207 
2208  ut_a(!zip || bpage->oldest_modification == 0);
2209 
2210  switch (UNIV_EXPECT(fil_page_get_type(page),
2211  FIL_PAGE_INDEX)) {
2212  case FIL_PAGE_TYPE_ALLOCATED:
2213  case FIL_PAGE_INODE:
2214  case FIL_PAGE_IBUF_BITMAP:
2215  case FIL_PAGE_TYPE_FSP_HDR:
2216  case FIL_PAGE_TYPE_XDES:
2217  /* These are essentially uncompressed pages. */
2218  if (!zip) {
2219  /* InnoDB writes the data to the
2220  uncompressed page frame. Copy it
2221  to the compressed page, which will
2222  be preserved. */
2223  memcpy(bpage->zip.data, page,
2224  zip_size);
2225  }
2226  break;
2227  case FIL_PAGE_TYPE_ZBLOB:
2228  case FIL_PAGE_TYPE_ZBLOB2:
2229  break;
2230  case FIL_PAGE_INDEX:
2231 #ifdef UNIV_ZIP_DEBUG
2232  ut_a(page_zip_validate(
2233  &bpage->zip, page,
2234  ((buf_block_t*) bpage)->index));
2235 #endif /* UNIV_ZIP_DEBUG */
2236  break;
2237  default:
2238  ut_print_timestamp(stderr);
2239  fputs(" InnoDB: ERROR: The compressed page"
2240  " to be evicted seems corrupt:", stderr);
2241  ut_print_buf(stderr, page, zip_size);
2242  fputs("\nInnoDB: Possibly older version"
2243  " of the page:", stderr);
2244  ut_print_buf(stderr, bpage->zip.data,
2245  zip_size);
2246  putc('\n', stderr);
2247  ut_error;
2248  }
2249 
2250  break;
2251  }
2252  /* fall through */
2253  case BUF_BLOCK_ZIP_PAGE:
2254  ut_a(bpage->oldest_modification == 0);
2255  UNIV_MEM_ASSERT_W(bpage->zip.data,
2256  page_zip_get_size(&bpage->zip));
2257  break;
2258  case BUF_BLOCK_POOL_WATCH:
2259  case BUF_BLOCK_ZIP_DIRTY:
2260  case BUF_BLOCK_NOT_USED:
2261  case BUF_BLOCK_READY_FOR_USE:
2262  case BUF_BLOCK_MEMORY:
2263  case BUF_BLOCK_REMOVE_HASH:
2264  ut_error;
2265  break;
2266  }
2267 
2268  hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space,
2269  bpage->offset, fold);
2270 
2271  if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
2272  fprintf(stderr,
2273  "InnoDB: Error: page %lu %lu not found"
2274  " in the hash table\n",
2275  (ulong) bpage->space,
2276  (ulong) bpage->offset);
2277  if (hashed_bpage) {
2278  fprintf(stderr,
2279  "InnoDB: In hash table we find block"
2280  " %p of %lu %lu which is not %p\n",
2281  (const void*) hashed_bpage,
2282  (ulong) hashed_bpage->space,
2283  (ulong) hashed_bpage->offset,
2284  (const void*) bpage);
2285  }
2286 
2287 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2288  mutex_exit(buf_page_get_mutex(bpage));
2289  rw_lock_x_unlock(hash_lock);
2290  buf_pool_mutex_exit(buf_pool);
2291  buf_print();
2292  buf_LRU_print();
2293  buf_validate();
2294  buf_LRU_validate();
2295 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2296  ut_error;
2297  }
2298 
2299  ut_ad(!bpage->in_zip_hash);
2300  ut_ad(bpage->in_page_hash);
2301  ut_d(bpage->in_page_hash = FALSE);
2302  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
2303  switch (buf_page_get_state(bpage)) {
2304  case BUF_BLOCK_ZIP_PAGE:
2305  ut_ad(!bpage->in_free_list);
2306  ut_ad(!bpage->in_flush_list);
2307  ut_ad(!bpage->in_LRU_list);
2308  ut_a(bpage->zip.data);
2309  ut_a(buf_page_get_zip_size(bpage));
2310 
2311 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2312  UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
2313 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2314 
2315  mutex_exit(&buf_pool->zip_mutex);
2316  rw_lock_x_unlock(hash_lock);
2317  buf_pool_mutex_exit_forbid(buf_pool);
2318 
2319  buf_buddy_free(
2320  buf_pool, bpage->zip.data,
2321  page_zip_get_size(&bpage->zip));
2322 
2323  buf_pool_mutex_exit_allow(buf_pool);
2324  buf_page_free_descriptor(bpage);
2325  return(false);
2326 
2327  case BUF_BLOCK_FILE_PAGE:
2328  memset(((buf_block_t*) bpage)->frame
2329  + FIL_PAGE_OFFSET, 0xff, 4);
2330  memset(((buf_block_t*) bpage)->frame
2331  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
2332  UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
2333  UNIV_PAGE_SIZE);
2334  buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
2335 
2336  /* Question: If we release bpage and hash mutex here
2337  then what protects us against:
2338  1) Some other thread buffer fixing this page
2339  2) Some other thread trying to read this page and
2340  not finding it in buffer pool attempting to read it
2341  from the disk.
2342  Answer:
2343  1) Cannot happen because the page is no longer in the
2344  page_hash. The only possibility is that, while invalidating
2345  a tablespace, we buffer-fix the prev_page in the LRU to
2346  avoid relocation during the scan. But that is not
2347  possible because we are holding the buf_pool mutex.
2348 
2349  2) Not possible because in buf_page_init_for_read()
2350  we do a lookup of page_hash while holding the buf_pool
2351  mutex, and since we are holding the buf_pool mutex here,
2352  by the time we release it in the caller we will have
2353  inserted the compressed-only descriptor in the
2354  page_hash. */
2355  rw_lock_x_unlock(hash_lock);
2356  mutex_exit(&((buf_block_t*) bpage)->mutex);
2357 
2358  if (zip && bpage->zip.data) {
2359  /* Free the compressed page. */
2360  void* data = bpage->zip.data;
2361  bpage->zip.data = NULL;
2362 
2363  ut_ad(!bpage->in_free_list);
2364  ut_ad(!bpage->in_flush_list);
2365  ut_ad(!bpage->in_LRU_list);
2366  buf_pool_mutex_exit_forbid(buf_pool);
2367 
2368  buf_buddy_free(
2369  buf_pool, data,
2370  page_zip_get_size(&bpage->zip));
2371 
2372  buf_pool_mutex_exit_allow(buf_pool);
2373  page_zip_set_size(&bpage->zip, 0);
2374  }
2375 
2376  return(true);
2377 
2378  case BUF_BLOCK_POOL_WATCH:
2379  case BUF_BLOCK_ZIP_DIRTY:
2380  case BUF_BLOCK_NOT_USED:
2381  case BUF_BLOCK_READY_FOR_USE:
2382  case BUF_BLOCK_MEMORY:
2383  case BUF_BLOCK_REMOVE_HASH:
2384  break;
2385  }
2386 
2387  ut_error;
2388  return(false);
2389 }
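
/* Editor's note (not part of the original source): the return value of
buf_LRU_block_remove_hashed() tells the caller whether a block frame is
left over. For a BUF_BLOCK_FILE_PAGE it returns true and the caller must
still hand the descriptor to buf_LRU_block_free_hashed_page(); for a
compressed-only BUF_BLOCK_ZIP_PAGE everything is freed here and false is
returned. In both cases the page_hash x-latch and the block mutex have
already been released. The call pattern, as used by buf_LRU_free_one_page()
further below:

	if (buf_LRU_block_remove_hashed(bpage, true)) {
		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
	}
*/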
2390 
2391 /******************************************************************/
2393 static
2394 void
2395 buf_LRU_block_free_hashed_page(
2396 /*===========================*/
2397  buf_block_t* block)
2399 {
2400 #ifdef UNIV_DEBUG
2401  buf_pool_t* buf_pool = buf_pool_from_block(block);
2402  ut_ad(buf_pool_mutex_own(buf_pool));
2403 #endif
2404 
2405  mutex_enter(&block->mutex);
2406  buf_block_set_state(block, BUF_BLOCK_MEMORY);
2407 
2408  buf_LRU_block_free_non_file_page(block);
2409  mutex_exit(&block->mutex);
2410 }
2411 
2412 /******************************************************************/
2414 UNIV_INTERN
2415 void
2416 buf_LRU_free_one_page(
2417 /*==================*/
2418  buf_page_t* bpage)
2421 {
2422  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2423  const ulint fold = buf_page_address_fold(bpage->space,
2424  bpage->offset);
2425  rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
2426  ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
2427 
2428  ut_ad(buf_pool_mutex_own(buf_pool));
2429 
2430  rw_lock_x_lock(hash_lock);
2431  mutex_enter(block_mutex);
2432 
2433  if (buf_LRU_block_remove_hashed(bpage, true)) {
2434  buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2435  }
2436 
2437  /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */
2438 #ifdef UNIV_SYNC_DEBUG
2439  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
2440  && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
2441 #endif /* UNIV_SYNC_DEBUG */
2442  ut_ad(!mutex_own(block_mutex));
2443 }
2444 
2445 /**********************************************************************/
2448 static
2449 uint
2450 buf_LRU_old_ratio_update_instance(
2451 /*==============================*/
2452  buf_pool_t* buf_pool,
2453  uint old_pct,
2455  ibool adjust)
2458 {
2459  uint ratio;
2460 
2461  ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
2462  if (ratio < BUF_LRU_OLD_RATIO_MIN) {
2463  ratio = BUF_LRU_OLD_RATIO_MIN;
2464  } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
2465  ratio = BUF_LRU_OLD_RATIO_MAX;
2466  }
2467 
2468  if (adjust) {
2469  buf_pool_mutex_enter(buf_pool);
2470 
2471  if (ratio != buf_pool->LRU_old_ratio) {
2472  buf_pool->LRU_old_ratio = ratio;
2473 
2474  if (UT_LIST_GET_LEN(buf_pool->LRU)
2475  >= BUF_LRU_OLD_MIN_LEN) {
2476 
2477  buf_LRU_old_adjust_len(buf_pool);
2478  }
2479  }
2480 
2481  buf_pool_mutex_exit(buf_pool);
2482  } else {
2483  buf_pool->LRU_old_ratio = ratio;
2484  }
2485  /* the reverse of
2486  ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
2487  return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
2488 }
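
/* Editor's illustrative sketch (not part of the original source):
buf_LRU_old_ratio_update_instance() stores the user-visible percentage as a
fixed-point ratio with denominator BUF_LRU_OLD_RATIO_DIV and converts the
clamped ratio back to a percentage on return. A minimal standalone model of
that round trip, assuming BUF_LRU_OLD_RATIO_DIV is 1024 as in buf0lru.h of
this version, ignoring the MIN/MAX clamping, and using hypothetical names: */

#include <stdio.h>

static unsigned int
old_pct_round_trip(unsigned int old_pct)
{
	const unsigned int	div = 1024;	/* BUF_LRU_OLD_RATIO_DIV */
	unsigned int		ratio = old_pct * div / 100;

	/* e.g. old_pct = 37: ratio = 37 * 1024 / 100 = 378;
	378 * 100 / 1024.0 + 0.5 = 37.4..., which truncates back to 37. */
	return((unsigned int) (ratio * 100 / (double) div + 0.5));
}

int
main(void)
{
	printf("%u\n", old_pct_round_trip(37));	/* prints 37 */
	return(0);
}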
2489 
2490 /**********************************************************************/
2493 UNIV_INTERN
2494 ulint
2495 buf_LRU_old_ratio_update(
2496 /*=====================*/
2497  uint old_pct,
2499  ibool adjust)
2502 {
2503  ulint i;
2504  ulint new_ratio = 0;
2505 
2506  for (i = 0; i < srv_buf_pool_instances; i++) {
2507  buf_pool_t* buf_pool;
2508 
2509  buf_pool = buf_pool_from_array(i);
2510 
2511  new_ratio = buf_LRU_old_ratio_update_instance(
2512  buf_pool, old_pct, adjust);
2513  }
2514 
2515  return(new_ratio);
2516 }
2517 
2518 /********************************************************************/
2521 UNIV_INTERN
2522 void
2523 buf_LRU_stat_update(void)
2524 /*=====================*/
2525 {
2526  ulint i;
2527  buf_LRU_stat_t* item;
2528  buf_pool_t* buf_pool;
2529  ibool evict_started = FALSE;
2530  buf_LRU_stat_t cur_stat;
2531 
2532  /* If we haven't started eviction yet then don't update stats. */
2533  for (i = 0; i < srv_buf_pool_instances; i++) {
2534 
2535  buf_pool = buf_pool_from_array(i);
2536 
2537  if (buf_pool->freed_page_clock != 0) {
2538  evict_started = TRUE;
2539  break;
2540  }
2541  }
2542 
2543  if (!evict_started) {
2544  goto func_exit;
2545  }
2546 
2547  /* Update the index. */
2548  item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
2549  buf_LRU_stat_arr_ind++;
2550  buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
2551 
2552  /* Add the current value and subtract the obsolete entry.
2553  Since buf_LRU_stat_cur is not protected by any mutex,
2554  it can be changing between adding to buf_LRU_stat_sum
2555  and copying to item. Assign it to a local variable to make
2556  sure the same value is assigned to buf_LRU_stat_sum
2557  and item. */
2558  cur_stat = buf_LRU_stat_cur;
2559 
2560  buf_LRU_stat_sum.io += cur_stat.io - item->io;
2561  buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip;
2562 
2563  /* Put current entry in the array. */
2564  memcpy(item, &cur_stat, sizeof *item);
2565 
2566 func_exit:
2567  /* Clear the current entry. */
2568  memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
2569 }
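
/* Editor's illustrative sketch (not part of the original source):
buf_LRU_stat_update() keeps buf_LRU_stat_sum as a sliding-window sum over
the last BUF_LRU_STAT_N_INTERVAL one-second intervals: the newest counters
are added, the entry about to be overwritten in the circular array is
subtracted, and the slot is then overwritten. A minimal standalone model of
that update, with all names hypothetical: */

#define N_INTERVAL	50	/* mirrors BUF_LRU_STAT_N_INTERVAL */

struct lru_stat { unsigned long io; unsigned long unzip; };

static struct lru_stat	arr[N_INTERVAL];	/* circular buffer */
static struct lru_stat	sum;			/* sum over the window */
static unsigned int	ind;			/* next slot to reuse */

static void
sliding_sum_update(struct lru_stat cur)
{
	struct lru_stat*	item = &arr[ind];

	ind = (ind + 1) % N_INTERVAL;

	/* Add the newest interval, drop the one leaving the window,
	then remember the newest value in the reused slot. */
	sum.io    += cur.io - item->io;
	sum.unzip += cur.unzip - item->unzip;
	*item = cur;
}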
2570 
2571 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2572 /**********************************************************************/
2574 static
2575 void
2576 buf_LRU_validate_instance(
2577 /*======================*/
2578  buf_pool_t* buf_pool)
2579 {
2580  buf_page_t* bpage;
2581  buf_block_t* block;
2582  ulint old_len;
2583  ulint new_len;
2584 
2585  ut_ad(buf_pool);
2586  buf_pool_mutex_enter(buf_pool);
2587 
2588  if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
2589 
2590  ut_a(buf_pool->LRU_old);
2591  old_len = buf_pool->LRU_old_len;
2592  new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
2593  * buf_pool->LRU_old_ratio
2594  / BUF_LRU_OLD_RATIO_DIV,
2595  UT_LIST_GET_LEN(buf_pool->LRU)
2596  - (BUF_LRU_OLD_TOLERANCE
2597  + BUF_LRU_NON_OLD_MIN_LEN));
2598  ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
2599  ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
2600  }
2601 
2602  UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList());
2603 
2604  old_len = 0;
2605 
2606  for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
2607  bpage != NULL;
2608  bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
2609 
2610  switch (buf_page_get_state(bpage)) {
2611  case BUF_BLOCK_POOL_WATCH:
2612  case BUF_BLOCK_NOT_USED:
2613  case BUF_BLOCK_READY_FOR_USE:
2614  case BUF_BLOCK_MEMORY:
2615  case BUF_BLOCK_REMOVE_HASH:
2616  ut_error;
2617  break;
2618  case BUF_BLOCK_FILE_PAGE:
2619  ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
2620  == buf_page_belongs_to_unzip_LRU(bpage));
2621  case BUF_BLOCK_ZIP_PAGE:
2622  case BUF_BLOCK_ZIP_DIRTY:
2623  break;
2624  }
2625 
2626  if (buf_page_is_old(bpage)) {
2627  const buf_page_t* prev
2628  = UT_LIST_GET_PREV(LRU, bpage);
2629  const buf_page_t* next
2630  = UT_LIST_GET_NEXT(LRU, bpage);
2631 
2632  if (!old_len++) {
2633  ut_a(buf_pool->LRU_old == bpage);
2634  } else {
2635  ut_a(!prev || buf_page_is_old(prev));
2636  }
2637 
2638  ut_a(!next || buf_page_is_old(next));
2639  }
2640  }
2641 
2642  ut_a(buf_pool->LRU_old_len == old_len);
2643 
2644  UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList());
2645 
2646  for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
2647  bpage != NULL;
2648  bpage = UT_LIST_GET_NEXT(list, bpage)) {
2649 
2650  ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
2651  }
2652 
2653  UT_LIST_VALIDATE(
2654  unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
2655  CheckUnzipLRUAndLRUList());
2656 
2657  for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
2658  block;
2659  block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
2660 
2661  ut_ad(block->in_unzip_LRU_list);
2662  ut_ad(block->page.in_LRU_list);
2663  ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2664  }
2665 
2666  buf_pool_mutex_exit(buf_pool);
2667 }
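
/* Editor's illustrative sketch (not part of the original source): the
old-sublist check in buf_LRU_validate_instance() expects LRU_old_len to stay
within BUF_LRU_OLD_TOLERANCE (20) of a target length derived from
LRU_old_ratio. A worked example of that target, assuming
BUF_LRU_OLD_RATIO_DIV is 1024, with BUF_LRU_NON_OLD_MIN_LEN being 5 as
defined earlier in this file, and hypothetical names: */

static unsigned long
expected_old_len(unsigned long lru_len, unsigned long lru_old_ratio)
{
	unsigned long	target = lru_len * lru_old_ratio / 1024;
	unsigned long	cap = lru_len - (20 + 5);

	/* e.g. lru_len = 1000, lru_old_ratio = 378 (about 37%):
	target = 1000 * 378 / 1024 = 369, cap = 975, result = 369,
	so LRU_old_len must lie within [349, 389]. */
	return(target < cap ? target : cap);
}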
2668 
2669 /**********************************************************************/
2672 UNIV_INTERN
2673 ibool
2674 buf_LRU_validate(void)
2675 /*==================*/
2676 {
2677  ulint i;
2678 
2679  for (i = 0; i < srv_buf_pool_instances; i++) {
2680  buf_pool_t* buf_pool;
2681 
2682  buf_pool = buf_pool_from_array(i);
2683  buf_LRU_validate_instance(buf_pool);
2684  }
2685 
2686  return(TRUE);
2687 }
2688 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2689 
2690 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2691 /**********************************************************************/
2693 UNIV_INTERN
2694 void
2695 buf_LRU_print_instance(
2696 /*===================*/
2697  buf_pool_t* buf_pool)
2698 {
2699  const buf_page_t* bpage;
2700 
2701  ut_ad(buf_pool);
2702  buf_pool_mutex_enter(buf_pool);
2703 
2704  bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
2705 
2706  while (bpage != NULL) {
2707 
2708  mutex_enter(buf_page_get_mutex(bpage));
2709  fprintf(stderr, "BLOCK space %lu page %lu ",
2710  (ulong) buf_page_get_space(bpage),
2711  (ulong) buf_page_get_page_no(bpage));
2712 
2713  if (buf_page_is_old(bpage)) {
2714  fputs("old ", stderr);
2715  }
2716 
2717  if (bpage->buf_fix_count) {
2718  fprintf(stderr, "buffix count %lu ",
2719  (ulong) bpage->buf_fix_count);
2720  }
2721 
2722  if (buf_page_get_io_fix(bpage)) {
2723  fprintf(stderr, "io_fix %lu ",
2724  (ulong) buf_page_get_io_fix(bpage));
2725  }
2726 
2727  if (bpage->oldest_modification) {
2728  fputs("modif. ", stderr);
2729  }
2730 
2731  switch (buf_page_get_state(bpage)) {
2732  const byte* frame;
2733  case BUF_BLOCK_FILE_PAGE:
2734  frame = buf_block_get_frame((buf_block_t*) bpage);
2735  fprintf(stderr, "\ntype %lu"
2736  " index id %llu\n",
2737  (ulong) fil_page_get_type(frame),
2738  (ullint) btr_page_get_index_id(frame));
2739  break;
2740  case BUF_BLOCK_ZIP_PAGE:
2741  frame = bpage->zip.data;
2742  fprintf(stderr, "\ntype %lu size %lu"
2743  " index id %llu\n",
2744  (ulong) fil_page_get_type(frame),
2745  (ulong) buf_page_get_zip_size(bpage),
2746  (ullint) btr_page_get_index_id(frame));
2747  break;
2748 
2749  default:
2750  fprintf(stderr, "\n!state %lu!\n",
2751  (ulong) buf_page_get_state(bpage));
2752  break;
2753  }
2754 
2755  mutex_exit(buf_page_get_mutex(bpage));
2756  bpage = UT_LIST_GET_NEXT(LRU, bpage);
2757  }
2758 
2759  buf_pool_mutex_exit(buf_pool);
2760 }
2761 
2762 /**********************************************************************/
2764 UNIV_INTERN
2765 void
2766 buf_LRU_print(void)
2767 /*===============*/
2768 {
2769  ulint i;
2770  buf_pool_t* buf_pool;
2771 
2772  for (i = 0; i < srv_buf_pool_instances; i++) {
2773  buf_pool = buf_pool_from_array(i);
2774  buf_LRU_print_instance(buf_pool);
2775  }
2776 }
2777 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
2778 #endif /* !UNIV_HOTBACKUP */