MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
buf0buddy.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #define THIS_MODULE
27 #include "buf0buddy.h"
28 #ifdef UNIV_NONINL
29 # include "buf0buddy.ic"
30 #endif
31 #undef THIS_MODULE
32 #include "buf0buf.h"
33 #include "buf0lru.h"
34 #include "buf0flu.h"
35 #include "page0zip.h"
36 #include "srv0start.h"
37 
/** Byte offset, within a buddy block, of the 4-byte stamp that tells free
blocks from allocated ones. It aliases FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
i.e. the stamp is stored where the space id field of a page is stored. */
70 #define BUF_BUDDY_STAMP_OFFSET FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
71 
/** Stamp value carried by blocks that are in a zip_free list. */
74 #define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID)
75 
/** Stamp value carried by blocks that have been allocated but not
initialized yet (all four stamp bytes are 0xff). */
78 #define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF)
79 
/* Compile-time check: the free stamp must be strictly smaller than the
non-free stamp, so the two values can never coincide. */
80 #if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
81 # error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
82 #endif
83 
/** Return type of buf_buddy_is_free() */
enum buf_buddy_state_t {
	BUF_BUDDY_STATE_FREE,		/*!< the buddy is completely free */
	BUF_BUDDY_STATE_USED,		/*!< the buddy is currently in use */
	BUF_BUDDY_STATE_PARTIALLY_USED	/*!< some sub-blocks of the buddy
					are still in use */
};
91 
#ifdef UNIV_DEBUG_VALGRIND
/**********************************************************************//**
Mark the memory of a buddy block as invalid for the memory checker.
The block must be writable (asserted) before it is invalidated. */
UNIV_INLINE
void
buf_buddy_mem_invalid(
/*==================*/
	buf_buddy_free_t*	buf,	/*!< in: block to invalidate */
	ulint			i)	/*!< in: index of zip_free[];
					the block size is
					BUF_BUDDY_LOW << i */
{
	const size_t	size	= BUF_BUDDY_LOW << i;
	ut_ad(i <= BUF_BUDDY_SIZES);

	UNIV_MEM_ASSERT_W(buf, size);
	UNIV_MEM_INVALID(buf, size);
}
#else /* UNIV_DEBUG_VALGRIND */
/* Without Valgrind instrumentation only the size index is checked. */
# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES)
#endif /* UNIV_DEBUG_VALGRIND */
111 
112 /**********************************************************************/
115 UNIV_INLINE __attribute__((warn_unused_result))
116 bool
117 buf_buddy_stamp_is_free(
118 /*====================*/
119  const buf_buddy_free_t* buf)
120 {
121  return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
123 }
124 
125 /**********************************************************************/
127 UNIV_INLINE
128 void
130 /*=================*/
132  ulint i)
133 {
134  ut_d(memset(buf, i, BUF_BUDDY_LOW << i));
135  buf_buddy_mem_invalid(buf, i);
138  buf->stamp.size = i;
139 }
140 
141 /**********************************************************************/
145 #define buf_buddy_stamp_nonfree(buf, i) do { \
146  buf_buddy_mem_invalid(buf, i); \
147  memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); \
148 } while (0)
149 #if BUF_BUDDY_STAMP_NONFREE != 0xffffffff
150 # error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff"
151 #endif
152 
153 /**********************************************************************/
156 UNIV_INLINE
157 void*
159 /*==========*/
160  byte* page,
161  ulint size)
162 {
163  ut_ad(ut_is_2pow(size));
164  ut_ad(size >= BUF_BUDDY_LOW);
165  ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
166  ut_ad(size < BUF_BUDDY_HIGH);
167  ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE);
168  ut_ad(!ut_align_offset(page, size));
169 
170  if (((ulint) page) & size) {
171  return(page - size);
172  } else {
173  return(page + size);
174  }
175 }
176 
178 struct CheckZipFree {
179  ulint i;
180  CheckZipFree(ulint i) : i (i) {}
181 
182  void operator()(const buf_buddy_free_t* elem) const
183  {
184  ut_a(buf_buddy_stamp_is_free(elem));
185  ut_a(elem->stamp.size <= i);
186  }
187 };
188 
189 #define BUF_BUDDY_LIST_VALIDATE(bp, i) \
190  UT_LIST_VALIDATE(list, buf_buddy_free_t, \
191  bp->zip_free[i], CheckZipFree(i))
192 
#ifdef UNIV_DEBUG
/**********************************************************************//**
Debug helper: walk buf_pool->zip_free[i] looking for a given block.
@return	true if the walk stopped at buf */
UNIV_INLINE
bool
buf_buddy_check_free(
/*=================*/
	buf_pool_t*		buf_pool,/*!< in: buffer pool instance */
	const buf_buddy_free_t*	buf,	/*!< in: block to look for */
	ulint			i)	/*!< in: index of
					buf_pool->zip_free[] */
{
	const ulint	size	= BUF_BUDDY_LOW << i;

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(!ut_align_offset(buf, size));
	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));

	const buf_buddy_free_t*	node
		= UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	/* Advance until the block is found or the list is exhausted. */
	while (node && node != buf) {
		node = UT_LIST_GET_NEXT(list, node);
	}

	return(node == buf);
}
#endif /* UNIV_DEBUG */
222 
223 /**********************************************************************/
228 static __attribute__((warn_unused_result))
230 buf_buddy_is_free(
231 /*==============*/
232  buf_buddy_free_t* buf,
233  ulint i)
235 {
236 #ifdef UNIV_DEBUG
237  const ulint size = BUF_BUDDY_LOW << i;
238  ut_ad(!ut_align_offset(buf, size));
239  ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
240 #endif /* UNIV_DEBUG */
241 
242  /* We assume that all memory from buf_buddy_alloc()
243  is used for compressed page frames. */
244 
245  /* We look inside the allocated objects returned by
246  buf_buddy_alloc() and assume that each block is a compressed
247  page that contains one of the following in space_id.
248  * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
249  * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
250  not initialized yet or
251  * A valid space_id of a compressed tablespace
252 
253  The call below attempts to read from free memory. The memory
254  is "owned" by the buddy allocator (and it has been allocated
255  from the buffer pool), so there is nothing wrong about this. */
256  if (!buf_buddy_stamp_is_free(buf)) {
257  return(BUF_BUDDY_STATE_USED);
258  }
259 
260  /* A block may be free but a fragment of it may still be in use.
261  To guard against that we write the free block size in terms of
262  zip_free index at start of stamped block. Note that we can
263  safely rely on this value only if the buf is free. */
264  ut_ad(buf->stamp.size <= i);
265  return(buf->stamp.size == i
268 }
269 
270 /**********************************************************************/
272 UNIV_INLINE
273 void
274 buf_buddy_add_to_free(
275 /*==================*/
276  buf_pool_t* buf_pool,
277  buf_buddy_free_t* buf,
278  ulint i)
280 {
281  ut_ad(buf_pool_mutex_own(buf_pool));
282  ut_ad(buf_pool->zip_free[i].start != buf);
283 
284  buf_buddy_stamp_free(buf, i);
285  UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
286  ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
287 }
288 
289 /**********************************************************************/
291 UNIV_INLINE
292 void
293 buf_buddy_remove_from_free(
294 /*=======================*/
295  buf_pool_t* buf_pool,
296  buf_buddy_free_t* buf,
297  ulint i)
299 {
300  ut_ad(buf_pool_mutex_own(buf_pool));
301  ut_ad(buf_buddy_check_free(buf_pool, buf, i));
302 
303  UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
304  buf_buddy_stamp_nonfree(buf, i);
305 }
306 
307 /**********************************************************************/
310 static
312 buf_buddy_alloc_zip(
313 /*================*/
314  buf_pool_t* buf_pool,
315  ulint i)
316 {
318 
319  ut_ad(buf_pool_mutex_own(buf_pool));
320  ut_a(i < BUF_BUDDY_SIZES);
321  ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
322 
323  ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
324 
325  buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
326 
327  if (buf) {
328  buf_buddy_remove_from_free(buf_pool, buf, i);
329  } else if (i + 1 < BUF_BUDDY_SIZES) {
330  /* Attempt to split. */
331  buf = buf_buddy_alloc_zip(buf_pool, i + 1);
332 
333  if (buf) {
334  buf_buddy_free_t* buddy =
335  reinterpret_cast<buf_buddy_free_t*>(
336  buf->stamp.bytes
337  + (BUF_BUDDY_LOW << i));
338 
339  ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
340  buf_buddy_add_to_free(buf_pool, buddy, i);
341  }
342  }
343 
344  if (buf) {
345  /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
346  UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
347  UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
348  + buf->stamp.bytes, ~i,
349  (BUF_BUDDY_LOW << i)
350  - (BUF_BUDDY_STAMP_OFFSET + 4));
351  ut_ad(mach_read_from_4(buf->stamp.bytes
354  }
355 
356  return(buf);
357 }
358 
359 /**********************************************************************/
361 static
362 void
363 buf_buddy_block_free(
364 /*=================*/
365  buf_pool_t* buf_pool,
366  void* buf)
367 {
368  const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
369  buf_page_t* bpage;
371 
372  ut_ad(buf_pool_mutex_own(buf_pool));
373  ut_ad(!mutex_own(&buf_pool->zip_mutex));
374  ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
375 
376  HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
378  && bpage->in_zip_hash && !bpage->in_page_hash),
379  ((buf_block_t*) bpage)->frame == buf);
380  ut_a(bpage);
382  ut_ad(!bpage->in_page_hash);
383  ut_ad(bpage->in_zip_hash);
384  ut_d(bpage->in_zip_hash = FALSE);
385  HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
386 
387  ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
388  UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
389 
390  block = (buf_block_t*) bpage;
391  mutex_enter(&block->mutex);
393  mutex_exit(&block->mutex);
394 
395  ut_ad(buf_pool->buddy_n_frames > 0);
396  ut_d(buf_pool->buddy_n_frames--);
397 }
398 
399 /**********************************************************************/
401 static
402 void
403 buf_buddy_block_register(
404 /*=====================*/
405  buf_block_t* block)
406 {
407  buf_pool_t* buf_pool = buf_pool_from_block(block);
408  const ulint fold = BUF_POOL_ZIP_FOLD(block);
409  ut_ad(buf_pool_mutex_own(buf_pool));
410  ut_ad(!mutex_own(&buf_pool->zip_mutex));
412 
414 
415  ut_a(block->frame);
416  ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
417 
418  ut_ad(!block->page.in_page_hash);
419  ut_ad(!block->page.in_zip_hash);
420  ut_d(block->page.in_zip_hash = TRUE);
421  HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
422 
423  ut_d(buf_pool->buddy_n_frames++);
424 }
425 
426 /**********************************************************************/
429 static
430 void*
431 buf_buddy_alloc_from(
432 /*=================*/
433  buf_pool_t* buf_pool,
434  void* buf,
435  ulint i,
437  ulint j)
439 {
440  ulint offs = BUF_BUDDY_LOW << j;
441  ut_ad(j <= BUF_BUDDY_SIZES);
442  ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
443  ut_ad(j >= i);
444  ut_ad(!ut_align_offset(buf, offs));
445 
446  /* Add the unused parts of the block to the free lists. */
447  while (j > i) {
448  buf_buddy_free_t* zip_buf;
449 
450  offs >>= 1;
451  j--;
452 
453  zip_buf = reinterpret_cast<buf_buddy_free_t*>(
454  reinterpret_cast<byte*>(buf) + offs);
455  buf_buddy_add_to_free(buf_pool, zip_buf, j);
456  }
457 
458  buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
459  return(buf);
460 }
461 
462 /**********************************************************************/
467 UNIV_INTERN
468 void*
469 buf_buddy_alloc_low(
470 /*================*/
471  buf_pool_t* buf_pool,
472  ulint i,
474  ibool* lru)
479 {
481 
482  ut_ad(lru);
483  ut_ad(buf_pool_mutex_own(buf_pool));
484  ut_ad(!mutex_own(&buf_pool->zip_mutex));
485  ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
486 
487  if (i < BUF_BUDDY_SIZES) {
488  /* Try to allocate from the buddy system. */
489  block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i);
490 
491  if (block) {
492  goto func_exit;
493  }
494  }
495 
496  /* Try allocating from the buf_pool->free list. */
497  block = buf_LRU_get_free_only(buf_pool);
498 
499  if (block) {
500 
501  goto alloc_big;
502  }
503 
504  /* Try replacing an uncompressed page in the buffer pool. */
505  buf_pool_mutex_exit(buf_pool);
506  block = buf_LRU_get_free_block(buf_pool);
507  *lru = TRUE;
508  buf_pool_mutex_enter(buf_pool);
509 
510 alloc_big:
511  buf_buddy_block_register(block);
512 
513  block = (buf_block_t*) buf_buddy_alloc_from(
514  buf_pool, block->frame, i, BUF_BUDDY_SIZES);
515 
516 func_exit:
517  buf_pool->buddy_stat[i].used++;
518  return(block);
519 }
520 
521 /**********************************************************************/
524 static
525 bool
526 buf_buddy_relocate(
527 /*===============*/
528  buf_pool_t* buf_pool,
529  void* src,
530  void* dst,
531  ulint i)
533 {
534  buf_page_t* bpage;
535  const ulint size = BUF_BUDDY_LOW << i;
536  ib_mutex_t* mutex;
537  ulint space;
538  ulint offset;
539 
540  ut_ad(buf_pool_mutex_own(buf_pool));
541  ut_ad(!mutex_own(&buf_pool->zip_mutex));
542  ut_ad(!ut_align_offset(src, size));
543  ut_ad(!ut_align_offset(dst, size));
544  ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
545  UNIV_MEM_ASSERT_W(dst, size);
546 
547  space = mach_read_from_4((const byte*) src
549  offset = mach_read_from_4((const byte*) src
550  + FIL_PAGE_OFFSET);
551 
552  /* Suppress Valgrind warnings about conditional jump
553  on uninitialized value. */
554  UNIV_MEM_VALID(&space, sizeof space);
555  UNIV_MEM_VALID(&offset, sizeof offset);
556 
557  ut_ad(space != BUF_BUDDY_STAMP_FREE);
558 
559  bpage = buf_page_hash_get(buf_pool, space, offset);
560 
561  if (!bpage || bpage->zip.data != src) {
562  /* The block has probably been freshly
563  allocated by buf_LRU_get_free_block() but not
564  added to buf_pool->page_hash yet. Obviously,
565  it cannot be relocated. */
566 
567  return(false);
568  }
569 
570  if (page_zip_get_size(&bpage->zip) != size) {
571  /* The block is of different size. We would
572  have to relocate all blocks covered by src.
573  For the sake of simplicity, give up. */
574  ut_ad(page_zip_get_size(&bpage->zip) < size);
575 
576  return(false);
577  }
578 
579  /* The block must have been allocated, but it may
580  contain uninitialized data. */
581  UNIV_MEM_ASSERT_W(src, size);
582 
583  mutex = buf_page_get_mutex(bpage);
584 
585  mutex_enter(mutex);
586 
587  if (buf_page_can_relocate(bpage)) {
588  /* Relocate the compressed page. */
589  ullint usec = ut_time_us(NULL);
590  ut_a(bpage->zip.data == src);
591  memcpy(dst, src, size);
592  bpage->zip.data = (page_zip_t*) dst;
593  mutex_exit(mutex);
594  buf_buddy_mem_invalid(
595  reinterpret_cast<buf_buddy_free_t*>(src), i);
596 
597  buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i];
598  buddy_stat->relocated++;
599  buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
600  return(true);
601  }
602 
603  mutex_exit(mutex);
604  return(false);
605 }
606 
607 /**********************************************************************/
609 UNIV_INTERN
610 void
611 buf_buddy_free_low(
612 /*===============*/
613  buf_pool_t* buf_pool,
614  void* buf,
616  ulint i)
618 {
619  buf_buddy_free_t* buddy;
620 
621  ut_ad(buf_pool_mutex_own(buf_pool));
622  ut_ad(!mutex_own(&buf_pool->zip_mutex));
623  ut_ad(i <= BUF_BUDDY_SIZES);
624  ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
625  ut_ad(buf_pool->buddy_stat[i].used > 0);
626 
627  buf_pool->buddy_stat[i].used--;
628 recombine:
629  UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
630 
631  if (i == BUF_BUDDY_SIZES) {
632  buf_buddy_block_free(buf_pool, buf);
633  return;
634  }
635 
636  ut_ad(i < BUF_BUDDY_SIZES);
637  ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
638  ut_ad(!buf_pool_contains_zip(buf_pool, buf));
639 
640  /* Do not recombine blocks if there are few free blocks.
641  We may waste up to 15360*max_len bytes to free blocks
642  (1024 + 2048 + 4096 + 8192 = 15360) */
643  if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
644  goto func_exit;
645  }
646 
647  /* Try to combine adjacent blocks. */
648  buddy = reinterpret_cast<buf_buddy_free_t*>(
649  buf_buddy_get(reinterpret_cast<byte*>(buf),
650  BUF_BUDDY_LOW << i));
651 
652  switch (buf_buddy_is_free(buddy, i)) {
654  /* The buddy is free: recombine */
655  buf_buddy_remove_from_free(buf_pool, buddy, i);
656 buddy_is_free:
657  ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
658  i++;
659  buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
660 
661  goto recombine;
662 
664  ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
665 
666  /* The buddy is not free. Is there a free block of
667  this size? */
668  if (buf_buddy_free_t* zip_buf =
669  UT_LIST_GET_FIRST(buf_pool->zip_free[i])) {
670 
671  /* Remove the block from the free list, because
672  a successful buf_buddy_relocate() will overwrite
673  zip_free->list. */
674  buf_buddy_remove_from_free(buf_pool, zip_buf, i);
675 
676  /* Try to relocate the buddy of buf to the free
677  block. */
678  if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
679 
680  goto buddy_is_free;
681  }
682 
683  buf_buddy_add_to_free(buf_pool, zip_buf, i);
684  }
685 
686  break;
688  /* Some sub-blocks in the buddy are still in use.
689  Relocation will fail. No need to try. */
690  break;
691  }
692 
693 func_exit:
694  /* Free the block to the buddy list. */
695  buf_buddy_add_to_free(buf_pool,
696  reinterpret_cast<buf_buddy_free_t*>(buf),
697  i);
698 }