MySQL 5.6.14 Source Code Document
mf_keycache.c
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
42 /*
43  Key Cache Locking
44  =================
45 
46  All key cache locking is done with a single mutex per key cache:
47  keycache->cache_lock. This mutex is locked almost all the time
48  when executing code in this file (mf_keycache.c).
49  However it is released for I/O and some copy operations.
50 
51  The cache_lock is also released when waiting for some event. Waiting
52  and signalling is done via condition variables. In most cases the
53  thread waits on its thread->suspend condition variable. Every thread
54  has a my_thread_var structure, which contains this variable and a
55  '*next' and '**prev' pointer. These pointers are used to insert the
56  thread into a wait queue.
57 
58  A thread can wait for one block and thus be in one wait queue at a
59  time only.
60 
61  Before starting to wait on its condition variable with
62  mysql_cond_wait(), the thread enters itself to a specific wait queue
63  with link_into_queue() (double linked with '*next' + '**prev') or
64  wait_on_queue() (single linked with '*next').
65 
66  Another thread, when releasing a resource, looks up the waiting thread
67  in the related wait queue. It sends a signal with
68  mysql_cond_signal() to the waiting thread.
69 
70  NOTE: Depending on the particular wait situation, either the sending
71  thread removes the waiting thread from the wait queue with
72  unlink_from_queue() or release_whole_queue(), or the waiting
73  thread removes itself.
74 
75  There is one exception to this locking scheme when one thread wants
76  to reuse a block for some other address. This works by first marking
77  the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
78  threads that are reading the block to finish. Each block has a
79  reference to a condition variable (condvar). It holds a reference to
80  the thread->suspend condition variable for the waiting thread (if such
81  a thread exists). When that thread is signaled, the reference is
82  cleared. The number of readers of a block is registered in
83  block->hash_link->requests. See wait_for_readers() / remove_reader()
84  for details. This is similar to the above, but it clearly means that
85  only one thread can wait for a particular block. There is no queue in
86  this case. Strangely enough block->condvar is used for waiting for the
87  assigned hash_link only. More precisely it is used to wait for all
88  requests to be unregistered from the assigned hash_link.
89 
90  The resize_queue serves two purposes:
91  1. Threads that want to do a resize wait there if in_resize is set.
92  This is not used in the server. The server refuses a second resize
93  request if one is already active. keycache->in_init is used for the
94  synchronization. See set_var.cc.
95  2. Threads that want to access blocks during resize wait here during
96  the re-initialization phase.
97  When the resize is done, all threads on the queue are signalled.
98  Hypothetical resizers can compete for resizing, and read/write
99  requests will again request blocks from the freshly resized
100  cache. If the cache has been resized too small, it is disabled and
101  'can_be_used' is false. In this case read/write requests bypass the
102  cache. Since they increment and decrement 'cnt_for_resize_op', the
103  next resizer can wait on the queue 'waiting_for_resize_cnt' until all
104  I/O has finished.
105 */
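
To make the protocol above concrete, here is a minimal, self-contained sketch
of the wait-queue handshake using plain pthreads. The simplified thread_var
and wqueue types and the sketch_* function names are illustrative stand-ins,
not the server's my_thread_var/KEYCACHE_WQUEUE definitions.

#include <pthread.h>
#include <stddef.h>

/* Simplified stand-ins for my_thread_var and KEYCACHE_WQUEUE. */
struct thread_var
{
  pthread_cond_t suspend;          /* per-thread condition variable      */
  struct thread_var *next;         /* single link, as in wait_on_queue() */
};

struct wqueue
{
  struct thread_var *last_thread;  /* circular list, tail pointer        */
};

/* Enqueue self, then sleep until a releaser unlinks us. The loop makes
   this safe against stray signals: the real wakeup token is next == NULL.
   cache_lock must be held on entry and is held again on return. */
static void sketch_wait_on_queue(struct wqueue *wq, struct thread_var *self,
                                 pthread_mutex_t *cache_lock)
{
  if (!wq->last_thread)
    self->next= self;
  else
  {
    self->next= wq->last_thread->next;
    wq->last_thread->next= self;
  }
  wq->last_thread= self;
  do
    pthread_cond_wait(&self->suspend, cache_lock);
  while (self->next);
}

/* Signal every waiter and clear its queue link, emptying the queue. */
static void sketch_release_whole_queue(struct wqueue *wq)
{
  struct thread_var *last= wq->last_thread, *thread, *next;
  if (!last)
    return;
  next= last->next;
  do
  {
    thread= next;
    next= thread->next;
    thread->next= NULL;                 /* this is what ends the wait loop */
    pthread_cond_signal(&thread->suspend);
  } while (thread != last);
  wq->last_thread= NULL;
}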
106 
107 #include "mysys_priv.h"
108 #include "mysys_err.h"
109 #include <keycache.h>
110 #include "my_static.h"
111 #include <m_string.h>
112 #include <my_bit.h>
113 #include <errno.h>
114 #include <stdarg.h>
115 #include "probes_mysql.h"
116 
117 /*
118  Some compilation flags have been added specifically for this module
119  to control the following:
120  - not to let a thread yield control when reading directly
121  from the key cache, which might improve performance in many cases;
122  to enable this add:
123  #define SERIALIZED_READ_FROM_CACHE
124  - to set an upper bound for the number of threads simultaneously
125  using the key cache; this setting helps to determine an optimal
126  size for the hash table and improve performance when the number of
127  blocks in the key cache is much less than the number of threads
128  accessing it;
129  to set this number equal to <N> add
130  #define MAX_THREADS <N>
131  - to replace calls of mysql_cond_wait with calls of
132  mysql_cond_timedwait (wait with a timeout set up);
133  this setting should be used only when you want to trap a deadlock
134  situation, which theoretically should not happen;
135  to set timeout equal to <T> seconds add
136  #define KEYCACHE_TIMEOUT <T>
137  - to enable the module traps and to send debug information from
138  key cache module to a special debug log add:
139  #define KEYCACHE_DEBUG
140  the name of this debug log file <LOG NAME> can be set through:
141  #define KEYCACHE_DEBUG_LOG <LOG NAME>
142  if the name is not defined, the default "keycache_debug.log" is used;
143  if the KEYCACHE_DEBUG flag is not set up and we are in a debug
144  mode, i.e. when ! defined(DBUG_OFF), the debug information from the
145  module is sent to the regular debug log.
146 
147  Example of the settings:
148  #define SERIALIZED_READ_FROM_CACHE
149  #define MAX_THREADS 100
150  #define KEYCACHE_TIMEOUT 1
151  #define KEYCACHE_DEBUG
152  #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
153 */
154 
155 #define STRUCT_PTR(TYPE, MEMBER, a) \
156  (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
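
STRUCT_PTR is the classic container-of idiom: given the address of a member,
it recovers the address of the enclosing struct. unlink_from_queue() and
unlink_block() use it to map a stored '**prev' pointer back to the element
that owns the corresponding '*next' field. A tiny hypothetical illustration
(struct point and struct_ptr_demo are made up for this example):

struct point { int x; int y; };

static int struct_ptr_demo(void)
{
  struct point p= { 1, 2 };
  int *py= &p.y;                                    /* pointer to a member */
  struct point *pp= STRUCT_PTR(struct point, y, py);
  return pp->x;                                     /* 1, since pp == &p   */
}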
157 
158 /* types of condition variables */
159 #define COND_FOR_REQUESTED 0
160 #define COND_FOR_SAVED 1
161 #define COND_FOR_READERS 2
162 
163 typedef mysql_cond_t KEYCACHE_CONDVAR;
164 
165 /* descriptor of the page in the key cache block buffer */
166 struct st_keycache_page
167 {
168  int file; /* file to which the page belongs to */
169  my_off_t filepos; /* position of the page in the file */
170 };
171 
172 /* element in the chain of a hash table bucket */
173 struct st_hash_link
174 {
175  struct st_hash_link *next, **prev; /* to connect links in the same bucket */
176  struct st_block_link *block; /* reference to the block for the page: */
177  File file; /* from such a file */
178  my_off_t diskpos; /* with such an offset */
179  uint requests; /* number of requests for the page */
180 };
181 
182 /* simple states of a block */
183 #define BLOCK_ERROR 1 /* an error occurred when performing file i/o */
184 #define BLOCK_READ 2 /* file block is in the block buffer */
185 #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */
186 #define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */
187 #define BLOCK_IN_FLUSH 16 /* block is selected for flush */
188 #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
189 #define BLOCK_IN_USE 64 /* block is not free */
190 #define BLOCK_IN_EVICTION 128 /* block is selected for eviction */
191 #define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */
192 #define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */
193 
194 /* page status, returned by find_key_block */
195 #define PAGE_READ 0
196 #define PAGE_TO_BE_READ 1
197 #define PAGE_WAIT_TO_BE_READ 2
198 
199 /* block temperature determines in which (sub-)chain the block currently is */
200 enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
201 
202 /* key cache block */
203 struct st_block_link
204 {
205  struct st_block_link
206  *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
207  struct st_block_link
208  *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
209  struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
210  KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
211  uint requests; /* number of requests for the block */
212  uchar *buffer; /* buffer for the block page */
213  uint offset; /* beginning of modified data in the buffer */
214  uint length; /* end of data in the buffer */
215  uint status; /* state of the block */
216  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
217  uint hits_left; /* number of hits left until promotion */
218  ulonglong last_hit_time; /* timestamp of the last hit */
219  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
220 };
221 
222 KEY_CACHE dflt_key_cache_var;
223 KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
224 
225 #define FLUSH_CACHE 2000 /* sort this many blocks at once */
226 
227 static int flush_all_key_blocks(KEY_CACHE *keycache);
228 
229 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
230  mysql_mutex_t *mutex);
231 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
232 
233 static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
234 #if !defined(DBUG_OFF)
235 static void test_key_cache(KEY_CACHE *keycache,
236  const char *where, my_bool lock);
237 #endif
238 
239 #define KEYCACHE_HASH(f, pos) \
240 (((ulong) ((pos) / keycache->key_cache_block_size) + \
241  (ulong) (f)) & (keycache->hash_entries-1))
242 #define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
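
Both macros rely on their table sizes being powers of two (hash_entries is
guaranteed by next_power() below), so the cheap bit-mask '& (size-1)' replaces
a modulo. A worked example with illustrative values: for key_cache_block_size
= 1024, hash_entries = 2048, file descriptor 7 and file offset 1049600,
KEYCACHE_HASH gives ((1049600 / 1024) + 7) & 2047 = (1025 + 7) & 2047 = 1032,
so the page lands in bucket 1032 of keycache->hash_root.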
243 
244 #define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
245 
246 #if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
247 #define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
248 #endif
249 
250 #if defined(KEYCACHE_DEBUG_LOG)
251 static FILE *keycache_debug_log=NULL;
252 static void keycache_debug_print(const char *fmt,...);
253 #define KEYCACHE_DEBUG_OPEN \
254  if (!keycache_debug_log) \
255  { \
256  keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
257  (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
258  }
259 
260 #define KEYCACHE_DEBUG_CLOSE \
261  if (keycache_debug_log) \
262  { \
263  fclose(keycache_debug_log); \
264  keycache_debug_log= 0; \
265  }
266 #else
267 #define KEYCACHE_DEBUG_OPEN
268 #define KEYCACHE_DEBUG_CLOSE
269 #endif /* defined(KEYCACHE_DEBUG_LOG) */
270 
271 #if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
272 #define KEYCACHE_DBUG_PRINT(l, m) \
273  { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
274  keycache_debug_print m; }
275 
276 #define KEYCACHE_DBUG_ASSERT(a) \
277  { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
278  assert(a); }
279 #else
280 #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
281 #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
282 #endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */
283 
284 #if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
285 
286 static long keycache_thread_id;
287 #define KEYCACHE_THREAD_TRACE(l) \
288  KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
289 
290 #define KEYCACHE_THREAD_TRACE_BEGIN(l) \
291  { struct st_my_thread_var *thread_var= my_thread_var; \
292  keycache_thread_id= thread_var->id; \
293  KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
294 
295 #define KEYCACHE_THREAD_TRACE_END(l) \
296  KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
297 #else
298 #define KEYCACHE_THREAD_TRACE_BEGIN(l)
299 #define KEYCACHE_THREAD_TRACE_END(l)
300 #define KEYCACHE_THREAD_TRACE(l)
301 #endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */
302 
303 #define BLOCK_NUMBER(b) \
304  ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
305 #define HASH_LINK_NUMBER(h) \
306  ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
307 
308 #if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
309 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
310  mysql_mutex_t *mutex);
311 #else
312 #define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
313 #endif
314 
315 #if defined(KEYCACHE_DEBUG)
316 static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
317 static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
318 static int keycache_pthread_cond_signal(mysql_cond_t *cond);
319 #else
320 #define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
321 #define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
322 #define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
323 #endif /* defined(KEYCACHE_DEBUG) */
324 
325 #if !defined(DBUG_OFF)
326 #if defined(inline)
327 #undef inline
328 #endif
329 #define inline /* disabled inline for easier debugging */
330 static int fail_block(BLOCK_LINK *block);
331 static int fail_hlink(HASH_LINK *hlink);
332 static int cache_empty(KEY_CACHE *keycache);
333 #endif
334 
335 static inline uint next_power(uint value)
336 {
337  return (uint) my_round_up_to_next_power((uint32) value) << 1;
338 }
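
Assuming my_round_up_to_next_power() returns the smallest power of two not
less than its argument (a standard bit-smearing trick, sketched below for
32-bit values), next_power() always yields a power of two strictly greater
than 'value', e.g. next_power(1000) == 2048 and next_power(1024) == 2048.

static uint32 round_up_to_next_power_sketch(uint32 v)
{
  /* Smear the highest set bit of v-1 into all lower positions, then add 1. */
  v--;
  v|= v >> 1;
  v|= v >> 2;
  v|= v >> 4;
  v|= v >> 8;
  v|= v >> 16;
  return v + 1;
}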
339 
340 
341 /*
342  Initialize a key cache
343 
344  SYNOPSIS
345  init_key_cache()
346  keycache pointer to a key cache data structure
347  key_cache_block_size size of blocks to keep cached data
348  use_mem total memory to use for the key cache
349  division_limit division limit (may be zero)
350  age_threshold age threshold (may be zero)
351 
352  RETURN VALUE
353  number of blocks in the key cache, if successful,
354  0 - otherwise.
355 
356  NOTES.
357  if keycache->key_cache_inited != 0 we assume that the key cache
358  is already initialized. This is currently used by myisamchk, but
359  programs should not rely on it!
360 
361  It's assumed that no two threads call this function simultaneously
362  referring to the same key cache handle.
363 
364 */
365 
366 int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
367  size_t use_mem, uint division_limit,
368  uint age_threshold)
369 {
370  ulong blocks, hash_links;
371  size_t length;
372  int error;
373  DBUG_ENTER("init_key_cache");
374  DBUG_ASSERT(key_cache_block_size >= 512);
375 
376  KEYCACHE_DEBUG_OPEN;
377  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
378  {
379  DBUG_PRINT("warning",("key cache already in use"));
380  DBUG_RETURN(0);
381  }
382 
383  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
384  keycache->global_cache_read= keycache->global_cache_write= 0;
385  keycache->disk_blocks= -1;
386  if (! keycache->key_cache_inited)
387  {
388  keycache->key_cache_inited= 1;
389  /*
390  Initialize these variables once only.
391  Their value must survive re-initialization during resizing.
392  */
393  keycache->in_resize= 0;
394  keycache->resize_in_flush= 0;
395  keycache->cnt_for_resize_op= 0;
396  keycache->waiting_for_resize_cnt.last_thread= NULL;
397  keycache->in_init= 0;
398  mysql_mutex_init(key_KEY_CACHE_cache_lock,
399  &keycache->cache_lock, MY_MUTEX_INIT_FAST);
400  keycache->resize_queue.last_thread= NULL;
401  }
402 
403  keycache->key_cache_mem_size= use_mem;
404  keycache->key_cache_block_size= key_cache_block_size;
405  DBUG_PRINT("info", ("key_cache_block_size: %u",
406  key_cache_block_size));
407 
408  blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
409  sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
410  /* It doesn't make sense to have too few blocks (less than 8) */
411  if (blocks >= 8)
412  {
413  for ( ; ; )
414  {
415  /* Set hash_entries to the next bigger power of 2 */
416  if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
417  keycache->hash_entries<<= 1;
418  hash_links= 2 * blocks;
419 #if defined(MAX_THREADS)
420  if (hash_links < MAX_THREADS + blocks - 1)
421  hash_links= MAX_THREADS + blocks - 1;
422 #endif
423  while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
424  ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
425  ALIGN_SIZE(sizeof(HASH_LINK*) *
426  keycache->hash_entries))) +
427  ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
428  blocks--;
429  /* Allocate memory for cache page buffers */
430  if ((keycache->block_mem=
431  my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
432  MYF(0))))
433  {
434  /*
435  Allocate memory for blocks, hash_links and hash entries;
436  For each block 2 hash links are allocated
437  */
438  if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
439  MYF(0))))
440  break;
441  my_large_free(keycache->block_mem);
442  keycache->block_mem= 0;
443  }
444  if (blocks < 8)
445  {
446  my_errno= ENOMEM;
447  my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
448  blocks * keycache->key_cache_block_size);
449  goto err;
450  }
451  blocks= blocks / 4*3;
452  }
453  keycache->blocks_unused= blocks;
454  keycache->disk_blocks= (int) blocks;
455  keycache->hash_links= hash_links;
456  keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
457  ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
458  keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
459  ALIGN_SIZE((sizeof(HASH_LINK*) *
460  keycache->hash_entries)));
461  memset(keycache->block_root, 0,
462  keycache->disk_blocks * sizeof(BLOCK_LINK));
463  memset(keycache->hash_root, 0,
464  keycache->hash_entries * sizeof(HASH_LINK*));
465  memset(keycache->hash_link_root, 0,
466  keycache->hash_links * sizeof(HASH_LINK));
467  keycache->hash_links_used= 0;
468  keycache->free_hash_list= NULL;
469  keycache->blocks_used= keycache->blocks_changed= 0;
470 
471  keycache->global_blocks_changed= 0;
472  keycache->blocks_available=0; /* For debugging */
473 
474  /* The LRU chain is empty after initialization */
475  keycache->used_last= NULL;
476  keycache->used_ins= NULL;
477  keycache->free_block_list= NULL;
478  keycache->keycache_time= 0;
479  keycache->warm_blocks= 0;
480  keycache->min_warm_blocks= (division_limit ?
481  blocks * division_limit / 100 + 1 :
482  blocks);
483  keycache->age_threshold= (age_threshold ?
484  blocks * age_threshold / 100 :
485  blocks);
486 
487  keycache->can_be_used= 1;
488 
489  keycache->waiting_for_hash_link.last_thread= NULL;
490  keycache->waiting_for_block.last_thread= NULL;
491  DBUG_PRINT("exit",
492  ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\
493  hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx",
494  keycache->disk_blocks, (long) keycache->block_root,
495  keycache->hash_entries, (long) keycache->hash_root,
496  keycache->hash_links, (long) keycache->hash_link_root));
497  memset(keycache->changed_blocks, 0,
498  sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
499  memset(keycache->file_blocks, 0,
500  sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
501  }
502  else
503  {
504  /* key_buffer_size is specified too small. Disable the cache. */
505  keycache->can_be_used= 0;
506  }
507 
508  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
509  DBUG_RETURN((int) keycache->disk_blocks);
510 
511 err:
512  error= my_errno;
513  keycache->disk_blocks= 0;
514  keycache->blocks= 0;
515  if (keycache->block_mem)
516  {
517  my_large_free((uchar*) keycache->block_mem);
518  keycache->block_mem= NULL;
519  }
520  if (keycache->block_root)
521  {
522  my_free(keycache->block_root);
523  keycache->block_root= NULL;
524  }
525  my_errno= error;
526  keycache->can_be_used= 0;
527  DBUG_RETURN(0);
528 }
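
A hedged lifecycle sketch (block size and memory budget are arbitrary
illustrative values): zero the handle so key_cache_inited is 0, initialize,
use it, then tear it down with cleanup set so the mutex is destroyed too.

static void key_cache_lifecycle_sketch(void)
{
  KEY_CACHE cache;
  memset(&cache, 0, sizeof(cache));             /* key_cache_inited == 0    */
  if (!init_key_cache(&cache, 1024, 1024*1024, 0, 0))
    return;                                     /* 0 blocks: cache disabled */
  /* ... key_cache_read()/key_cache_write() on open files ... */
  end_key_cache(&cache, 1);                     /* cleanup: free mutex too  */
}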
529 
530 
531 /*
532  Resize a key cache
533 
534  SYNOPSIS
535  resize_key_cache()
536  keycache pointer to a key cache data structure
537  key_cache_block_size size of blocks to keep cached data
538  use_mem total memory to use for the new key cache
539  division_limit new division limit (if not zero)
540  age_threshold new age threshold (if not zero)
541 
542  RETURN VALUE
543  number of blocks in the key cache, if successful,
544  0 - otherwise.
545 
546  NOTES.
547  The function first compares the memory size and the block size parameters
548  with the key cache values.
549 
550  If they differ, the function frees the memory allocated for the
551  old key cache blocks by calling the end_key_cache function and
552  then rebuilds the key cache with new blocks by calling
553  init_key_cache.
554 
555  The function starts the operation only when all other threads
556  performing operations with the key cache let it proceed
557  (when cnt_for_resize_op=0).
558 */
559 
560 int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
561  size_t use_mem, uint division_limit,
562  uint age_threshold)
563 {
564  int blocks;
565  DBUG_ENTER("resize_key_cache");
566 
567  if (!keycache->key_cache_inited)
568  DBUG_RETURN(keycache->disk_blocks);
569 
570  if (key_cache_block_size == keycache->key_cache_block_size &&
571  use_mem == keycache->key_cache_mem_size)
572  {
573  change_key_cache_param(keycache, division_limit, age_threshold);
574  DBUG_RETURN(keycache->disk_blocks);
575  }
576 
577  keycache_pthread_mutex_lock(&keycache->cache_lock);
578 
579  /*
580  We may need to wait for another thread which is doing a resize
581  already. This cannot happen in the MySQL server though. It allows
582  one resizer only. In set_var.cc keycache->in_init is used to block
583  multiple attempts.
584  */
585  while (keycache->in_resize)
586  {
587  /* purecov: begin inspected */
588  wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
589  /* purecov: end */
590  }
591 
592  /*
593  Mark the operation in progress. This blocks other threads from doing
594  a resize in parallel. It prohibits new blocks to enter the cache.
595  Read/write requests can bypass the cache during the flush phase.
596  */
597  keycache->in_resize= 1;
598 
599  /* Need to flush only if keycache is enabled. */
600  if (keycache->can_be_used)
601  {
602  /* Start the flush phase. */
603  keycache->resize_in_flush= 1;
604 
605  if (flush_all_key_blocks(keycache))
606  {
607  /* TODO: if this happens, we should write a warning in the log file ! */
608  keycache->resize_in_flush= 0;
609  blocks= 0;
610  keycache->can_be_used= 0;
611  goto finish;
612  }
613  DBUG_ASSERT(cache_empty(keycache));
614 
615  /* End the flush phase. */
616  keycache->resize_in_flush= 0;
617  }
618 
619  /*
620  Some direct read/write operations (bypassing the cache) may still be
621  unfinished. Wait until they are done. If the key cache can be used,
622  direct I/O is done in increments of key_cache_block_size. That is,
623  every block is checked to see whether it is in the cache. We need to wait
624  pending I/O before re-initializing the cache, because we may change
625  the block size. Otherwise they could check for blocks at file
626  positions where the new block division has none. We also want to
627  wait for I/O completion even when (if) the cache was disabled. It must not
628  run in parallel with normal cache operation.
629  */
630  while (keycache->cnt_for_resize_op)
631  wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
632 
633  /*
634  Free old cache structures, allocate new structures, and initialize
635  them. Note that the cache_lock mutex and the resize_queue are left
636  untouched. We do not lose the cache_lock and will release it only at
637  the end of this function.
638  */
639  end_key_cache(keycache, 0); /* Don't free mutex */
640  /* The following will work even if use_mem is 0 */
641  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
642  division_limit, age_threshold);
643 
644 finish:
645  /*
646  Mark the resize finished. This allows other threads to start a
647  resize or to request new cache blocks.
648  */
649  keycache->in_resize= 0;
650 
651  /* Signal waiting threads. */
652  release_whole_queue(&keycache->resize_queue);
653 
654  keycache_pthread_mutex_unlock(&keycache->cache_lock);
655  DBUG_RETURN(blocks);
656 }
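
A sketch of a resize call with illustrative values; as the comment above
notes, the server itself serializes resizes via keycache->in_init in
set_var.cc, so only one resizer is ever active there.

/* Double the memory budget, keep the block size. Returns the new number
   of blocks, or 0 if the rebuilt cache ended up disabled. */
static int grow_key_cache_sketch(KEY_CACHE *cache)
{
  return resize_key_cache(cache, cache->key_cache_block_size,
                          cache->key_cache_mem_size * 2,
                          0, 0);   /* 0: keep default division/age limits */
}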
657 
658 
659 /*
660  Increment counter blocking resize key cache operation
661 */
662 static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
663 {
664  keycache->cnt_for_resize_op++;
665 }
666 
667 
668 /*
669  Decrement counter blocking resize key cache operation;
670  Signal the operation to proceed when the counter becomes zero
671 */
672 static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
673 {
674  if (!--keycache->cnt_for_resize_op)
675  release_whole_queue(&keycache->waiting_for_resize_cnt);
676 }
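
Read/write paths that bypass a disabled or resizing cache bracket their raw
file I/O with these counters, which is what lets the next resizer drain all
direct I/O by waiting on 'waiting_for_resize_cnt'. A minimal sketch of that
pattern (the direct I/O itself is only indicated by a comment):

static void direct_io_bracket_sketch(KEY_CACHE *keycache)
{
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  inc_counter_for_resize_op(keycache);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);

  /* ... my_pread()/my_pwrite() directly against the data file ... */

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  dec_counter_for_resize_op(keycache);   /* may wake a waiting resizer */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
}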
677 
678 /*
679  Change the key cache parameters
680 
681  SYNOPSIS
682  change_key_cache_param()
683  keycache pointer to a key cache data structure
684  division_limit new division limit (if not zero)
685  age_threshold new age threshold (if not zero)
686 
687  RETURN VALUE
688  none
689 
690  NOTES.
691  Presently the function resets the key cache parameters
692  concerning the midpoint insertion strategy - division_limit and
693  age_threshold.
694 */
695 
696 void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
697  uint age_threshold)
698 {
699  DBUG_ENTER("change_key_cache_param");
700 
701  keycache_pthread_mutex_lock(&keycache->cache_lock);
702  if (division_limit)
703  keycache->min_warm_blocks= (keycache->disk_blocks *
704  division_limit / 100 + 1);
705  if (age_threshold)
706  keycache->age_threshold= (keycache->disk_blocks *
707  age_threshold / 100);
708  keycache_pthread_mutex_unlock(&keycache->cache_lock);
709  DBUG_VOID_RETURN;
710 }
711 
712 
713 /*
714  Remove key_cache from memory
715 
716  SYNOPSIS
717  end_key_cache()
718  keycache key cache handle
719  cleanup Complete free (Free also mutex for key cache)
720 
721  RETURN VALUE
722  none
723 */
724 
725 void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
726 {
727  DBUG_ENTER("end_key_cache");
728  DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));
729 
730  if (!keycache->key_cache_inited)
731  DBUG_VOID_RETURN;
732 
733  if (keycache->disk_blocks > 0)
734  {
735  if (keycache->block_mem)
736  {
737  my_large_free((uchar*) keycache->block_mem);
738  keycache->block_mem= NULL;
739  my_free(keycache->block_root);
740  keycache->block_root= NULL;
741  }
742  keycache->disk_blocks= -1;
743  /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
744  keycache->blocks_changed= 0;
745  }
746 
747  DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
748  "writes: %lu r_requests: %lu reads: %lu",
749  keycache->blocks_used, keycache->global_blocks_changed,
750  (ulong) keycache->global_cache_w_requests,
751  (ulong) keycache->global_cache_write,
752  (ulong) keycache->global_cache_r_requests,
753  (ulong) keycache->global_cache_read));
754 
755  /*
756  Reset these values to be able to detect a disabled key cache.
757  See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
758  */
759  keycache->blocks_used= 0;
760  keycache->blocks_unused= 0;
761 
762  if (cleanup)
763  {
764  mysql_mutex_destroy(&keycache->cache_lock);
765  keycache->key_cache_inited= keycache->can_be_used= 0;
766  KEYCACHE_DEBUG_CLOSE;
767  }
768  DBUG_VOID_RETURN;
769 } /* end_key_cache */
770 
771 
772 /*
773  Link a thread into double-linked queue of waiting threads.
774 
775  SYNOPSIS
776  link_into_queue()
777  wqueue pointer to the queue structure
778  thread pointer to the thread to be added to the queue
779 
780  RETURN VALUE
781  none
782 
783  NOTES.
784  Queue is represented by a circular list of the thread structures.
785  The list is double-linked of the type (**prev,*next), accessed by
786  a pointer to the last element.
787 */
788 
789 static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
790  struct st_my_thread_var *thread)
791 {
792  struct st_my_thread_var *last;
793 
794  DBUG_ASSERT(!thread->next && !thread->prev);
795  if (! (last= wqueue->last_thread))
796  {
797  /* Queue is empty */
798  thread->next= thread;
799  thread->prev= &thread->next;
800  }
801  else
802  {
803  thread->prev= last->next->prev;
804  last->next->prev= &thread->next;
805  thread->next= last->next;
806  last->next= thread;
807  }
808  wqueue->last_thread= thread;
809 }
810 
811 /*
812  Unlink a thread from double-linked queue of waiting threads
813 
814  SYNOPSIS
815  unlink_from_queue()
816  wqueue pointer to the queue structure
817  thread pointer to the thread to be removed from the queue
818 
819  RETURN VALUE
820  none
821 
822  NOTES.
823  See NOTES for link_into_queue
824 */
825 
826 static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
827  struct st_my_thread_var *thread)
828 {
829  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
830  DBUG_ASSERT(thread->next && thread->prev);
831  if (thread->next == thread)
832  /* The queue contains only one member */
833  wqueue->last_thread= NULL;
834  else
835  {
836  thread->next->prev= thread->prev;
837  *thread->prev=thread->next;
838  if (wqueue->last_thread == thread)
839  wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
840  thread->prev);
841  }
842  thread->next= NULL;
843 #if !defined(DBUG_OFF)
844  /*
845  This makes it easier to see it's not in a chain during debugging.
846  And some DBUG_ASSERT() rely on it.
847  */
848  thread->prev= NULL;
849 #endif
850 }
851 
852 
853 /*
854  Add a thread to single-linked queue of waiting threads
855 
856  SYNOPSIS
857  wait_on_queue()
858  wqueue Pointer to the queue structure.
859  mutex Cache_lock to acquire after awake.
860 
861  RETURN VALUE
862  none
863 
864  NOTES.
865  Queue is represented by a circular list of the thread structures.
866  The list is single-linked of the type (*next), accessed by a pointer
867  to the last element.
868 
869  The function protects against stray signals by verifying that the
870  current thread is unlinked from the queue when awaking. However,
871  since several threads can wait for the same event, it might be
872  necessary for the caller of the function to check again whether the
873  condition for wakeup is indeed matched.
874 */
875 
876 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
877  mysql_mutex_t *mutex)
878 {
879  struct st_my_thread_var *last;
880  struct st_my_thread_var *thread= my_thread_var;
881 
882  /* Add to queue. */
883  DBUG_ASSERT(!thread->next);
884  DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
885  if (! (last= wqueue->last_thread))
886  thread->next= thread;
887  else
888  {
889  thread->next= last->next;
890  last->next= thread;
891  }
892  wqueue->last_thread= thread;
893 
894  /*
895  Wait until thread is removed from queue by the signalling thread.
896  The loop protects against stray signals.
897  */
898  do
899  {
900  KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", thread->id));
901  keycache_pthread_cond_wait(&thread->suspend, mutex);
902  }
903  while (thread->next);
904 }
905 
906 
907 /*
908  Remove all threads from queue signaling them to proceed
909 
910  SYNOPSIS
911  release_whole_queue()
912  wqueue pointer to the queue structure
913 
914  RETURN VALUE
915  none
916 
917  NOTES.
918  See notes for wait_on_queue().
919  When removed from the queue each thread is signaled via condition
920  variable thread->suspend.
921 */
922 
923 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
924 {
925  struct st_my_thread_var *last;
926  struct st_my_thread_var *next;
927  struct st_my_thread_var *thread;
928 
929  /* Queue may be empty. */
930  if (!(last= wqueue->last_thread))
931  return;
932 
933  next= last->next;
934  do
935  {
936  thread=next;
937  KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
938  ("thread %ld", thread->id));
939  /* Signal the thread. */
940  keycache_pthread_cond_signal(&thread->suspend);
941  /* Take thread from queue. */
942  next=thread->next;
943  thread->next= NULL;
944  }
945  while (thread != last);
946 
947  /* Now queue is definitely empty. */
948  wqueue->last_thread= NULL;
949 }
950 
951 
952 /*
953  Unlink a block from the chain of dirty/clean blocks
954 */
955 
956 static inline void unlink_changed(BLOCK_LINK *block)
957 {
958  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
959  if (block->next_changed)
960  block->next_changed->prev_changed= block->prev_changed;
961  *block->prev_changed= block->next_changed;
962 
963 #if !defined(DBUG_OFF)
964  /*
965  This makes it easier to see it's not in a chain during debugging.
966  And some DBUG_ASSERT() rely on it.
967  */
968  block->next_changed= NULL;
969  block->prev_changed= NULL;
970 #endif
971 }
972 
973 
974 /*
975  Link a block into the chain of dirty/clean blocks
976 */
977 
978 static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
979 {
980  DBUG_ASSERT(!block->next_changed);
981  DBUG_ASSERT(!block->prev_changed);
982  block->prev_changed= phead;
983  if ((block->next_changed= *phead))
984  (*phead)->prev_changed= &block->next_changed;
985  *phead= block;
986 }
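
The '**prev' back-pointer in these chains stores the address of whatever
'*next' field currently points at the block, so removal needs no special
case for the list head and no access to the owning hash bucket. A generic
illustration with a hypothetical node type:

struct node { struct node *next, **prev; };

/* Remove n from whichever chain it is on, head or middle alike. */
static void unlink_node_sketch(struct node *n)
{
  if (n->next)
    n->next->prev= n->prev;
  *n->prev= n->next;   /* works even when *prev is the chain head pointer */
}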
987 
988 
989 /*
990  Link a block in a chain of clean blocks of a file.
991 
992  SYNOPSIS
993  link_to_file_list()
994  keycache Key cache handle
995  block Block to relink
996  file File to be linked to
997  unlink If to unlink first
998 
999  DESCRIPTION
1000  Unlink a block from whichever chain it is linked in, if it's
1001  asked for, and link it to the chain of clean blocks of the
1002  specified file.
1003 
1004  NOTE
1005  Please never set/clear BLOCK_CHANGED outside of
1006  link_to_file_list() or link_to_changed_list().
1007  You would risk damaging the correct counting of changed blocks
1008  and finding blocks in the wrong hash.
1009 
1010  RETURN
1011  void
1012 */
1013 
1014 static void link_to_file_list(KEY_CACHE *keycache,
1015  BLOCK_LINK *block, int file,
1016  my_bool unlink_block)
1017 {
1018  DBUG_ASSERT(block->status & BLOCK_IN_USE);
1019  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1020  DBUG_ASSERT(block->hash_link->file == file);
1021  if (unlink_block)
1022  unlink_changed(block);
1023  link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
1024  if (block->status & BLOCK_CHANGED)
1025  {
1026  block->status&= ~BLOCK_CHANGED;
1027  keycache->blocks_changed--;
1028  keycache->global_blocks_changed--;
1029  }
1030 }
1031 
1032 
1033 /*
1034  Re-link a block from the clean chain to the dirty chain of a file.
1035 
1036  SYNOPSIS
1037  link_to_changed_list()
1038  keycache key cache handle
1039  block block to relink
1040 
1041  DESCRIPTION
1042  Unlink a block from the chain of clean blocks of a file
1043  and link it to the chain of dirty blocks of the same file.
1044 
1045  NOTE
1046  Please never set/clear BLOCK_CHANGED outside of
1047  link_to_file_list() or link_to_changed_list().
1048  You would risk damaging the correct counting of changed blocks
1049  and finding blocks in the wrong hash.
1050 
1051  RETURN
1052  void
1053 */
1054 
1055 static void link_to_changed_list(KEY_CACHE *keycache,
1056  BLOCK_LINK *block)
1057 {
1058  DBUG_ASSERT(block->status & BLOCK_IN_USE);
1059  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
1060  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1061 
1062  unlink_changed(block);
1063  link_changed(block,
1064  &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
1065  block->status|=BLOCK_CHANGED;
1066  keycache->blocks_changed++;
1067  keycache->global_blocks_changed++;
1068 }
1069 
1070 
1071 /*
1072  Link a block to the LRU chain at the beginning or at the end of
1073  one of two parts.
1074 
1075  SYNOPSIS
1076  link_block()
1077  keycache pointer to a key cache data structure
1078  block pointer to the block to link to the LRU chain
1079  hot <-> to link the block into the hot subchain
1080  at_end <-> to link the block at the end of the subchain
1081 
1082  RETURN VALUE
1083  none
1084 
1085  NOTES.
1086  The LRU ring is represented by a circular list of block structures.
1087  The list is double-linked, of the (**prev,*next) type.
1088  The LRU ring is divided into two parts - hot and warm.
1089  There are two pointers to access the last blocks of these two
1090  parts. The beginning of the warm part follows right after the
1091  end of the hot part.
1092  Only blocks of the warm part can be used for eviction.
1093  The first block from the beginning of this subchain is always
1094  taken for eviction (keycache->used_last->next)
1095 
1096  LRU chain: +------+ H O T +------+
1097  +----| end |----...<----| beg |----+
1098  | +------+last +------+ |
1099  v<-link in latest hot (new end) |
1100  | link in latest warm (new end)->^
1101  | +------+ W A R M +------+ |
1102  +----| beg |---->...----| end |----+
1103  +------+ +------+ins
1104  first for eviction
1105 
1106  It is also possible that the block is selected for eviction and thus
1107  not linked in the LRU ring.
1108 */
1109 
1110 static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
1111  my_bool at_end)
1112 {
1113  BLOCK_LINK *ins;
1114  BLOCK_LINK **pins;
1115 
1116  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1117  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1118  DBUG_ASSERT(!block->requests);
1119  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1120  DBUG_ASSERT(!block->next_used);
1121  DBUG_ASSERT(!block->prev_used);
1122 
1123  if (!hot && keycache->waiting_for_block.last_thread)
1124  {
1125  /* Signal that in the LRU warm sub-chain an available block has appeared */
1126  struct st_my_thread_var *last_thread=
1127  keycache->waiting_for_block.last_thread;
1128  struct st_my_thread_var *first_thread= last_thread->next;
1129  struct st_my_thread_var *next_thread= first_thread;
1130  HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
1131  struct st_my_thread_var *thread;
1132  do
1133  {
1134  thread= next_thread;
1135  next_thread= thread->next;
1136  /*
1137  We notify all threads that ask for the same page
1138  as the first thread in the queue about the event
1139  */
1140  if ((HASH_LINK *) thread->opt_info == hash_link)
1141  {
1142  KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
1143  keycache_pthread_cond_signal(&thread->suspend);
1144  unlink_from_queue(&keycache->waiting_for_block, thread);
1145  block->requests++;
1146  }
1147  }
1148  while (thread != last_thread);
1149  hash_link->block= block;
1150  /*
1151  NOTE: We assigned the block to the hash_link and signalled the
1152  requesting thread(s). But it is possible that other threads run
1153  first. These threads see the hash_link assigned to a block which
1154  is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
1155  This can be a problem for functions that do not select the block
1156  via its hash_link: flush and free. They only see a block which
1157  is in a "normal" state and don't know that it will be evicted soon.
1158 
1159  We cannot set BLOCK_IN_SWITCH here because only one of the
1160  requesting threads must handle the eviction. All others must wait
1161  for it to complete. If we set the flag here, the threads would not
1162  know who is in charge of the eviction. Without the flag, the first
1163  thread takes the stick and sets the flag.
1164 
1165  But we need to note in the block that it has been selected for
1166  eviction. It must not be freed. The evicting thread will not
1167  expect the block in the free list. Before freeing we could also
1168  check if block->requests > 1. But I think including another flag
1169  in the check of block->status is slightly more efficient and
1170  probably easier to read.
1171  */
1172  block->status|= BLOCK_IN_EVICTION;
1173  KEYCACHE_THREAD_TRACE("link_block: after signaling");
1174 #if defined(KEYCACHE_DEBUG)
1175  KEYCACHE_DBUG_PRINT("link_block",
1176  ("linked,unlinked block %u status=%x #requests=%u #available=%u",
1177  BLOCK_NUMBER(block), block->status,
1178  block->requests, keycache->blocks_available));
1179 #endif
1180  return;
1181  }
1182 
1183  pins= hot ? &keycache->used_ins : &keycache->used_last;
1184  ins= *pins;
1185  if (ins)
1186  {
1187  ins->next_used->prev_used= &block->next_used;
1188  block->next_used= ins->next_used;
1189  block->prev_used= &ins->next_used;
1190  ins->next_used= block;
1191  if (at_end)
1192  *pins= block;
1193  }
1194  else
1195  {
1196  /* The LRU ring is empty. Let the block point to itself. */
1197  keycache->used_last= keycache->used_ins= block->next_used= block;
1198  block->prev_used= &block->next_used;
1199  }
1200  KEYCACHE_THREAD_TRACE("link_block");
1201 #if defined(KEYCACHE_DEBUG)
1202  keycache->blocks_available++;
1203  KEYCACHE_DBUG_PRINT("link_block",
1204  ("linked block %u:%1u status=%x #requests=%u #available=%u",
1205  BLOCK_NUMBER(block), at_end, block->status,
1206  block->requests, keycache->blocks_available));
1207  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
1208  keycache->blocks_used);
1209 #endif
1210 }
1211 
1212 
1213 /*
1214  Unlink a block from the LRU chain
1215 
1216  SYNOPSIS
1217  unlink_block()
1218  keycache pointer to a key cache data structure
1219  block pointer to the block to unlink from the LRU chain
1220 
1221  RETURN VALUE
1222  none
1223 
1224  NOTES.
1225  See NOTES for link_block
1226 */
1227 
1228 static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
1229 {
1230  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1231  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1232  DBUG_ASSERT(!block->requests);
1233  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1234  DBUG_ASSERT(block->next_used && block->prev_used &&
1235  (block->next_used->prev_used == &block->next_used) &&
1236  (*block->prev_used == block));
1237  if (block->next_used == block)
1238  /* The list contains only one member */
1239  keycache->used_last= keycache->used_ins= NULL;
1240  else
1241  {
1242  block->next_used->prev_used= block->prev_used;
1243  *block->prev_used= block->next_used;
1244  if (keycache->used_last == block)
1245  keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1246  if (keycache->used_ins == block)
1247  keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1248  }
1249  block->next_used= NULL;
1250 #if !defined(DBUG_OFF)
1251  /*
1252  This makes it easier to see it's not in a chain during debugging.
1253  And some DBUG_ASSERT() rely on it.
1254  */
1255  block->prev_used= NULL;
1256 #endif
1257 
1258  KEYCACHE_THREAD_TRACE("unlink_block");
1259 #if defined(KEYCACHE_DEBUG)
1260  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
1261  keycache->blocks_available--;
1262  KEYCACHE_DBUG_PRINT("unlink_block",
1263  ("unlinked block %u status=%x #requests=%u #available=%u",
1264  BLOCK_NUMBER(block), block->status,
1265  block->requests, keycache->blocks_available));
1266 #endif
1267 }
1268 
1269 
1270 /*
1271  Register requests for a block.
1272 
1273  SYNOPSIS
1274  reg_requests()
1275  keycache Pointer to a key cache data structure.
1276  block Pointer to the block to register a request on.
1277  count Number of requests. Always 1.
1278 
1279  NOTE
1280  The first request unlinks the block from the LRU ring. This means
1281  that it is protected against eviction.
1282 
1283  RETURN
1284  void
1285 */
1286 static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
1287 {
1288  DBUG_ASSERT(block->status & BLOCK_IN_USE);
1289  DBUG_ASSERT(block->hash_link);
1290 
1291  if (!block->requests)
1292  unlink_block(keycache, block);
1293  block->requests+=count;
1294 }
1295 
1296 
1297 /*
1298  Unregister request for a block
1299  linking it to the LRU chain if it's the last request
1300 
1301  SYNOPSIS
1302  unreg_request()
1303  keycache pointer to a key cache data structure
1304  block pointer to the block to link to the LRU chain
1305  at_end <-> to link the block at the end of the LRU chain
1306 
1307  RETURN VALUE
1308  none
1309 
1310  NOTES.
1311  Every linking to the LRU ring decrements by one the block's hits_left
1312  counter (if it's positive). If the at_end parameter is TRUE the block is
1313  added either at the end of warm sub-chain or at the end of hot sub-chain.
1314  It is added to the hot subchain if its counter is zero and number of
1315  blocks in warm sub-chain is not less than some low limit (determined by
1316  the division_limit parameter). Otherwise the block is added to the warm
1317  sub-chain. If the at_end parameter is FALSE the block is always added
1318  at the beginning of the warm sub-chain.
1319  Thus a warm block can be promoted to the hot sub-chain when its counter
1320  becomes zero for the first time.
1321  At the same time the block at the very beginning of the hot subchain
1322  might be moved to the beginning of the warm subchain if it stays untouched
1323  for too long a time (this time is determined by the age_threshold parameter).
1324 
1325  It is also possible that the block is selected for eviction and thus
1326  not linked in the LRU ring.
1327 */
1328 
1329 static void unreg_request(KEY_CACHE *keycache,
1330  BLOCK_LINK *block, int at_end)
1331 {
1332  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1333  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1334  DBUG_ASSERT(block->requests);
1335  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1336  DBUG_ASSERT(!block->next_used);
1337  DBUG_ASSERT(!block->prev_used);
1338  /*
1339  Unregister the request, but do not link erroneous blocks into the
1340  LRU ring.
1341  */
1342  if (!--block->requests && !(block->status & BLOCK_ERROR))
1343  {
1344  my_bool hot;
1345  if (block->hits_left)
1346  block->hits_left--;
1347  hot= !block->hits_left && at_end &&
1348  keycache->warm_blocks > keycache->min_warm_blocks;
1349  if (hot)
1350  {
1351  if (block->temperature == BLOCK_WARM)
1352  keycache->warm_blocks--;
1353  block->temperature= BLOCK_HOT;
1354  KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1355  keycache->warm_blocks));
1356  }
1357  link_block(keycache, block, hot, (my_bool)at_end);
1358  block->last_hit_time= keycache->keycache_time;
1359  keycache->keycache_time++;
1360  /*
1361  At this place, the block might be in the LRU ring or not. If an
1362  evicter was waiting for a block, it was selected for eviction and
1363  not linked in the LRU ring.
1364  */
1365 
1366  /*
1367  Check if we should link a hot block to the warm block sub-chain.
1368  It is possible that we select the same block as above. But it can
1369  also be another block. In any case a block from the LRU ring is
1370  selected. In other words it works even if the above block was
1371  selected for eviction and not linked in the LRU ring. Since this
1372  happens only if the LRU ring is empty, the block selected below
1373  would be NULL and the rest of the function skipped.
1374  */
1375  block= keycache->used_ins;
1376  if (block && keycache->keycache_time - block->last_hit_time >
1377  keycache->age_threshold)
1378  {
1379  unlink_block(keycache, block);
1380  link_block(keycache, block, 0, 0);
1381  if (block->temperature != BLOCK_WARM)
1382  {
1383  keycache->warm_blocks++;
1384  block->temperature= BLOCK_WARM;
1385  }
1386  KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1387  keycache->warm_blocks));
1388  }
1389  }
1390 }
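
A worked example of the midpoint strategy with illustrative numbers: for a
cache of 1000 blocks created with division_limit=60 and age_threshold=300,
init_key_cache() sets min_warm_blocks = 1000*60/100 + 1 = 601 and
age_threshold = 1000*300/100 = 3000. A block is then promoted to the hot
sub-chain only when its hits_left counter has dropped to zero while more
than 601 blocks are warm, and the block at keycache->used_ins is demoted
back to the warm sub-chain once keycache_time has advanced more than 3000
ticks past its last_hit_time.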
1391 
1392 /*
1393  Remove a reader of the page in block
1394 */
1395 
1396 static void remove_reader(BLOCK_LINK *block)
1397 {
1398  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1399  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1400  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1401  DBUG_ASSERT(!block->next_used);
1402  DBUG_ASSERT(!block->prev_used);
1403  DBUG_ASSERT(block->hash_link->requests);
1404 
1405  if (! --block->hash_link->requests && block->condvar)
1406  keycache_pthread_cond_signal(block->condvar);
1407 }
1408 
1409 
1410 /*
1411  Wait until the last reader of the page in the block
1412  signals its termination
1413 */
1414 
1415 static void wait_for_readers(KEY_CACHE *keycache,
1416  BLOCK_LINK *block)
1417 {
1418  struct st_my_thread_var *thread= my_thread_var;
1419  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1420  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
1421  DBUG_ASSERT(block->hash_link);
1422  DBUG_ASSERT(block->hash_link->block == block);
1423  /* Linked in file_blocks or changed_blocks hash. */
1424  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1425  /* Not linked in LRU ring. */
1426  DBUG_ASSERT(!block->next_used);
1427  DBUG_ASSERT(!block->prev_used);
1428  while (block->hash_link->requests)
1429  {
1430  KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
1431  ("suspend thread %ld block %u",
1432  thread->id, BLOCK_NUMBER(block)));
1433  /* There must be no other waiter. We have no queue here. */
1434  DBUG_ASSERT(!block->condvar);
1435  block->condvar= &thread->suspend;
1436  keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
1437  block->condvar= NULL;
1438  }
1439 }
1440 
1441 
1442 /*
1443  Add a hash link to a bucket in the hash_table
1444 */
1445 
1446 static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
1447 {
1448  if (*start)
1449  (*start)->prev= &hash_link->next;
1450  hash_link->next= *start;
1451  hash_link->prev= start;
1452  *start= hash_link;
1453 }
1454 
1455 
1456 /*
1457  Remove a hash link from the hash table
1458 */
1459 
1460 static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
1461 {
1462  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos: %lu #requests=%u",
1463  (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
1464  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
1465  if ((*hash_link->prev= hash_link->next))
1466  hash_link->next->prev= hash_link->prev;
1467  hash_link->block= NULL;
1468 
1469  if (keycache->waiting_for_hash_link.last_thread)
1470  {
1471  /* Signal that a free hash link has appeared */
1472  struct st_my_thread_var *last_thread=
1473  keycache->waiting_for_hash_link.last_thread;
1474  struct st_my_thread_var *first_thread= last_thread->next;
1475  struct st_my_thread_var *next_thread= first_thread;
1476  KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
1477  struct st_my_thread_var *thread;
1478 
1479  hash_link->file= first_page->file;
1480  hash_link->diskpos= first_page->filepos;
1481  do
1482  {
1483  KEYCACHE_PAGE *page;
1484  thread= next_thread;
1485  page= (KEYCACHE_PAGE *) thread->opt_info;
1486  next_thread= thread->next;
1487  /*
1488  We notify all threads that ask for the same page
1489  as the first thread in the queue about the event
1490  */
1491  if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1492  {
1493  KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
1494  keycache_pthread_cond_signal(&thread->suspend);
1495  unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1496  }
1497  }
1498  while (thread != last_thread);
1499  link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1500  hash_link->diskpos)],
1501  hash_link);
1502  return;
1503  }
1504  hash_link->next= keycache->free_hash_list;
1505  keycache->free_hash_list= hash_link;
1506 }
1507 
1508 
1509 /*
1510  Get the hash link for a page
1511 */
1512 
1513 static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
1514  int file, my_off_t filepos)
1515 {
1516  reg1 HASH_LINK *hash_link, **start;
1517 #if defined(KEYCACHE_DEBUG)
1518  int cnt;
1519 #endif
1520 
1521  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
1522  (uint) file,(ulong) filepos));
1523 
1524 restart:
1525  /*
1526  Find the bucket in the hash table for the pair (file, filepos);
1527  start contains the head of the bucket list,
1528  hash_link points to the first member of the list
1529  */
1530  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1531 #if defined(KEYCACHE_DEBUG)
1532  cnt= 0;
1533 #endif
1534  /* Look for an element for the pair (file, filepos) in the bucket chain */
1535  while (hash_link &&
1536  (hash_link->diskpos != filepos || hash_link->file != file))
1537  {
1538  hash_link= hash_link->next;
1539 #if defined(KEYCACHE_DEBUG)
1540  cnt++;
1541  if (! (cnt <= keycache->hash_links_used))
1542  {
1543  int i;
1544  for (i=0, hash_link= *start ;
1545  i < cnt ; i++, hash_link= hash_link->next)
1546  {
1547  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
1548  (uint) hash_link->file,(ulong) hash_link->diskpos));
1549  }
1550  }
1551  KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
1552 #endif
1553  }
1554  if (! hash_link)
1555  {
1556  /* There is no hash link in the hash table for the pair (file, filepos) */
1557  if (keycache->free_hash_list)
1558  {
1559  hash_link= keycache->free_hash_list;
1560  keycache->free_hash_list= hash_link->next;
1561  }
1562  else if (keycache->hash_links_used < keycache->hash_links)
1563  {
1564  hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1565  }
1566  else
1567  {
1568  /* Wait for a free hash link */
1569  struct st_my_thread_var *thread= my_thread_var;
1570  KEYCACHE_PAGE page;
1571  KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
1572  page.file= file;
1573  page.filepos= filepos;
1574  thread->opt_info= (void *) &page;
1575  link_into_queue(&keycache->waiting_for_hash_link, thread);
1576  KEYCACHE_DBUG_PRINT("get_hash_link: wait",
1577  ("suspend thread %ld", thread->id));
1578  keycache_pthread_cond_wait(&thread->suspend,
1579  &keycache->cache_lock);
1580  thread->opt_info= NULL;
1581  goto restart;
1582  }
1583  hash_link->file= file;
1584  hash_link->diskpos= filepos;
1585  link_hash(start, hash_link);
1586  }
1587  /* Register the request for the page */
1588  hash_link->requests++;
1589 
1590  return hash_link;
1591 }
1592 
1593 
1594 /*
1595  Get a block for the file page requested by a keycache read/write operation;
1596  If the page is not in the cache return a free block; if there is none,
1597  return the LRU block after saving its buffer if the page is dirty.
1598 
1599  SYNOPSIS
1600 
1601  find_key_block()
1602  keycache pointer to a key cache data structure
1603  file handler for the file to read page from
1604  filepos position of the page in the file
1605  init_hits_left how to initialize the block counter for the page
1606  wrmode <-> get for writing
1607  page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1608 
1609  RETURN VALUE
1610  Pointer to the found block if successful, 0 - otherwise
1611 
1612  NOTES.
1613  For the page from file positioned at filepos the function checks whether
1614  the page is in the key cache specified by the first parameter.
1615  If this is the case it immediately returns the block.
1616  If not, the function first chooses a block for this page. If there are
1617  no unused blocks in the key cache yet, the function takes the block
1618  at the very beginning of the warm sub-chain. It saves the page in that
1619  block if it's dirty before returning the pointer to it.
1620  The function returns in the page_st parameter the following values:
1621  PAGE_READ - if page already in the block,
1622  PAGE_TO_BE_READ - if it is to be read yet by the current thread
1623  PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1624  If an error occurs the BLOCK_ERROR bit is set in the block status.
1625  It might happen that there are no blocks in the LRU chain (in the warm
1626  part): all blocks are unlinked for some read/write operations. Then the
1627  function waits until the first of these operations links some block back.
1628 */
1629 
1630 static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1631  File file, my_off_t filepos,
1632  int init_hits_left,
1633  int wrmode, int *page_st)
1634 {
1635  HASH_LINK *hash_link;
1636  BLOCK_LINK *block;
1637  int error= 0;
1638  int page_status;
1639 
1640  DBUG_ENTER("find_key_block");
1641  KEYCACHE_THREAD_TRACE("find_key_block:begin");
1642  DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1643  file, (ulong) filepos, wrmode));
1644  KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d",
1645  file, (ulong) filepos,
1646  wrmode));
1647 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1648  DBUG_EXECUTE("check_keycache2",
1649  test_key_cache(keycache, "start of find_key_block", 0););
1650 #endif
1651 
1652 restart:
1653  /*
1654  If the flush phase of a resize operation fails, the cache is left
1655  unusable. This will be detected only after "goto restart".
1656  */
1657  if (!keycache->can_be_used)
1658  DBUG_RETURN(0);
1659 
1660  /*
1661  Find the hash_link for the requested file block (file, filepos). We
1662  always get a hash_link here. It has registered our request so
1663  that no other thread can use it for another file block until we
1664  release the request (which is done by remove_reader() usually). The
1665  hash_link can have a block assigned to it or not. If there is a
1666  block, it may be assigned to this hash_link or not. In cases where a
1667  block is evicted from the cache, it is taken from the LRU ring and
1668  referenced by the new hash_link. But the block can still be assigned
1669  to its old hash_link for some time if it needs to be flushed first,
1670  or if there are other threads still reading it.
1671 
1672  Summary:
1673  hash_link is always returned.
1674  hash_link->block can be:
1675  - NULL or
1676  - not assigned to this hash_link or
1677  - assigned to this hash_link. If assigned, the block can have
1678  - invalid data (when freshly assigned) or
1679  - valid data. Valid data can be
1680  - changed over the file contents (dirty) or
1681  - not changed (clean).
1682  */
1683  hash_link= get_hash_link(keycache, file, filepos);
1684  DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));
1685 
1686  page_status= -1;
1687  if ((block= hash_link->block) &&
1688  block->hash_link == hash_link && (block->status & BLOCK_READ))
1689  {
1690  /* Assigned block with valid (changed or unchanged) contents. */
1691  page_status= PAGE_READ;
1692  }
1693  /*
1694  else (page_status == -1)
1695  - block == NULL or
1696  - block not assigned to this hash_link or
1697  - block assigned but not yet read from file (invalid data).
1698  */
1699 
1700  if (keycache->in_resize)
1701  {
1702  /* This is a request during a resize operation */
1703 
1704  if (!block)
1705  {
1706  struct st_my_thread_var *thread;
1707 
1708  /*
1709  The file block is not in the cache. We don't need it in the
1710  cache: we are going to read or write directly to file. Cancel
1711  the request. We can simply decrement hash_link->requests because
1712  we have not released cache_lock since incrementing it. So no other
1713  thread can be waiting for our request to be released.
1714  */
1715  if (hash_link->requests == 1)
1716  {
1717  /*
1718  We are the only one to request this hash_link (this file/pos).
1719  Free the hash_link.
1720  */
1721  hash_link->requests--;
1722  unlink_hash(keycache, hash_link);
1723  DBUG_RETURN(0);
1724  }
1725 
1726  /*
1727  More requests on the hash_link. Someone is trying to evict a block
1728  for this hash_link (this could have begun before the resize started).
1729  This means that the LRU ring is empty. Otherwise a block could
1730  be assigned immediately. Behave like a thread that wants to
1731  evict a block for this file/pos. Add to the queue of threads
1732  waiting for a block. Wait until there is one assigned.
1733 
1734  Refresh the request on the hash-link so that it cannot be reused
1735  for another file/pos.
1736  */
1737  thread= my_thread_var;
1738  thread->opt_info= (void *) hash_link;
1739  link_into_queue(&keycache->waiting_for_block, thread);
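  /*
    Note (added for clarity): the loop below guards against spurious
    wakeups. link_block() assigns a freed block to the waiting hash_link,
    unlinks this thread from waiting_for_block (which resets thread->next
    to NULL) and signals it; only then does the loop terminate.
  */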
1740  do
1741  {
1742  KEYCACHE_DBUG_PRINT("find_key_block: wait",
1743  ("suspend thread %ld", thread->id));
1744  keycache_pthread_cond_wait(&thread->suspend,
1745  &keycache->cache_lock);
1746  } while (thread->next);
1747  thread->opt_info= NULL;
1748  /*
1749  A block should now be assigned to the hash_link. But it may
1750  still need to be evicted. Anyway, we should re-check the
1751  situation. page_status must be set correctly.
1752  */
1753  hash_link->requests--;
1754  goto restart;
1755  } /* end of if (!block) */
1756 
1757  /*
1758  There is a block for this file/pos in the cache. Register a
1759  request on it. This unlinks it from the LRU ring (if it is there)
1760  and hence protects it against eviction (if not already in
1761  eviction). We need this for returning the block to the caller, for
1762  calling remove_reader() (for debugging purposes), and for calling
1763  free_block(). The only case where we don't need the request is if
1764  the block is in eviction. In that case we have to unregister the
1765  request later.
1766  */
1767  reg_requests(keycache, block, 1);
1768 
1769  if (page_status != PAGE_READ)
1770  {
1771  /*
1772  - block not assigned to this hash_link or
1773  - block assigned but not yet read from file (invalid data).
1774 
1775  This must be a block in eviction. It will be read soon. We need
1776  to wait here until this has happened. Otherwise the caller could
1777  access a wrong block or a block which is still being read. While waiting
1778  we cannot lose hash_link nor block. We have registered a request
1779  on the hash_link. Everything can happen to the block but changes
1780  in the hash_link -> block relationship. In other words:
1781  everything can happen to the block but free or another completed
1782  eviction.
1783 
1784  Note that we behave like a secondary requestor here. We just
1785  cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1786  read requests and writes on dirty blocks that are not in flush
1787  only. Waiting here on COND_FOR_REQUESTED works in all
1788  situations.
1789  */
1790  DBUG_ASSERT(((block->hash_link != hash_link) &&
1791  (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1792  ((block->hash_link == hash_link) &&
1793  !(block->status & BLOCK_READ)));
1794  wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1795  /*
1796  Here we can trust that the block has been assigned to this
1797  hash_link (block->hash_link == hash_link) and read into the
1798  buffer (BLOCK_READ). The worst thing possible here is that the
1799  block is in free (BLOCK_REASSIGNED). But the block is still
1800  assigned to the hash_link. The freeing thread waits until we
1801  release our request on the hash_link. The block cannot be in
1802  eviction again because we registered a request on it before
1803  starting to wait.
1804  */
1805  DBUG_ASSERT(block->hash_link == hash_link);
1806  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1807  DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1808  }
1809  /*
1810  The block is in the cache. Assigned to the hash_link. Valid data.
1811  Note that in case of page_st == PAGE_READ, the block can be marked
1812  for eviction. In any case it can be marked for freeing.
1813  */
1814 
1815  if (!wrmode)
1816  {
1817  /* A reader can just read the block. */
1818  *page_st= PAGE_READ;
1819  DBUG_ASSERT((hash_link->file == file) &&
1820  (hash_link->diskpos == filepos) &&
1821  (block->hash_link == hash_link));
1822  DBUG_RETURN(block);
1823  }
1824 
1825  /*
1826  This is a writer. No two writers for the same block can exist.
1827  This must be assured by locks outside of the key cache.
1828  */
1829  DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1830 
1831  while (block->status & BLOCK_IN_FLUSH)
1832  {
1833  /*
1834  Wait until the block is flushed to file. Do not release the
1835  request on the hash_link yet, to prevent the block from being freed
1836  or reassigned while we wait. While we wait, several things can
1837  happen to the block, including another flush. But the block
1838  cannot be reassigned to another hash_link until we release our
1839  request on it. But it can be marked BLOCK_REASSIGNED from free
1840  or eviction, while they wait for us to release the hash_link.
1841  */
1842  wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1843  /*
1844  If the flush phase failed, the resize could have finished while
1845  we waited here.
1846  */
1847  if (!keycache->in_resize)
1848  {
1849  remove_reader(block);
1850  unreg_request(keycache, block, 1);
1851  goto restart;
1852  }
1853  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1854  DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1855  DBUG_ASSERT(block->hash_link == hash_link);
1856  }
1857 
1858  if (block->status & BLOCK_CHANGED)
1859  {
1860  /*
1861  We want to write a block with changed contents. If the cache
1862  block size is bigger than the caller's block size (e.g. MyISAM),
1863  the caller may replace part of the block only. Changes of the
1864  other part of the block must be preserved. Since the block has
1865  not yet been selected for flush, we can still add our changes.
1866  */
1867  *page_st= PAGE_READ;
1868  DBUG_ASSERT((hash_link->file == file) &&
1869  (hash_link->diskpos == filepos) &&
1870  (block->hash_link == hash_link));
1871  DBUG_RETURN(block);
1872  }
1873 
1874  /*
1875  This is a write request for a clean block. We do not want to have
1876  new dirty blocks in the cache while resizing. We will free the
1877  block and write directly to file. If the block is in eviction or
1878  in free, we just let it go.
1879 
1880  Unregister from the hash_link. This must be done before freeing
1881  the block. And it must be done if not freeing the block. Because
1882  we could have waited above, we need to call remove_reader(). Other
1883  threads could wait for us to release our request on the hash_link.
1884  */
1885  remove_reader(block);
1886 
1887  /* If the block is not in eviction and not in free, we can free it. */
1888  if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1889  BLOCK_REASSIGNED)))
1890  {
1891  /*
1892  Free block as we are going to write directly to file.
1893  Although we have an exclusive lock for the updated key part,
1894  the control can be yielded by the current thread as we might
1895  have unfinished readers of other key parts in the block
1896  buffer. Still we are guaranteed not to have any readers
1897  of the key part we are writing into until the block is
1898  removed from the cache as we set the BLOCK_REASSIGNED
1899  flag (see the code below that handles reading requests).
1900  */
1901  free_block(keycache, block);
1902  }
1903  else
1904  {
1905  /*
1906  The block will be evicted/freed soon. Don't touch it in any way.
1907  Unregister the request that we registered above.
1908  */
1909  unreg_request(keycache, block, 1);
1910 
1911  /*
1912  The block is still assigned to the hash_link (the file/pos that
1913  we are going to write to). Wait until the eviction/free is
1914  complete. Otherwise the direct write could complete before all
1915  readers are done with the block. So they could read outdated
1916  data.
1917 
1918  Since we released our request on the hash_link, it can be reused
1919  for another file/pos. Hence we cannot just check for
1920  block->hash_link == hash_link. As long as the resize is
1921  proceeding the block cannot be reassigned to the same file/pos
1922  again. So we can terminate the loop when the block is no longer
1923  assigned to this file/pos.
1924  */
1925  do
1926  {
1927  wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1928  &keycache->cache_lock);
1929  /*
1930  If the flush phase failed, the resize could have finished
1931  while we waited here.
1932  */
1933  if (!keycache->in_resize)
1934  goto restart;
1935  } while (block->hash_link &&
1936  (block->hash_link->file == file) &&
1937  (block->hash_link->diskpos == filepos));
1938  }
1939  DBUG_RETURN(0);
1940  }
1941 
1942  if (page_status == PAGE_READ &&
1943  (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1944  BLOCK_REASSIGNED)))
1945  {
1946  /*
1947  This is a request for a block to be removed from cache. The block
1948  is assigned to this hash_link and contains valid data, but is
1949  marked for eviction or to be freed. Possible reasons why it has
1950  not yet been evicted/freed can be a flush before reassignment
1951  (BLOCK_IN_SWITCH), readers of the block have not finished yet
1952  (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1953  the block has been selected for it (BLOCK_IN_EVICTION).
1954  */
1955 
1956  KEYCACHE_DBUG_PRINT("find_key_block",
1957  ("request for old page in block %u "
1958  "wrmode: %d block->status: %d",
1959  BLOCK_NUMBER(block), wrmode, block->status));
1960  /*
1961  Only reading requests can proceed until the old dirty page is flushed,
1962  all others are to be suspended, then resubmitted
1963  */
1964  if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1965  {
1966  /*
1967  This is a read request and the block not yet reassigned. We can
1968  register our request and proceed. This unlinks the block from
1969  the LRU ring and protects it against eviction.
1970  */
1971  reg_requests(keycache, block, 1);
1972  }
1973  else
1974  {
1975  /*
1976  Either this is a write request for a block that is in eviction
1977  or in free. We must not use it any more. Instead we must evict
1978  another block. But we cannot do this before the eviction/free is
1979  done. Otherwise we would find the same hash_link + block again
1980  and again.
1981 
1982  Or this is a read request for a block in eviction/free that does
1983  not require a flush, but waits for readers to finish with the
1984  block. We do not read this block to let the eviction/free happen
1985  as soon as possible. Again we must wait so that we don't find
1986  the same hash_link + block again and again.
1987  */
1988  DBUG_ASSERT(hash_link->requests);
1989  hash_link->requests--;
1990  KEYCACHE_DBUG_PRINT("find_key_block",
1991  ("request waiting for old page to be saved"));
1992  wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1993  KEYCACHE_DBUG_PRINT("find_key_block",
1994  ("request for old page resubmitted"));
1995  /*
1996  The block is no longer assigned to this hash_link.
1997  Get another one.
1998  */
1999  goto restart;
2000  }
2001  }
2002  else
2003  {
2004  /*
2005  This is a request for a new block or for a block not to be removed.
2006  Either
2007  - block == NULL or
2008  - block not assigned to this hash_link or
2009  - block assigned but not yet read from file,
2010  or
2011  - block assigned with valid (changed or unchanged) data and
2012  - it will not be reassigned/freed.
2013  */
2014  if (! block)
2015  {
2016  /* No block is assigned to the hash_link yet. */
2017  if (keycache->blocks_unused)
2018  {
2019  if (keycache->free_block_list)
2020  {
2021  /* There is a block in the free list. */
2022  block= keycache->free_block_list;
2023  keycache->free_block_list= block->next_used;
2024  block->next_used= NULL;
2025  }
2026  else
2027  {
2028  size_t block_mem_offset;
2029  /* There are some never used blocks, take first of them */
2030  DBUG_ASSERT(keycache->blocks_used <
2031  (ulong) keycache->disk_blocks);
2032  block= &keycache->block_root[keycache->blocks_used];
2033  block_mem_offset=
2034  ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
2035  block->buffer= ADD_TO_PTR(keycache->block_mem,
2036  block_mem_offset,
2037  uchar*);
2038  keycache->blocks_used++;
2039  DBUG_ASSERT(!block->next_used);
2040  }
2041  DBUG_ASSERT(!block->prev_used);
2042  DBUG_ASSERT(!block->next_changed);
2043  DBUG_ASSERT(!block->prev_changed);
2044  DBUG_ASSERT(!block->hash_link);
2045  DBUG_ASSERT(!block->status);
2046  DBUG_ASSERT(!block->requests);
2047  keycache->blocks_unused--;
2048  block->status= BLOCK_IN_USE;
2049  block->length= 0;
2050  block->offset= keycache->key_cache_block_size;
2051  block->requests= 1;
2052  block->temperature= BLOCK_COLD;
2053  block->hits_left= init_hits_left;
2054  block->last_hit_time= 0;
2055  block->hash_link= hash_link;
2056  hash_link->block= block;
2057  link_to_file_list(keycache, block, file, 0);
2058  page_status= PAGE_TO_BE_READ;
2059  KEYCACHE_DBUG_PRINT("find_key_block",
2060  ("got free or never used block %u",
2061  BLOCK_NUMBER(block)));
2062  }
2063  else
2064  {
2065  /*
2066  There are no free blocks and no never used blocks, use a block
2067  from the LRU ring.
2068  */
2069 
2070  if (! keycache->used_last)
2071  {
2072  /*
2073  The LRU ring is empty. Wait until a new block is added to
2074  it. Several threads might wait here for the same hash_link,
2075  all of them must get the same block. While waiting for a
2076  block, after a block is selected for this hash_link, other
2077  threads can run first before this one awakes. During this
2078  time interval other threads find this hash_link pointing to
2079  the block, which is still assigned to another hash_link. In
2080  this case the block is not marked BLOCK_IN_SWITCH yet, but
2081  it is marked BLOCK_IN_EVICTION.
2082  */
2083 
2084  struct st_my_thread_var *thread= my_thread_var;
2085  thread->opt_info= (void *) hash_link;
2086  link_into_queue(&keycache->waiting_for_block, thread);
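  /*
    Note (added for clarity): same wait protocol as in the resize branch
    above - loop until link_block() has unlinked this thread from the
    queue and thread->next is NULL.
  */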
2087  do
2088  {
2089  KEYCACHE_DBUG_PRINT("find_key_block: wait",
2090  ("suspend thread %ld", thread->id));
2091  keycache_pthread_cond_wait(&thread->suspend,
2092  &keycache->cache_lock);
2093  }
2094  while (thread->next);
2095  thread->opt_info= NULL;
2096  /* Assert that block has a request registered. */
2097  DBUG_ASSERT(hash_link->block->requests);
2098  /* Assert that block is not in LRU ring. */
2099  DBUG_ASSERT(!hash_link->block->next_used);
2100  DBUG_ASSERT(!hash_link->block->prev_used);
2101  }
2102 
2103  /*
2104  If we waited above, hash_link->block has been assigned by
2105  link_block(). Otherwise it is still NULL. In the latter case
2106  we need to grab a block from the LRU ring ourselves.
2107  */
2108  block= hash_link->block;
2109  if (! block)
2110  {
2111  /* Select the last block from the LRU ring. */
2112  block= keycache->used_last->next_used;
2113  block->hits_left= init_hits_left;
2114  block->last_hit_time= 0;
2115  hash_link->block= block;
2116  /*
2117  Register a request on the block. This unlinks it from the
2118  LRU ring and protects it against eviction.
2119  */
2120  DBUG_ASSERT(!block->requests);
2121  reg_requests(keycache, block,1);
2122  /*
2123  We do not need to set block->status|= BLOCK_IN_EVICTION here
2124  because we will set block->status|= BLOCK_IN_SWITCH
2125  immediately without releasing the lock in between. This does
2126  also support debugging. When looking at the block, one can
2127  see if the block has been selected by link_block() after the
2128  LRU ring was empty, or if it was grabbed directly from the
2129  LRU ring in this branch.
2130  */
2131  }
2132 
2133  /*
2134  If we had to wait above, there is a small chance that another
2135  thread grabbed this block for the same file block already. But
2136  in most cases the first condition is true.
2137  */
2138  if (block->hash_link != hash_link &&
2139  ! (block->status & BLOCK_IN_SWITCH) )
2140  {
2141  /* this is a primary request for a new page */
2142  block->status|= BLOCK_IN_SWITCH;
2143 
2144  KEYCACHE_DBUG_PRINT("find_key_block",
2145  ("got block %u for new page", BLOCK_NUMBER(block)));
2146 
2147  if (block->status & BLOCK_CHANGED)
2148  {
2149  /* The block contains a dirty page - push it out of the cache */
2150 
2151  KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
2152  if (block->status & BLOCK_IN_FLUSH)
2153  {
2154  /*
2155  The block is marked for flush. If we do not wait here,
2156  it could happen that we write the block, reassign it to
2157  another file block, then, before the new owner can read
2158  the new file block, the flusher writes the cache block
2159  (which still has the old contents) to the new file block!
2160  */
2161  wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2162  &keycache->cache_lock);
2163  /*
2164  The block is marked BLOCK_IN_SWITCH. It should be left
2165  alone except for reading. No free, no write.
2166  */
2167  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2168  DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
2169  BLOCK_CHANGED |
2170  BLOCK_FOR_UPDATE)));
2171  }
2172  else
2173  {
2174  block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
2175  /*
2176  BLOCK_IN_EVICTION may be true or not. Other flags must
2177  have a fixed value.
2178  */
2179  DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2180  (BLOCK_READ | BLOCK_IN_SWITCH |
2181  BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2182  BLOCK_CHANGED | BLOCK_IN_USE));
2183  DBUG_ASSERT(block->hash_link);
2184 
2185  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2186  /*
2187  The call is thread safe because only the current
2188  thread might change the block->hash_link value
2189  */
2190  error= my_pwrite(block->hash_link->file,
2191  block->buffer + block->offset,
2192  block->length - block->offset,
2193  block->hash_link->diskpos + block->offset,
2194  MYF(MY_NABP | MY_WAIT_IF_FULL));
2195  keycache_pthread_mutex_lock(&keycache->cache_lock);
2196 
2197  /* Block status must not have changed. */
2198  DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2199  (BLOCK_READ | BLOCK_IN_SWITCH |
2200  BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2201  BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2202  keycache->global_cache_write++;
2203  }
2204  }
2205 
2206  block->status|= BLOCK_REASSIGNED;
2207  /*
2208  The block comes from the LRU ring. It must have a hash_link
2209  assigned.
2210  */
2211  DBUG_ASSERT(block->hash_link);
2212  if (block->hash_link)
2213  {
2214  /*
2215  All pending requests for this page must be resubmitted.
2216  This must be done before waiting for readers. They could
2217  wait for the flush to complete. And we must also do it
2218  after the wait. Flushers might try to free the block while
2219  we wait. They would wait until the reassignment is
2220  complete. Also the block status must reflect the correct
2221  situation: The block is not changed nor in flush any more.
2222  Note that we must not change the BLOCK_CHANGED flag
2223  outside of link_to_file_list() so that it is always in the
2224  correct queue and the *blocks_changed counters are
2225  correct.
2226  */
2227  block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2228  link_to_file_list(keycache, block, block->hash_link->file, 1);
2229  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2230  /*
2231  The block is still assigned to its old hash_link.
2232  Wait until all pending read requests
2233  for this page are executed
2234  (we could have avoided this waiting, if we had read
2235  a page in the cache in a sweep, without yielding control)
2236  */
2237  wait_for_readers(keycache, block);
2238  DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
2239  block->prev_changed);
2240  /* The reader must not have been a writer. */
2241  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
2242 
2243  /* Wake flushers that might have found the block in between. */
2244  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2245 
2246  /* Remove the hash link for the old file block from the hash. */
2247  unlink_hash(keycache, block->hash_link);
2248 
2249  /*
2250  For sanity checks link_to_file_list() asserts that block
2251  and hash_link refer to each other. Hence we need to assign
2252  the hash_link first, but then we would not know if it was
2253  linked before. Hence we would not know if to unlink it. So
2254  unlink it here and call link_to_file_list(..., FALSE).
2255  */
2256  unlink_changed(block);
2257  }
2258  block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2259  block->length= 0;
2260  block->offset= keycache->key_cache_block_size;
2261  block->hash_link= hash_link;
2262  link_to_file_list(keycache, block, file, 0);
2263  page_status= PAGE_TO_BE_READ;
2264 
2265  KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2266  KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2267  }
2268  else
2269  {
2270  /*
2271  Either (block->hash_link == hash_link),
2272  or (block->status & BLOCK_IN_SWITCH).
2273 
2274  This is for secondary requests for a new file block only.
2275  Either it is already assigned to the new hash_link meanwhile
2276  (if we had to wait due to empty LRU), or it is already in
2277  eviction by another thread. Since this block has been
2278  grabbed from the LRU ring and attached to this hash_link,
2279  another thread cannot grab the same block from the LRU ring
2280  anymore. If the block is in eviction already, it must become
2281  attached to the same hash_link and as such destined for the
2282  same file block.
2283  */
2284  KEYCACHE_DBUG_PRINT("find_key_block",
2285  ("block->hash_link: %p hash_link: %p "
2286  "block->status: %u", block->hash_link,
2287  hash_link, block->status ));
2288  page_status= (((block->hash_link == hash_link) &&
2289  (block->status & BLOCK_READ)) ?
2290  PAGE_READ : PAGE_WAIT_TO_BE_READ);
2291  }
2292  }
2293  }
2294  else
2295  {
2296  /*
2297  Block is not NULL. This hash_link points to a block.
2298  Either
2299  - block not assigned to this hash_link (yet) or
2300  - block assigned but not yet read from file,
2301  or
2302  - block assigned with valid (changed or unchanged) data and
2303  - it will not be reassigned/freed.
2304 
2305  The first condition means hash_link points to a block in
2306  eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2307  But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2308  link_block(). In both cases it is destined for this hash_link
2309  and its file block address. When this hash_link got its block
2310  address, the block was removed from the LRU ring and cannot be
2311  selected for eviction (for another hash_link) again.
2312 
2313  Register a request on the block. This is another protection
2314  against eviction.
2315  */
2316  DBUG_ASSERT(((block->hash_link != hash_link) &&
2317  (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2318  ((block->hash_link == hash_link) &&
2319  !(block->status & BLOCK_READ)) ||
2320  ((block->status & BLOCK_READ) &&
2321  !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2322  reg_requests(keycache, block, 1);
2323  KEYCACHE_DBUG_PRINT("find_key_block",
2324  ("block->hash_link: %p hash_link: %p "
2325  "block->status: %u", block->hash_link,
2326  hash_link, block->status ));
2327  page_status= (((block->hash_link == hash_link) &&
2328  (block->status & BLOCK_READ)) ?
2329  PAGE_READ : PAGE_WAIT_TO_BE_READ);
2330  }
2331  }
2332 
2333  KEYCACHE_DBUG_ASSERT(page_status != -1);
2334  /* Same assert basically, but be very sure. */
2335  KEYCACHE_DBUG_ASSERT(block);
2336  /* Assert that block has a request and is not in LRU ring. */
2337  DBUG_ASSERT(block->requests);
2338  DBUG_ASSERT(!block->next_used);
2339  DBUG_ASSERT(!block->prev_used);
2340  /* Assert that we return the correct block. */
2341  DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
2342  ((block->hash_link->file == file) &&
2343  (block->hash_link->diskpos == filepos)));
2344  *page_st=page_status;
2345  KEYCACHE_DBUG_PRINT("find_key_block",
2346  ("fd: %d pos: %lu block->status: %u page_status: %d",
2347  file, (ulong) filepos, block->status,
2348  page_status));
2349 
2350 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2351  DBUG_EXECUTE("check_keycache2",
2352  test_key_cache(keycache, "end of find_key_block",0););
2353 #endif
2354  KEYCACHE_THREAD_TRACE("find_key_block:end");
2355  DBUG_RETURN(block);
2356 }
2357 
2358 
2359 /*
2360  Read into a key cache block buffer from disk.
2361 
2362  SYNOPSIS
2363 
2364  read_block()
2365  keycache pointer to a key cache data structure
2366  block block into whose buffer the data is to be read
2367  read_length size of data to be read
2368  min_length at least this much data must be read
2369  primary <-> the current thread will read the data
2370 
2371  RETURN VALUE
2372  None
2373 
2374  NOTES.
2375  The function either reads page data from file into the block buffer,
2376  or waits until another thread reads it. What page to read is determined
2377  by the block parameter - a reference to the hash link for this page.
2378  If an error occurs, the BLOCK_ERROR bit is set in the block status.
2379  We do not report an error when the size of the successfully read
2380  portion is less than read_length, but not less than min_length.
2381 */
2382 
2383 static void read_block(KEY_CACHE *keycache,
2384  BLOCK_LINK *block, uint read_length,
2385  uint min_length, my_bool primary)
2386 {
2387  size_t got_length;
2388 
2389  /* On entry cache_lock is locked */
2390 
2391  KEYCACHE_THREAD_TRACE("read_block");
2392  if (primary)
2393  {
2394  /*
2395  This code is executed only by threads that submitted primary
2396  requests. Until block->status contains BLOCK_READ, all other
2397  requests for the block become secondary requests. For a primary
2398  request the block must be properly initialized.
2399  */
2400  DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
2401  fail_block(block));
2402  DBUG_ASSERT((block->length == 0) || fail_block(block));
2403  DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
2404  fail_block(block));
2405  DBUG_ASSERT((block->requests > 0) || fail_block(block));
2406 
2407  KEYCACHE_DBUG_PRINT("read_block",
2408  ("page to be read by primary request"));
2409 
2410  keycache->global_cache_read++;
2411  /* Page is not in buffer yet, is to be read from disk */
2412  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2413  /*
2414  Here other threads may step in and register as secondary readers.
2415  They will register in block->wqueue[COND_FOR_REQUESTED].
2416  */
2417  got_length= my_pread(block->hash_link->file, block->buffer,
2418  read_length, block->hash_link->diskpos, MYF(0));
2419  keycache_pthread_mutex_lock(&keycache->cache_lock);
2420  /*
2421  The block can now have been marked for free (in case of
2422  FLUSH_RELEASE). Otherwise the state must be unchanged.
2423  */
2424  DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
2425  BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
2426  fail_block(block));
2427  DBUG_ASSERT((block->length == 0) || fail_block(block));
2428  DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
2429  fail_block(block));
2430  DBUG_ASSERT((block->requests > 0) || fail_block(block));
2431 
2432  if (got_length < min_length)
2433  block->status|= BLOCK_ERROR;
2434  else
2435  {
2436  block->status|= BLOCK_READ;
2437  block->length= got_length;
2438  /*
2439  Do not set block->offset here. If this block is marked
2440  BLOCK_CHANGED later, we want to flush only the modified part. So
2441  only a writer may set block->offset down from
2442  keycache->key_cache_block_size.
2443  */
2444  }
2445  KEYCACHE_DBUG_PRINT("read_block",
2446  ("primary request: new page in cache"));
2447  /* Signal that all pending requests for this page now can be processed */
2448  release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2449  }
2450  else
2451  {
2452  /*
2453  This code is executed only by threads that submitted secondary
2454  requests. At this point it could happen that the cache block is
2455  not yet assigned to the hash_link for the requested file block.
2456  But upon awaking from the wait this should be the case. Unfortunately
2457  we cannot assert this here because we do not know the hash_link
2458  for the requested file block nor the file and position. So we have
2459  to assert this in the caller.
2460  */
2461  KEYCACHE_DBUG_PRINT("read_block",
2462  ("secondary request waiting for new page to be read"));
2463  wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
2464  KEYCACHE_DBUG_PRINT("read_block",
2465  ("secondary request: new page in cache"));
2466  }
2467 }
2468 
2469 
2470 /*
2471  Read a block of data from a cached file into a buffer;
2472 
2473  SYNOPSIS
2474 
2475  key_cache_read()
2476  keycache pointer to a key cache data structure
2477  file handler for the file for the block of data to be read
2478  filepos position of the block of data in the file
2479  level determines the weight of the data
2480  buff buffer to where the data must be placed
2481  length length of the buffer
2482  block_length length of the block in the key cache buffer
2483  return_buffer return pointer to the key cache buffer with the data
2484 
2485  RETURN VALUE
2486  Returns the address where the data is placed if successful, 0 - otherwise.
2487 
2488  NOTES.
2489  The function ensures that a block of data of size length from file
2490  positioned at filepos is in the buffers for some key cache blocks.
2491  Then the function either copies the data into the buffer buff, or,
2492  if return_buffer is TRUE, it just returns the pointer to the key cache
2493  buffer with the data.
2494  Filepos must be a multiple of 'block_length', but it doesn't
2495  have to be a multiple of key_cache_block_size.
2496 */
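
/*
  A minimal usage sketch (not part of the original source). DFLT_INIT_HITS
  is the default 'level' from keycache.h; file, pos and the 1024-byte page
  size are assumptions for illustration.
*/
#if 0
  uchar page_buf[1024];
  if (!key_cache_read(keycache, file, pos, DFLT_INIT_HITS,
                      page_buf, sizeof(page_buf),
                      sizeof(page_buf) /* block_length */,
                      0 /* return_buffer: copy into page_buf */))
  {
    /* The read failed; my_errno is set. */
  }
#endif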
2497 
2498 uchar *key_cache_read(KEY_CACHE *keycache,
2499  File file, my_off_t filepos, int level,
2500  uchar *buff, uint length,
2501  uint block_length __attribute__((unused)),
2502  int return_buffer __attribute__((unused)))
2503 {
2504  my_bool locked_and_incremented= FALSE;
2505  int error=0;
2506  uchar *start= buff;
2507  DBUG_ENTER("key_cache_read");
2508  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
2509  (uint) file, (ulong) filepos, length));
2510 
2511  if (keycache->key_cache_inited)
2512  {
2513  /* Key cache is used */
2514  reg1 BLOCK_LINK *block;
2515  uint read_length;
2516  uint offset;
2517  int page_st;
2518 
2519  if (MYSQL_KEYCACHE_READ_START_ENABLED())
2520  {
2521  MYSQL_KEYCACHE_READ_START(my_filename(file), length,
2522  (ulong) (keycache->blocks_used *
2523  keycache->key_cache_block_size),
2524  (ulong) (keycache->blocks_unused *
2525  keycache->key_cache_block_size));
2526  }
2527 
2528  /*
2529  Once the key cache is initialized, we use the cache_lock to
2530  reliably distinguish the cases of normal operation, resizing, and
2531  disabled cache. We always increment and decrement
2532  'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2533  */
2534  keycache_pthread_mutex_lock(&keycache->cache_lock);
2535  /*
2536  Cache resizing has two phases: Flushing and re-initializing. In
2537  the flush phase read requests are allowed to bypass the cache for
2538  blocks not in the cache. find_key_block() returns NULL in this
2539  case.
2540 
2541  After the flush phase new I/O requests must wait until the
2542  re-initialization is done. The re-initialization can be done only
2543  if no I/O request is in progress. The reason is that
2544  key_cache_block_size can change. With enabled cache, I/O is done
2545  in chunks of key_cache_block_size. Every chunk tries to use a
2546  cache block first. If the block size changes in the middle, a
2547  block could be missed and old data could be read.
2548  */
2549  while (keycache->in_resize && !keycache->resize_in_flush)
2550  wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2551  /* Register the I/O for the next resize. */
2552  inc_counter_for_resize_op(keycache);
2553  locked_and_incremented= TRUE;
2554  /* Requested data may not always be aligned to cache blocks. */
2555  offset= (uint) (filepos % keycache->key_cache_block_size);
2556  /* Read data in key_cache_block_size increments */
2557  do
2558  {
2559  /* Cache could be disabled in a later iteration. */
2560  if (!keycache->can_be_used)
2561  {
2562  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
2563  goto no_key_cache;
2564  }
2565  /* Start reading at the beginning of the cache block. */
2566  filepos-= offset;
2567  /* Do not read beyond the end of the cache block. */
2568  read_length= length;
2569  set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2570  KEYCACHE_DBUG_ASSERT(read_length > 0);
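  /*
    Worked example (added for clarity), assuming key_cache_block_size ==
    1024, filepos == 1500 and length == 3000: offset= 1500 % 1024 = 476,
    so the first iteration uses the cache block at file position 1024 and
    copies read_length= 1024-476 = 548 bytes from block->buffer+476.
    Then offset becomes 0 and the remaining 2452 bytes are served from
    the blocks at 2048, 3072 and 4096 (1024, 1024 and 404 bytes).
  */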
2571 
2572  if (block_length > keycache->key_cache_block_size || offset)
2573  return_buffer=0;
2574 
2575  /* Request the cache block that matches file/pos. */
2576  keycache->global_cache_r_requests++;
2577 
2578  MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);
2579 
2580  block=find_key_block(keycache, file, filepos, level, 0, &page_st);
2581  if (!block)
2582  {
2583  /*
2584  This happens only for requests submitted during key cache
2585  resize. The block is not in the cache and shall not go in.
2586  Read directly from file.
2587  */
2588  keycache->global_cache_read++;
2589  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2590  error= (my_pread(file, (uchar*) buff, read_length,
2591  filepos + offset, MYF(MY_NABP)) != 0);
2592  keycache_pthread_mutex_lock(&keycache->cache_lock);
2593  goto next_block;
2594  }
2595  if (!(block->status & BLOCK_ERROR))
2596  {
2597  if (page_st != PAGE_READ)
2598  {
2599  MYSQL_KEYCACHE_READ_MISS();
2600  /* The requested page is to be read into the block buffer */
2601  read_block(keycache, block,
2602  keycache->key_cache_block_size, read_length+offset,
2603  (my_bool)(page_st == PAGE_TO_BE_READ));
2604  /*
2605  A secondary request must now have the block assigned to the
2606  requested file block. It does not hurt to check it for
2607  primary requests too.
2608  */
2609  DBUG_ASSERT(keycache->can_be_used);
2610  DBUG_ASSERT(block->hash_link->file == file);
2611  DBUG_ASSERT(block->hash_link->diskpos == filepos);
2612  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2613  }
2614  else if (block->length < read_length + offset)
2615  {
2616  /*
2617  Impossible if nothing goes wrong:
2618  this could only happen if we are using a file with
2619  small key blocks and are trying to read outside the file
2620  */
2621  my_errno= -1;
2622  block->status|= BLOCK_ERROR;
2623  }
2624  else
2625  {
2626  MYSQL_KEYCACHE_READ_HIT();
2627  }
2628  }
2629 
2630  /* block status may have added BLOCK_ERROR in the above 'if'. */
2631  if (!(block->status & BLOCK_ERROR))
2632  {
2633  {
2634  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2635 #if !defined(SERIALIZED_READ_FROM_CACHE)
2636  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2637 #endif
2638 
2639  /* Copy data from the cache buffer */
2640  memcpy(buff, block->buffer+offset, (size_t) read_length);
2641 
2642 #if !defined(SERIALIZED_READ_FROM_CACHE)
2643  keycache_pthread_mutex_lock(&keycache->cache_lock);
2644  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2645 #endif
2646  }
2647  }
2648 
2649  remove_reader(block);
2650 
2651  /* Error injection for coverage testing. */
2652  DBUG_EXECUTE_IF("key_cache_read_block_error",
2653  block->status|= BLOCK_ERROR;);
2654 
2655  /* Do not link erroneous blocks into the LRU ring, but free them. */
2656  if (!(block->status & BLOCK_ERROR))
2657  {
2658  /*
2659  Link the block into the LRU ring if it's the last submitted
2660  request for the block. This enables eviction for the block.
2661  */
2662  unreg_request(keycache, block, 1);
2663  }
2664  else
2665  {
2666  free_block(keycache, block);
2667  error= 1;
2668  break;
2669  }
2670 
2671  next_block:
2672  buff+= read_length;
2673  filepos+= read_length+offset;
2674  offset= 0;
2675 
2676  } while ((length-= read_length));
2677  if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
2678  {
2679  MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
2680  keycache->key_cache_block_size),
2681  (ulong) (keycache->blocks_unused *
2682  keycache->key_cache_block_size));
2683  }
2684  goto end;
2685  }
2686  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));
2687 
2688 no_key_cache:
2689  /* Key cache is not used */
2690 
2691  keycache->global_cache_r_requests++;
2692  keycache->global_cache_read++;
2693 
2694  if (locked_and_incremented)
2695  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2696  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
2697  error= 1;
2698  if (locked_and_incremented)
2699  keycache_pthread_mutex_lock(&keycache->cache_lock);
2700 
2701 end:
2702  if (locked_and_incremented)
2703  {
2704  dec_counter_for_resize_op(keycache);
2705  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2706  }
2707  DBUG_PRINT("exit", ("error: %d", error ));
2708  DBUG_RETURN(error ? (uchar*) 0 : start);
2709 }
2710 
2711 
2712 /*
2713  Insert a block of file data from a buffer into key cache
2714 
2715  SYNOPSIS
2716  key_cache_insert()
2717  keycache pointer to a key cache data structure
2718  file handler for the file to insert data from
2719  filepos position of the block of data in the file to insert
2720  level determines the weight of the data
2721  buff buffer to read data from
2722  length length of the data in the buffer
2723 
2724  NOTES
2725  This is used by MyISAM to move all blocks from an index file to the key
2726  cache
2727 
2728  RETURN VALUE
2729  0 if successful, 1 - otherwise.
2730 */
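
/*
  A minimal usage sketch (not part of the original source): the preload
  loop of a LOAD INDEX INTO CACHE style caller. CHUNK, start and end are
  hypothetical; each chunk is read from the index file first and then
  pushed into the cache.
*/
#if 0
  uchar buf[CHUNK];
  my_off_t pos;
  for (pos= start; pos < end; pos+= CHUNK)
  {
    if (my_pread(file, buf, CHUNK, pos, MYF(MY_NABP)) ||
        key_cache_insert(keycache, file, pos, DFLT_INIT_HITS, buf, CHUNK))
      break;                                /* stop preloading on error */
  }
#endif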
2731 
2732 int key_cache_insert(KEY_CACHE *keycache,
2733  File file, my_off_t filepos, int level,
2734  uchar *buff, uint length)
2735 {
2736  int error= 0;
2737  DBUG_ENTER("key_cache_insert");
2738  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
2739  (uint) file,(ulong) filepos, length));
2740 
2741  if (keycache->key_cache_inited)
2742  {
2743  /* Key cache is used */
2744  reg1 BLOCK_LINK *block;
2745  uint read_length;
2746  uint offset;
2747  int page_st;
2748  my_bool locked_and_incremented= FALSE;
2749 
2750  /*
2751  Once the keycache is initialized, we use the cache_lock to
2752  reliably distinguish the cases of normal operation, resizing, and
2753  disabled cache. We always increment and decrement
2754  'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2755  */
2756  keycache_pthread_mutex_lock(&keycache->cache_lock);
2757  /*
2758  We do not load index data into a disabled cache nor into an
2759  ongoing resize.
2760  */
2761  if (!keycache->can_be_used || keycache->in_resize)
2762  goto no_key_cache;
2763  /* Register the pseudo I/O for the next resize. */
2764  inc_counter_for_resize_op(keycache);
2765  locked_and_incremented= TRUE;
2766  /* Loaded data may not always be aligned to cache blocks. */
2767  offset= (uint) (filepos % keycache->key_cache_block_size);
2768  /* Load data in key_cache_block_size increments. */
2769  do
2770  {
2771  /* Cache could be disabled or resizing in a later iteration. */
2772  if (!keycache->can_be_used || keycache->in_resize)
2773  goto no_key_cache;
2774  /* Start loading at the beginning of the cache block. */
2775  filepos-= offset;
2776  /* Do not load beyond the end of the cache block. */
2777  read_length= length;
2778  set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2779  KEYCACHE_DBUG_ASSERT(read_length > 0);
2780 
2781  /* The block has been read by the caller already. */
2782  keycache->global_cache_read++;
2783  /* Request the cache block that matches file/pos. */
2784  keycache->global_cache_r_requests++;
2785  block= find_key_block(keycache, file, filepos, level, 0, &page_st);
2786  if (!block)
2787  {
2788  /*
2789  This happens only for requests submitted during key cache
2790  resize. The block is not in the cache and shall not go in.
2791  Stop loading index data.
2792  */
2793  goto no_key_cache;
2794  }
2795  if (!(block->status & BLOCK_ERROR))
2796  {
2797  if ((page_st == PAGE_WAIT_TO_BE_READ) ||
2798  ((page_st == PAGE_TO_BE_READ) &&
2799  (offset || (read_length < keycache->key_cache_block_size))))
2800  {
2801  /*
2802  Either
2803 
2804  this is a secondary request for a block to be read into the
2805  cache. The block is in eviction. It is not yet assigned to
2806  the requested file block (it does not point to the right
2807  hash_link). So we cannot call remove_reader() on the block.
2808  And we cannot access the hash_link directly here. We need to
2809  wait until the assignment is complete. read_block() executes
2810  the correct wait when called with primary == FALSE.
2811 
2812  Or
2813 
2814  this is a primary request for a block to be read into the
2815  cache and the supplied data does not fill the whole block.
2816 
2817  This function is called on behalf of a LOAD INDEX INTO CACHE
2818  statement, which is a read-only task and allows other
2819  readers. It is possible that a parallel running reader tries
2820  to access this block. If it needs more data than has been
2821  supplied here, it would report an error. To be sure that we
2822  have all data in the block that is available in the file, we
2823  read the block ourselves.
2824 
2825  Though reading again what the caller has already read is an
2826  expensive operation, we need to do this for correctness.
2827  */
2828  read_block(keycache, block, keycache->key_cache_block_size,
2829  read_length + offset, (page_st == PAGE_TO_BE_READ));
2830  /*
2831  A secondary request must now have the block assigned to the
2832  requested file block. It does not hurt to check it for
2833  primary requests too.
2834  */
2835  DBUG_ASSERT(keycache->can_be_used);
2836  DBUG_ASSERT(block->hash_link->file == file);
2837  DBUG_ASSERT(block->hash_link->diskpos == filepos);
2838  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2839  }
2840  else if (page_st == PAGE_TO_BE_READ)
2841  {
2842  /*
2843  This is a new block in the cache. If we come here, we have
2844  data for the whole block.
2845  */
2846  DBUG_ASSERT(block->hash_link->requests);
2847  DBUG_ASSERT(block->status & BLOCK_IN_USE);
2848  DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
2849  (block->status & BLOCK_READ));
2850 
2851 #if !defined(SERIALIZED_READ_FROM_CACHE)
2852  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2853  /*
2854  Here other threads may step in and register as secondary readers.
2855  They will register in block->wqueue[COND_FOR_REQUESTED].
2856  */
2857 #endif
2858 
2859  /* Copy data from buff */
2860  memcpy(block->buffer+offset, buff, (size_t) read_length);
2861 
2862 #if !defined(SERIALIZED_READ_FROM_CACHE)
2863  keycache_pthread_mutex_lock(&keycache->cache_lock);
2864  DBUG_ASSERT(block->status & BLOCK_IN_USE);
2865  DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
2866  (block->status & BLOCK_READ));
2867 #endif
2868  /*
2869  After the data is in the buffer, we can declare the block
2870  valid. Now other threads do not need to register as
2871  secondary readers any more. They can immediately access the
2872  block.
2873  */
2874  block->status|= BLOCK_READ;
2875  block->length= read_length+offset;
2876  /*
2877  Do not set block->offset here. If this block is marked
2878  BLOCK_CHANGED later, we want to flush only the modified part. So
2879  only a writer may set block->offset down from
2880  keycache->key_cache_block_size.
2881  */
2882  KEYCACHE_DBUG_PRINT("key_cache_insert",
2883  ("primary request: new page in cache"));
2884  /* Signal all pending requests. */
2885  release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2886  }
2887  else
2888  {
2889  /*
2890  page_st == PAGE_READ. The block is in the buffer. All data
2891  must already be present. Blocks are always read with all
2892  data available on file. Assert that the block does not have
2893  less content than the preloader supplies. If the caller has
2894  data beyond block->length, it means that a file write has
2895  been done while this block was in cache and not extended
2896  with the new data. If the condition is met, we can simply
2897  ignore the block.
2898  */
2899  DBUG_ASSERT((page_st == PAGE_READ) &&
2900  (read_length + offset <= block->length));
2901  }
2902 
2903  /*
2904  A secondary request must now have the block assigned to the
2905  requested file block. It does not hurt to check it for primary
2906  requests too.
2907  */
2908  DBUG_ASSERT(block->hash_link->file == file);
2909  DBUG_ASSERT(block->hash_link->diskpos == filepos);
2910  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2911  } /* end of if (!(block->status & BLOCK_ERROR)) */
2912 
2913  remove_reader(block);
2914 
2915  /* Error injection for coverage testing. */
2916  DBUG_EXECUTE_IF("key_cache_insert_block_error",
2917  block->status|= BLOCK_ERROR; errno=EIO;);
2918 
2919  /* Do not link erroneous blocks into the LRU ring, but free them. */
2920  if (!(block->status & BLOCK_ERROR))
2921  {
2922  /*
2923  Link the block into the LRU ring if it's the last submitted
2924  request for the block. This enables eviction for the block.
2925  */
2926  unreg_request(keycache, block, 1);
2927  }
2928  else
2929  {
2930  free_block(keycache, block);
2931  error= 1;
2932  break;
2933  }
2934 
2935  buff+= read_length;
2936  filepos+= read_length+offset;
2937  offset= 0;
2938 
2939  } while ((length-= read_length));
2940 
2941  no_key_cache:
2942  if (locked_and_incremented)
2943  dec_counter_for_resize_op(keycache);
2944  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2945  }
2946  DBUG_RETURN(error);
2947 }
2948 
2949 
2950 /*
2951  Write a buffer into a cached file.
2952 
2953  SYNOPSIS
2954 
2955  key_cache_write()
2956  keycache pointer to a key cache data structure
2957  file handler for the file to write data to
2958  filepos position in the file to write data to
2959  level determines the weight of the data
2960  buff buffer with the data
2961  length length of the buffer
2962  dont_write if it is 0, then all dirty pages involved in writing
2963  should have been flushed from the key cache
2964 
2965  RETURN VALUE
2966  0 if successful, 1 - otherwise.
2967 
2968  NOTES.
2969  The function copies the data of size length from buff into buffers
2970  for key cache blocks that are assigned to contain the portion of
2971  the file starting with position filepos.
2972  It ensures that this data is flushed to the file if dont_write is FALSE.
2973  Filepos must be a multiple of 'block_length', but it doesn't
2974  have to be a multiple of key_cache_block_size.
2975 
2976  dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
2977 */
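
/*
  A minimal usage sketch (not part of the original source): the server's
  normal write path uses dont_write == 1, which leaves the page dirty in
  the cache instead of writing through. page_buf and page_len are
  hypothetical.
*/
#if 0
  if (key_cache_write(keycache, file, pos, DFLT_INIT_HITS,
                      page_buf, page_len,
                      page_len /* block_length */, 1 /* dont_write */))
  {
    /* The write failed. */
  }
#endif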
2978 
2979 int key_cache_write(KEY_CACHE *keycache,
2980  File file, my_off_t filepos, int level,
2981  uchar *buff, uint length,
2982  uint block_length __attribute__((unused)),
2983  int dont_write)
2984 {
2985  my_bool locked_and_incremented= FALSE;
2986  int error=0;
2987  DBUG_ENTER("key_cache_write");
2988  DBUG_PRINT("enter",
2989  ("fd: %u pos: %lu length: %u block_length: %u"
2990  " key_block_length: %u",
2991  (uint) file, (ulong) filepos, length, block_length,
2992  keycache ? keycache->key_cache_block_size : 0));
2993 
2994  if (!dont_write)
2995  {
2996  /* purecov: begin inspected */
2997  /* Not used in the server. */
2998  /* Force writing from buff into disk. */
2999  keycache->global_cache_w_requests++;
3000  keycache->global_cache_write++;
3001  if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
3002  DBUG_RETURN(1);
3003  /* purecov: end */
3004  }
3005 
3006 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3007  DBUG_EXECUTE("check_keycache",
3008  test_key_cache(keycache, "start of key_cache_write", 1););
3009 #endif
3010 
3011  if (keycache->key_cache_inited)
3012  {
3013  /* Key cache is used */
3014  reg1 BLOCK_LINK *block;
3015  uint read_length;
3016  uint offset;
3017  int page_st;
3018 
3019  if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
3020  {
3021  MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
3022  (ulong) (keycache->blocks_used *
3023  keycache->key_cache_block_size),
3024  (ulong) (keycache->blocks_unused *
3025  keycache->key_cache_block_size));
3026  }
3027 
3028  /*
3029  Once the key cache is initialized, we use the cache_lock to
3030  reliably distinguish the cases of normal operation, resizing, and
3031  disabled cache. We always increment and decrement
3032  'cnt_for_resize_op' so that a resizer can wait for pending I/O.
3033  */
3034  keycache_pthread_mutex_lock(&keycache->cache_lock);
3035  /*
3036  Cache resizing has two phases: Flushing and re-initializing. In
3037  the flush phase write requests can modify dirty blocks that are
3038  not yet in flush. Otherwise they are allowed to bypass the cache.
3039  find_key_block() returns NULL in both cases (clean blocks and
3040  non-cached blocks).
3041 
3042  After the flush phase new I/O requests must wait until the
3043  re-initialization is done. The re-initialization can be done only
3044  if no I/O request is in progress. The reason is that
3045  key_cache_block_size can change. With enabled cache I/O is done in
3046  chunks of key_cache_block_size. Every chunk tries to use a cache
3047  block first. If the block size changes in the middle, a block
3048  could be missed and data could be written below a cached block.
3049  */
3050  while (keycache->in_resize && !keycache->resize_in_flush)
3051  wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
3052  /* Register the I/O for the next resize. */
3053  inc_counter_for_resize_op(keycache);
3054  locked_and_incremented= TRUE;
3055  /* Requested data may not always be aligned to cache blocks. */
3056  offset= (uint) (filepos % keycache->key_cache_block_size);
3057  /* Write data in key_cache_block_size increments. */
3058  do
3059  {
3060  /* Cache could be disabled in a later iteration. */
3061  if (!keycache->can_be_used)
3062  goto no_key_cache;
3063 
3064  MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
3065  /* Start writing at the beginning of the cache block. */
3066  filepos-= offset;
3067  /* Do not write beyond the end of the cache block. */
3068  read_length= length;
3069  set_if_smaller(read_length, keycache->key_cache_block_size-offset);
3070  KEYCACHE_DBUG_ASSERT(read_length > 0);
3071 
3072  /* Request the cache block that matches file/pos. */
3073  keycache->global_cache_w_requests++;
3074  block= find_key_block(keycache, file, filepos, level, 1, &page_st);
3075  if (!block)
3076  {
3077  /*
3078  This happens only for requests submitted during key cache
3079  resize. The block is not in the cache and shall not go in.
3080  Write directly to file.
3081  */
3082  if (dont_write)
3083  {
3084  /* Used in the server. */
3085  keycache->global_cache_write++;
3086  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3087  if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
3088  MYF(MY_NABP | MY_WAIT_IF_FULL)))
3089  error=1;
3090  keycache_pthread_mutex_lock(&keycache->cache_lock);
3091  }
3092  goto next_block;
3093  }
3094  /*
3095  Prevent the block from being flushed and from being selected to be
3096  freed. This must be set when we release the cache_lock.
3097  However, we must not set the status of the block before it is
3098  assigned to this file/pos.
3099  */
3100  if (page_st != PAGE_WAIT_TO_BE_READ)
3101  block->status|= BLOCK_FOR_UPDATE;
3102  /*
3103  We must read the file block first if it is not yet in the cache
3104  and we do not replace all of its contents.
3105 
3106  In cases where the cache block is big enough to contain (parts
3107  of) index blocks of different indexes, our request can be
3108  secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
3109  reading the file block. If the read completes after us, it
3110  overwrites our new contents with the old contents. So we have to
3111  wait for the other thread to complete the read of this block.
3112  read_block() takes care for the wait.
3113  */
3114  if (!(block->status & BLOCK_ERROR) &&
3115  ((page_st == PAGE_TO_BE_READ &&
3116  (offset || read_length < keycache->key_cache_block_size)) ||
3117  (page_st == PAGE_WAIT_TO_BE_READ)))
3118  {
3119  read_block(keycache, block,
3120  offset + read_length >= keycache->key_cache_block_size?
3121  offset : keycache->key_cache_block_size,
3122  offset, (page_st == PAGE_TO_BE_READ));
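  /*
    Note (added for clarity): for a primary read, if our write extends to
    the end of the cache block (offset + read_length >= block size), only
    the 'offset' bytes in front of the write need to be valid, so only
    those are read; otherwise the whole block is read. In both cases at
    least 'offset' bytes (min_length) must have been read successfully.
  */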
3123  DBUG_ASSERT(keycache->can_be_used);
3124  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
3125  /*
3126  Prevent the block from being flushed and from being selected to be
3127  freed. This must be set when we release the cache_lock.
3128  Here we set it in case we could not set it above.
3129  */
3130  block->status|= BLOCK_FOR_UPDATE;
3131  }
3132  /*
3133  The block should always be assigned to the requested file block
3134  here. It need not be BLOCK_READ when overwriting the whole block.
3135  */
3136  DBUG_ASSERT(block->hash_link->file == file);
3137  DBUG_ASSERT(block->hash_link->diskpos == filepos);
3138  DBUG_ASSERT(block->status & BLOCK_IN_USE);
3139  DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
3140  /*
3141  The block to be written must not be marked BLOCK_REASSIGNED.
3142  Otherwise it could be freed in dirty state or reused without
3143  another flush during eviction. It must also not be in flush.
3144  Otherwise the old contents may have been flushed already and
3145  the flusher could clear BLOCK_CHANGED without flushing the
3146  new changes again.
3147  */
3148  DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3149 
3150  while (block->status & BLOCK_IN_FLUSHWRITE)
3151  {
3152  /*
3153  Another thread is flushing the block. It was dirty already.
3154  Wait until the block is flushed to file. Otherwise we could
3155  modify the buffer contents just while it is being written to file.
3156  Unpredictable file block contents would be the result.
3157  While we wait, several things can happen to the block,
3158  including another flush. But the block cannot be reassigned to
3159  another hash_link until we release our request on it.
3160  */
3161  wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
3162  DBUG_ASSERT(keycache->can_be_used);
3163  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
3164  /* Still must not be marked for freeing. */
3165  DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3166  DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
3167  }
3168 
3169  /*
3170  We could perhaps release the cache_lock during access of the
3171  data like in the other functions. Locks outside of the key cache
3172  ensure that readers and a writer do not access the same range of
3173  data. Parallel accesses should happen only if the cache block
3174  contains multiple index blocks (or fragments of them). So different parts of
3175  the buffer would be read/written. An attempt to flush during
3176  memcpy() is prevented with BLOCK_FOR_UPDATE.
3177  */
3178  if (!(block->status & BLOCK_ERROR))
3179  {
3180 #if !defined(SERIALIZED_READ_FROM_CACHE)
3181  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3182 #endif
3183  memcpy(block->buffer+offset, buff, (size_t) read_length);
3184 
3185 #if !defined(SERIALIZED_READ_FROM_CACHE)
3186  keycache_pthread_mutex_lock(&keycache->cache_lock);
3187 #endif
3188  }
3189 
3190  if (!dont_write)
3191  {
3192  /* Not used in the server. buff has been written to disk at start. */
3193  if ((block->status & BLOCK_CHANGED) &&
3194  (!offset && read_length >= keycache->key_cache_block_size))
3195  link_to_file_list(keycache, block, block->hash_link->file, 1);
3196  }
3197  else if (! (block->status & BLOCK_CHANGED))
3198  link_to_changed_list(keycache, block);
3199  block->status|=BLOCK_READ;
3200  /*
3201  Allow the block to be selected for freeing. Since it is also
3202  marked BLOCK_CHANGED, it won't be selected for freeing without
3203  a flush.
3204  */
3205  block->status&= ~BLOCK_FOR_UPDATE;
3206  set_if_smaller(block->offset, offset);
3207  set_if_bigger(block->length, read_length+offset);
3208 
3209  /* Threads may be waiting for the changes to be complete. */
3210  release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
3211 
3212  /*
3213  If only a part of the cache block is to be replaced, and the
3214  rest has been read from file, then the cache lock has been
3215  released for I/O and it could be possible that another thread
3216  wants to evict or free the block and waits for it to be
3217  released. So we must not just decrement hash_link->requests, but
3218  also wake a waiting thread.
3219  */
3220  remove_reader(block);
3221 
3222  /* Error injection for coverage testing. */
3223  DBUG_EXECUTE_IF("key_cache_write_block_error",
3224  block->status|= BLOCK_ERROR;);
3225 
3226  /* Do not link erroneous blocks into the LRU ring, but free them. */
3227  if (!(block->status & BLOCK_ERROR))
3228  {
3229  /*
3230  Link the block into the LRU ring if it's the last submitted
3231  request for the block. This enables eviction for the block.
3232  */
3233  unreg_request(keycache, block, 1);
3234  }
3235  else
3236  {
3237  /* Pretend the block is "clean" to avoid complications. */
3238  block->status&= ~(BLOCK_CHANGED);
3239  free_block(keycache, block);
3240  error= 1;
3241  break;
3242  }
3243 
3244  next_block:
3245  buff+= read_length;
3246  filepos+= read_length+offset;
3247  offset= 0;
3248 
3249  } while ((length-= read_length));
3250  goto end;
3251  }
3252 
3253 no_key_cache:
3254  /* Key cache is not used */
3255  if (dont_write)
3256  {
3257  /* Used in the server. */
3258  keycache->global_cache_w_requests++;
3259  keycache->global_cache_write++;
3260  if (locked_and_incremented)
3261  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3262  if (my_pwrite(file, (uchar*) buff, length, filepos,
3263  MYF(MY_NABP | MY_WAIT_IF_FULL)))
3264  error=1;
3265  if (locked_and_incremented)
3266  keycache_pthread_mutex_lock(&keycache->cache_lock);
3267  }
3268 
3269 end:
3270  if (locked_and_incremented)
3271  {
3272  dec_counter_for_resize_op(keycache);
3273  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3274  }
3275 
3276  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
3277  {
3278  MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
3279  keycache->key_cache_block_size),
3280  (ulong) (keycache->blocks_unused *
3281  keycache->key_cache_block_size));
3282  }
3283 
3284 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3285  DBUG_EXECUTE("exec",
3286  test_key_cache(keycache, "end of key_cache_write", 1););
3287 #endif
3288  DBUG_RETURN(error);
3289 }
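/*
  Editor's note (not part of the original source): a minimal sketch of
  how the loop in key_cache_write() above decomposes a write request
  into cache-block-sized pieces. Only the first piece may start at a
  nonzero offset within a block; all later pieces are block aligned.
  The helper name is illustrative only.
*/
static void sketch_decompose_write(my_off_t filepos, uint length,
                                   uint block_size)
{
  uint offset= (uint) (filepos % block_size); /* offset in first block */
  uint piece;
  filepos-= offset;                  /* start of the first cache block */
  do
  {
    /* A piece never crosses a cache block boundary. */
    piece= (length < block_size - offset) ? length : block_size - offset;
    /* key_cache_write() copies 'piece' bytes at 'offset' into the
       cache block assigned to (file, filepos) at this point. */
    filepos+= block_size;
    offset= 0;
  } while ((length-= piece));
}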
3290 
3291 
3292 /*
3293  Free block.
3294 
3295  SYNOPSIS
3296  free_block()
3297  keycache Pointer to a key cache data structure
3298  block Pointer to the block to free
3299 
3300  DESCRIPTION
3301  Remove reference to block from hash table.
3302  Remove block from the chain of clean blocks.
3303  Add block to the free list.
3304 
3305  NOTE
3306  Block must not be free (status == 0).
3307  Block must not be in free_block_list.
3308  Block must not be in the LRU ring.
3309  Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3310  Block must not be marked for freeing (BLOCK_REASSIGNED).
3311  Block must not be in flush (BLOCK_IN_FLUSH).
3312  Block must not be dirty (BLOCK_CHANGED).
3313  Block must not be in changed_blocks (dirty) hash.
3314  Block must be in file_blocks (clean) hash.
3315  Block must refer to a hash_link.
3316  Block must have a request registered on it.
3317 */
3318 
3319 static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
3320 {
3321  KEYCACHE_THREAD_TRACE("free block");
3322  KEYCACHE_DBUG_PRINT("free_block",
3323  ("block %u to be freed, hash_link %p status: %u",
3324  BLOCK_NUMBER(block), block->hash_link,
3325  block->status));
3326  /*
3327  Assert that the block is not free already. And that it is in a clean
3328  state. Note that the block might just be assigned to a hash_link and
3329  not yet read (BLOCK_READ may not be set here). In this case a reader
3330  is registered in the hash_link and free_block() will wait for it
3331  below.
3332  */
3333  DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
3334  !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3335  BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
3336  BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
3337  /* Assert that the block is in a file_blocks chain. */
3338  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
3339  /* Assert that the block is not in the LRU ring. */
3340  DBUG_ASSERT(!block->next_used && !block->prev_used);
3341  /*
3342  IMHO the below condition (if()) makes no sense. I can't see how it
3343  could be possible that free_block() is entered with a NULL hash_link
3344  pointer. The only place where it can become NULL is in free_block()
3345  (or before its first use ever, but for those blocks free_block() is
3346  not called). I don't remove the conditional as it cannot harm, but
3347  place a DBUG_ASSERT to confirm my hypothesis. Eventually the
3348  condition (if()) can be removed.
3349  */
3350  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
3351  if (block->hash_link)
3352  {
3353  /*
3354  While waiting for readers to finish, new readers might request the
3355  block. But since we set block->status|= BLOCK_REASSIGNED, they
3356  will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
3357  later.
3358  */
3359  block->status|= BLOCK_REASSIGNED;
3360  wait_for_readers(keycache, block);
3361  /*
3362  The block must not have been freed by another thread. Repeat some
3363  checks. An additional requirement is that it must be read now
3364  (BLOCK_READ).
3365  */
3366  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
3367  DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
3368  BLOCK_REASSIGNED)) &&
3369  !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3370  BLOCK_IN_FLUSH | BLOCK_CHANGED |
3371  BLOCK_FOR_UPDATE)));
3372  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
3373  DBUG_ASSERT(!block->prev_used);
3374  /*
3375  Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
3376  thread (through unreg_request() below), other threads must not see
3377  this flag. They could become confused.
3378  */
3379  block->status&= ~BLOCK_REASSIGNED;
3380  /*
3381  Do not release the hash_link until the block is off all lists.
3382  At least not if we hand it over for eviction in unreg_request().
3383  */
3384  }
3385 
3386  /*
3387  Unregister the block request and link the block into the LRU ring.
3388  This enables eviction for the block. If the LRU ring was empty and
3389  threads are waiting for a block, then the block will be handed over
3390  for eviction immediately. Otherwise we will unlink it from the LRU
3391  ring again, without releasing the lock in between. So decrementing
3392  the request counter and updating statistics are the only relevant
3393  operations in this case. Assert that there are no other requests
3394  registered.
3395  */
3396  DBUG_ASSERT(block->requests == 1);
3397  unreg_request(keycache, block, 0);
3398  /*
3399  Note that even without releasing the cache lock it is possible that
3400  the block is immediately selected for eviction by link_block() and
3401  thus not added to the LRU ring. In this case we must not touch the
3402  block any more.
3403  */
3404  if (block->status & BLOCK_IN_EVICTION)
3405  return;
3406 
3407  /* Error blocks are not put into the LRU ring. */
3408  if (!(block->status & BLOCK_ERROR))
3409  {
3410  /* Here the block must be in the LRU ring. Unlink it again. */
3411  DBUG_ASSERT(block->next_used && block->prev_used &&
3412  *block->prev_used == block);
3413  unlink_block(keycache, block);
3414  }
3415  if (block->temperature == BLOCK_WARM)
3416  keycache->warm_blocks--;
3417  block->temperature= BLOCK_COLD;
3418 
3419  /* Remove from file_blocks hash. */
3420  unlink_changed(block);
3421 
3422  /* Remove reference to block from hash table. */
3423  unlink_hash(keycache, block->hash_link);
3424  block->hash_link= NULL;
3425 
3426  block->status= 0;
3427  block->length= 0;
3428  block->offset= keycache->key_cache_block_size;
3429  KEYCACHE_THREAD_TRACE("free block");
3430  KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));
3431 
3432  /* Enforced by unlink_changed(), but just to be sure. */
3433  DBUG_ASSERT(!block->next_changed && !block->prev_changed);
3434  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
3435  DBUG_ASSERT(!block->next_used && !block->prev_used);
3436  /* Insert the free block in the free list. */
3437  block->next_used= keycache->free_block_list;
3438  keycache->free_block_list= block;
3439  /* Keep track of the number of currently unused blocks. */
3440  keycache->blocks_unused++;
3441 
3442  /* All pending requests for this page must be resubmitted. */
3443  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3444 }
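/*
  Editor's note: free blocks are kept on a simple LIFO list threaded
  through next_used, as the tail of free_block() above shows. A hedged
  sketch (hypothetical helper, not in this file) of the matching pop,
  as performed when a block is taken from the free list elsewhere in
  this file:
*/
static BLOCK_LINK *sketch_pop_free_block(KEY_CACHE *keycache)
{
  BLOCK_LINK *block= keycache->free_block_list;
  if (block)
  {
    keycache->free_block_list= block->next_used; /* unlink LIFO head */
    block->next_used= NULL;
    keycache->blocks_unused--;
  }
  return block;
}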
3445 
3446 
3447 static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3448 {
3449  return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3450  ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3451 }
3452 
3453 
3454 /*
3455  Flush a portion of changed blocks to disk,
3456  free used blocks if requested
3457 */
3458 
3459 static int flush_cached_blocks(KEY_CACHE *keycache,
3460  File file, BLOCK_LINK **cache,
3461  BLOCK_LINK **end,
3462  enum flush_type type)
3463 {
3464  int error;
3465  int last_errno= 0;
3466  uint count= (uint) (end-cache);
3467 
3468  /* Don't lock the cache during the flush */
3469  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3470  /*
3471  As all blocks referred to in 'cache' are marked BLOCK_IN_FLUSH,
3472  we are guaranteed that no thread will change them.
3473  */
3474  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
3475 
3476  keycache_pthread_mutex_lock(&keycache->cache_lock);
3477  /*
3478  Note: Do not break the loop. We have registered a request on every
3479  block in 'cache'. These must be unregistered by free_block() or
3480  unreg_request().
3481  */
3482  for ( ; cache != end ; cache++)
3483  {
3484  BLOCK_LINK *block= *cache;
3485 
3486  KEYCACHE_DBUG_PRINT("flush_cached_blocks",
3487  ("block %u to be flushed", BLOCK_NUMBER(block)));
3488  /*
3489  If the block contents are going to be changed, we abandon the flush
3490  for this block. flush_key_blocks_int() will restart its search and
3491  handle the block properly.
3492  */
3493  if (!(block->status & BLOCK_FOR_UPDATE))
3494  {
3495  /* Blocks coming here must have a certain status. */
3496  DBUG_ASSERT(block->hash_link);
3497  DBUG_ASSERT(block->hash_link->block == block);
3498  DBUG_ASSERT(block->hash_link->file == file);
3499  DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
3500  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3501  block->status|= BLOCK_IN_FLUSHWRITE;
3502  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3503  error= my_pwrite(file, block->buffer+block->offset,
3504  block->length - block->offset,
3505  block->hash_link->diskpos+ block->offset,
3506  MYF(MY_NABP | MY_WAIT_IF_FULL));
3507  keycache_pthread_mutex_lock(&keycache->cache_lock);
3508  keycache->global_cache_write++;
3509  if (error)
3510  {
3511  block->status|= BLOCK_ERROR;
3512  if (!last_errno)
3513  last_errno= errno ? errno : -1;
3514  }
3515  block->status&= ~BLOCK_IN_FLUSHWRITE;
3516  /* The block's status must not have changed, except for BLOCK_FOR_UPDATE. */
3517  DBUG_ASSERT(block->hash_link);
3518  DBUG_ASSERT(block->hash_link->block == block);
3519  DBUG_ASSERT(block->hash_link->file == file);
3520  DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
3521  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3522  /*
3523  Set correct status and link in right queue for free or later use.
3524  free_block() must not see BLOCK_CHANGED and it may need to wait
3525  for readers of the block. These should not see the block in the
3526  wrong hash. If not freeing the block, we need to have it in the
3527  right queue anyway.
3528  */
3529  link_to_file_list(keycache, block, file, 1);
3530  }
3531  block->status&= ~BLOCK_IN_FLUSH;
3532  /*
3533  Let any requests waiting to write to the block page proceed.
3534  This can happen only during an operation to resize the key cache.
3535  */
3536  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3537  /* type will never be FLUSH_IGNORE_CHANGED here */
3538  if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3539  !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3540  BLOCK_FOR_UPDATE)))
3541  {
3542  /*
3543  Note that a request has been registered against the block in
3544  flush_key_blocks_int().
3545  */
3546  free_block(keycache, block);
3547  }
3548  else
3549  {
3550  /*
3551  Link the block into the LRU ring if it's the last submitted
3552  request for the block. This enables eviction for the block.
3553  Note that a request has been registered against the block in
3554  flush_key_blocks_int().
3555  */
3556  unreg_request(keycache, block, 1);
3557  }
3558 
3559  } /* end of for ( ; cache != end ; cache++) */
3560  return last_errno;
3561 }
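/*
  Editor's note: a hedged sketch (not in the original source) of the
  keep-vs-free decision that flush_cached_blocks() applies to each
  block after writing it, at the end of the loop above:
*/
static my_bool sketch_keep_block_after_flush(enum flush_type type,
                                             uint block_status)
{
  /* FLUSH_KEEP and FLUSH_FORCE_WRITE leave blocks in the cache.
     Blocks in eviction or marked for update must not be freed here
     either; free_block() is called only for the remaining blocks. */
  return (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE ||
          (block_status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)) != 0);
}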
3562 
3563 
3564 /*
3565  Flush all key blocks for a file to disk, but don't do any mutex locks.
3566 
3567  SYNOPSIS
3568  flush_key_blocks_int()
3569  keycache pointer to a key cache data structure
3570  file handler for the file to flush to
3571  flush_type type of the flush
3572 
3573  NOTES
3574  This function doesn't take any mutex locks itself because it needs to be
3575  called both from flush_key_blocks() and flush_all_key_blocks() (the latter
3576  takes the mutex lock in the resize_key_cache() function).
3577 
3578  We only care about changed blocks that exist when the function is
3579  entered. We do not guarantee that all changed blocks of the file are
3580  flushed if more blocks change while this function is running.
3581 
3582  RETURN
3583  0 ok
3584  1 error
3585 */
3586 
3587 static int flush_key_blocks_int(KEY_CACHE *keycache,
3588  File file, enum flush_type type)
3589 {
3590  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3591  int last_errno= 0;
3592  int last_errcnt= 0;
3593  DBUG_ENTER("flush_key_blocks_int");
3594  DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
3595  file, keycache->blocks_used, keycache->blocks_changed));
3596 
3597 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3598  DBUG_EXECUTE("check_keycache",
3599  test_key_cache(keycache, "start of flush_key_blocks", 0););
3600 #endif
3601 
3602  cache= cache_buff;
3603  if (keycache->disk_blocks > 0 &&
3604  (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
3605  {
3606  /* Key cache exists and flush is not disabled */
3607  int error= 0;
3608  uint count= FLUSH_CACHE;
3609  BLOCK_LINK **pos,**end;
3610  BLOCK_LINK *first_in_switch= NULL;
3611  BLOCK_LINK *last_in_flush;
3612  BLOCK_LINK *last_for_update;
3613  BLOCK_LINK *block, *next;
3614 #if defined(KEYCACHE_DEBUG)
3615  uint cnt=0;
3616 #endif
3617 
3618  if (type != FLUSH_IGNORE_CHANGED)
3619  {
3620  /*
3621  Count how many key blocks we have to cache to be able
3622  to flush all dirty pages with minimum seek moves
3623  */
3624  count= 0;
3625  for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3626  block ;
3627  block= block->next_changed)
3628  {
3629  if ((block->hash_link->file == file) &&
3630  !(block->status & BLOCK_IN_FLUSH))
3631  {
3632  count++;
3633  KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
3634  }
3635  }
3636  /*
3637  Allocate a new buffer only if it is bigger than the one we have.
3638  Ensure that we always have some spare entries for the case that new
3639  changed blocks appear while we need to wait for something.
3640  */
3641  if ((count > FLUSH_CACHE) &&
3642  !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
3643  MYF(0))))
3644  cache= cache_buff;
3645  /*
3646  After a restart there could be more changed blocks than now.
3647  So we should not let count become smaller than the fixed buffer size.
3648  */
3649  if (cache == cache_buff)
3650  count= FLUSH_CACHE;
3651  }
3652 
3653  /* Retrieve the blocks and write them to a buffer to be flushed */
3654 restart:
3655  last_in_flush= NULL;
3656  last_for_update= NULL;
3657  end= (pos= cache)+count;
3658  for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3659  block ;
3660  block= next)
3661  {
3662 #if defined(KEYCACHE_DEBUG)
3663  cnt++;
3664  KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
3665 #endif
3666  next= block->next_changed;
3667  if (block->hash_link->file == file)
3668  {
3669  if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3670  {
3671  /*
3672  Note: The special handling of BLOCK_IN_SWITCH is obsolete
3673  since we set BLOCK_IN_FLUSH if the eviction includes a
3674  flush. It can be removed in a later version.
3675  */
3676  if (!(block->status & BLOCK_IN_SWITCH))
3677  {
3678  /*
3679  We care only for the blocks for which flushing was not
3680  initiated by another thread and which are not in eviction.
3681  Registering a request on the block unlinks it from the LRU
3682  ring and protects against eviction.
3683  */
3684  reg_requests(keycache, block, 1);
3685  if (type != FLUSH_IGNORE_CHANGED)
3686  {
3687  /* It's not a temporary file */
3688  if (pos == end)
3689  {
3690  /*
3691  This should happen rarely. Remove the
3692  request because we won't do anything with the block
3693  but restart and pick it up again in the next iteration.
3694  */
3695  unreg_request(keycache, block, 0);
3696  /*
3697  This happens only if there was not enough
3698  memory for the big flush buffer ('cache').
3699  */
3700  if ((error= flush_cached_blocks(keycache, file, cache,
3701  end,type)))
3702  {
3703  /* Do not loop infinitely trying to flush in vain. */
3704  if ((last_errno == error) && (++last_errcnt > 5))
3705  goto err;
3706  last_errno= error;
3707  }
3708  /*
3709  Restart the scan as some other thread might have changed
3710  the changed blocks chain: the blocks that were in switch
3711  state before the flush started have to be excluded
3712  */
3713  goto restart;
3714  }
3715  /*
3716  Mark the block with BLOCK_IN_FLUSH in order not to let
3717  other threads use it for new pages and interfere with
3718  our sequence of flushing dirty file pages. We must not
3719  set this flag before actually putting the block on the
3720  write burst array called 'cache'.
3721  */
3722  block->status|= BLOCK_IN_FLUSH;
3723  /* Add block to the array for a write burst. */
3724  *pos++= block;
3725  }
3726  else
3727  {
3728  /* It's a temporary file */
3729  DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3730  /*
3731  free_block() must not be called with BLOCK_CHANGED. Note
3732  that we must not change the BLOCK_CHANGED flag outside of
3733  link_to_file_list() so that it is always in the correct
3734  queue and the *blocks_changed counters are correct.
3735  */
3736  link_to_file_list(keycache, block, file, 1);
3737  if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3738  {
3739  /* A request has been registered against the block above. */
3740  free_block(keycache, block);
3741  }
3742  else
3743  {
3744  /*
3745  Link the block into the LRU ring if it's the last
3746  submitted request for the block. This enables eviction
3747  for the block. A request has been registered against
3748  the block above.
3749  */
3750  unreg_request(keycache, block, 1);
3751  }
3752  }
3753  }
3754  else
3755  {
3756  /*
3757  Link the block into a list of blocks 'in switch'.
3758 
3759  WARNING: Here we introduce a place where a changed block
3760  is not in the changed_blocks hash! This is acceptable for
3761  a BLOCK_IN_SWITCH. Never try this for another situation.
3762  Other parts of the key cache code rely on changed blocks
3763  being in the changed_blocks hash.
3764  */
3765  unlink_changed(block);
3766  link_changed(block, &first_in_switch);
3767  }
3768  }
3769  else if (type != FLUSH_KEEP)
3770  {
3771  /*
3772  During the normal flush at end of statement (FLUSH_KEEP) we
3773  do not need to ensure that blocks in flush or update by
3774  other threads are flushed. They will be flushed by them
3775  later. In all other cases we must ensure that we do not have
3776  any changed block of this file in the cache when this
3777  function returns.
3778  */
3779  if (block->status & BLOCK_IN_FLUSH)
3780  {
3781  /* Remember the last block found to be in flush. */
3782  last_in_flush= block;
3783  }
3784  else
3785  {
3786  /* Remember the last block found to be selected for update. */
3787  last_for_update= block;
3788  }
3789  }
3790  }
3791  }
3792  if (pos != cache)
3793  {
3794  if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3795  {
3796  /* Do not loop infinitely trying to flush in vain. */
3797  if ((last_errno == error) && (++last_errcnt > 5))
3798  goto err;
3799  last_errno= error;
3800  }
3801  /*
3802  Do not restart here during the normal flush at end of statement
3803  (FLUSH_KEEP). We have now flushed at least all blocks that were
3804  changed when entering this function. In all other cases we must
3805  ensure that we do not have any changed block of this file in the
3806  cache when this function returns.
3807  */
3808  if (type != FLUSH_KEEP)
3809  goto restart;
3810  }
3811  if (last_in_flush)
3812  {
3813  /*
3814  There are no blocks to be flushed by this thread, but blocks in
3815  flush by other threads. Wait until one of the blocks is flushed.
3816  Re-check the condition for last_in_flush. We may have unlocked
3817  the cache_lock in flush_cached_blocks(). The state of the block
3818  could have changed.
3819  */
3820  if (last_in_flush->status & BLOCK_IN_FLUSH)
3821  wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3822  &keycache->cache_lock);
3823  /* Be sure not to lose a block. They may be flushed in random order. */
3824  goto restart;
3825  }
3826  if (last_for_update)
3827  {
3828  /*
3829  There are no blocks to be flushed by this thread, but blocks for
3830  update by other threads. Wait until one of the blocks is updated.
3831  Re-check the condition for last_for_update. We may have unlocked
3832  the cache_lock in flush_cached_blocks(). The state of the block
3833  could have changed.
3834  */
3835  if (last_for_update->status & BLOCK_FOR_UPDATE)
3836  wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3837  &keycache->cache_lock);
3838  /* The block is now changed. Flush it. */
3839  goto restart;
3840  }
3841 
3842  /*
3843  Wait until the list of blocks in switch is empty. The threads that
3844  are switching these blocks will relink them to clean file chains
3845  while we wait and thus empty the 'first_in_switch' chain.
3846  */
3847  while (first_in_switch)
3848  {
3849 #if defined(KEYCACHE_DEBUG)
3850  cnt= 0;
3851 #endif
3852  wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3853  &keycache->cache_lock);
3854 #if defined(KEYCACHE_DEBUG)
3855  cnt++;
3856  KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
3857 #endif
3858  /*
3859  Do not restart here. We have flushed all blocks that were
3860  changed when entering this function and were not marked for
3861  eviction. Other threads have now flushed all remaining blocks in
3862  the course of their eviction.
3863  */
3864  }
3865 
3866  if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3867  {
3868  BLOCK_LINK *last_for_update= NULL;
3869  BLOCK_LINK *last_in_switch= NULL;
3870  uint total_found= 0;
3871  uint found;
3872 
3873  /*
3874  Finally free all clean blocks for this file.
3875  During resize this may be run by two threads in parallel.
3876  */
3877  do
3878  {
3879  found= 0;
3880  for (block= keycache->file_blocks[FILE_HASH(file)] ;
3881  block ;
3882  block= next)
3883  {
3884  /* Remember the next block. After freeing we cannot get at it. */
3885  next= block->next_changed;
3886 
3887  /* Changed blocks cannot appear in the file_blocks hash. */
3888  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
3889  if (block->hash_link->file == file)
3890  {
3891  /* We must skip blocks that will be changed. */
3892  if (block->status & BLOCK_FOR_UPDATE)
3893  {
3894  last_for_update= block;
3895  continue;
3896  }
3897 
3898  /*
3899  We must not free blocks in eviction (BLOCK_IN_EVICTION |
3900  BLOCK_IN_SWITCH) or blocks intended to be freed
3901  (BLOCK_REASSIGNED).
3902  */
3903  if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3904  BLOCK_REASSIGNED)))
3905  {
3906  struct st_hash_link *UNINIT_VAR(next_hash_link);
3907  my_off_t UNINIT_VAR(next_diskpos);
3908  File UNINIT_VAR(next_file);
3909  uint UNINIT_VAR(next_status);
3910  uint UNINIT_VAR(hash_requests);
3911 
3912  total_found++;
3913  found++;
3914  KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);
3915 
3916  /*
3917  Register a request. This unlinks the block from the LRU
3918  ring and protects it against eviction. This is required
3919  by free_block().
3920  */
3921  reg_requests(keycache, block, 1);
3922 
3923  /*
3924  free_block() may need to wait for readers of the block.
3925  This is the moment where the other thread can remove the
3926  'next' block from the chain. free_block() needs to wait
3927  if there are requests for the block pending.
3928  */
3929  if (next && (hash_requests= block->hash_link->requests))
3930  {
3931  /* Copy values from the 'next' block and its hash_link. */
3932  next_status= next->status;
3933  next_hash_link= next->hash_link;
3934  next_diskpos= next_hash_link->diskpos;
3935  next_file= next_hash_link->file;
3936  DBUG_ASSERT(next == next_hash_link->block);
3937  }
3938 
3939  free_block(keycache, block);
3940  /*
3941  If we had to wait and the state of the 'next' block
3942  changed, break the inner loop. 'next' may no longer be
3943  part of the current chain.
3944 
3945  We do not want to break the loop after every free_block(),
3946  nor even after every wait. The chain might be quite long
3947  and contain blocks for many files. Traversing it again and
3948  again to find more blocks for this file could become quite
3949  inefficient.
3950  */
3951  if (next && hash_requests &&
3952  ((next_status != next->status) ||
3953  (next_hash_link != next->hash_link) ||
3954  (next_file != next_hash_link->file) ||
3955  (next_diskpos != next_hash_link->diskpos) ||
3956  (next != next_hash_link->block)))
3957  break;
3958  }
3959  else
3960  {
3961  last_in_switch= block;
3962  }
3963  }
3964  } /* end for block in file_blocks */
3965  } while (found);
3966 
3967  /*
3968  If any clean block has been found, we may have waited for it to
3969  become free. In this case it could be possible that another clean
3970  block became dirty. This is possible if the write request existed
3971  before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3972  */
3973  if (total_found)
3974  goto restart;
3975 
3976  /*
3977  To avoid an infinite loop, wait until one of the blocks marked
3978  for update is updated.
3979  */
3980  if (last_for_update)
3981  {
3982  /* We did not wait. Block must not have changed status. */
3983  DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
3984  wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3985  &keycache->cache_lock);
3986  goto restart;
3987  }
3988 
3989  /*
3990  To avoid an infinite loop, wait until one of the blocks marked
3991  for eviction is switched.
3992  */
3993  if (last_in_switch)
3994  {
3995  /* We did not wait. Block must not have changed status. */
3996  DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
3997  BLOCK_IN_SWITCH |
3998  BLOCK_REASSIGNED));
3999  wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
4000  &keycache->cache_lock);
4001  goto restart;
4002  }
4003 
4004  } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
4005 
4006  } /* if (keycache->disk_blocks > 0 */
4007 
4008 #ifndef DBUG_OFF
4009  DBUG_EXECUTE("check_keycache",
4010  test_key_cache(keycache, "end of flush_key_blocks", 0););
4011 #endif
4012 err:
4013  if (cache != cache_buff)
4014  my_free(cache);
4015  if (last_errno)
4016  errno=last_errno; /* Return first error */
4017  DBUG_RETURN(last_errno != 0);
4018 }
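/*
  Editor's note: a minimal sketch (hypothetical helper, not part of
  this file) of the counting pass at the start of flush_key_blocks_int()
  above: walk the file's changed_blocks hash chain and count dirty
  blocks not already in flush, to size the write-burst array:
*/
static uint sketch_count_changed_blocks(KEY_CACHE *keycache, File file)
{
  uint count= 0;
  BLOCK_LINK *block;
  for (block= keycache->changed_blocks[FILE_HASH(file)] ;
       block ;
       block= block->next_changed)
  {
    /* Several files can share one hash bucket; match the file and
       skip blocks another thread is already flushing. */
    if (block->hash_link->file == file &&
        !(block->status & BLOCK_IN_FLUSH))
      count++;
  }
  return count;
}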
4019 
4020 
4021 /*
4022  Flush all blocks for a file to disk
4023 
4024  SYNOPSIS
4025 
4026  flush_key_blocks()
4027  keycache pointer to a key cache data structure
4028  file handler for the file to flush to
4029  flush_type type of the flush
4030 
4031  RETURN
4032  0 ok
4033  1 error
4034 */
4035 
4036 int flush_key_blocks(KEY_CACHE *keycache,
4037  File file, enum flush_type type)
4038 {
4039  int res= 0;
4040  DBUG_ENTER("flush_key_blocks");
4041  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));
4042 
4043  if (!keycache->key_cache_inited)
4044  DBUG_RETURN(0);
4045 
4046  keycache_pthread_mutex_lock(&keycache->cache_lock);
4047  /* While waiting for the lock, the keycache could have been ended. */
4048  if (keycache->disk_blocks > 0)
4049  {
4050  inc_counter_for_resize_op(keycache);
4051  res= flush_key_blocks_int(keycache, file, type);
4052  dec_counter_for_resize_op(keycache);
4053  }
4054  keycache_pthread_mutex_unlock(&keycache->cache_lock);
4055  DBUG_RETURN(res);
4056 }
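/*
  Editor's note (hedged usage sketch; the caller context is an
  assumption, not taken from this file): a storage engine would
  typically flush a table's index file with FLUSH_KEEP at the end of a
  statement, and free its blocks with FLUSH_RELEASE when closing the
  file, e.g.:
*/
static int sketch_close_index_file(KEY_CACHE *keycache, File kfile)
{
  /* Write all dirty blocks of the file, then free all of its cache
     blocks so no stale blocks survive the close. */
  return flush_key_blocks(keycache, kfile, FLUSH_RELEASE);
}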
4057 
4058 
4059 /*
4060  Flush all blocks in the key cache to disk.
4061 
4062  SYNOPSIS
4063  flush_all_key_blocks()
4064  keycache pointer to key cache root structure
4065 
4066  DESCRIPTION
4067 
4068  Flushing of the whole key cache is done in two phases.
4069 
4070  1. Flush all changed blocks, waiting for them if necessary. Loop
4071  until there is no changed block left in the cache.
4072 
4073  2. Free all clean blocks. Normally this means free all blocks. The
4074  changed blocks were flushed in phase 1 and became clean. However we
4075  may need to wait for blocks that are read by other threads. While we
4076  wait, a clean block could become changed if that operation started
4077  before the resize operation started. To be safe we must restart at
4078  phase 1.
4079 
4080  When we can run through the changed_blocks and file_blocks hashes
4081  without finding a block any more, then we are done.
4082 
4083  Note that we hold keycache->cache_lock all the time unless we need
4084  to wait for something.
4085 
4086  RETURN
4087  0 OK
4088  != 0 Error
4089 */
4090 
4091 static int flush_all_key_blocks(KEY_CACHE *keycache)
4092 {
4093  BLOCK_LINK *block;
4094  uint total_found;
4095  uint found;
4096  uint idx;
4097  DBUG_ENTER("flush_all_key_blocks");
4098 
4099  do
4100  {
4101  mysql_mutex_assert_owner(&keycache->cache_lock);
4102  total_found= 0;
4103 
4104  /*
4105  Phase1: Flush all changed blocks, waiting for them if necessary.
4106  Loop until there is no changed block left in the cache.
4107  */
4108  do
4109  {
4110  found= 0;
4111  /* Step over the whole changed_blocks hash array. */
4112  for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
4113  {
4114  /*
4115  If an array element is non-empty, use the first block from its
4116  chain to find a file for flush. All changed blocks for this
4117  file are flushed. So the same block will not appear at this
4118  place again with the next iteration. New writes for blocks are
4119  not accepted during the flush. If multiple files share the
4120  same hash bucket, one of them will be flushed per iteration
4121  of the outer loop of phase 1.
4122  */
4123  if ((block= keycache->changed_blocks[idx]))
4124  {
4125  found++;
4126  /*
4127  Flush dirty blocks but do not free them yet. They can be used
4128  for reading until all other blocks are flushed too.
4129  */
4130  if (flush_key_blocks_int(keycache, block->hash_link->file,
4131  FLUSH_FORCE_WRITE))
4132  DBUG_RETURN(1);
4133  }
4134  }
4135 
4136  } while (found);
4137 
4138  /*
4139  Phase 2: Free all clean blocks. Normally this means free all
4140  blocks. The changed blocks were flushed in phase 1 and became
4141  clean. However we may need to wait for blocks that are read by
4142  other threads. While we wait, a clean block could become changed
4143  if that operation started before the resize operation started. To
4144  be safe we must restart at phase 1.
4145  */
4146  do
4147  {
4148  found= 0;
4149  /* Step over the whole file_blocks hash array. */
4150  for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
4151  {
4152  /*
4153  If an array element is non-empty, use the first block from its
4154  chain to find a file for flush. All blocks for this file are
4155  freed. So the same block will not appear at this place again
4156  with the next iteration. If multiple files share the
4157  same hash bucket, one of them will be flushed per iteration
4158  of the outer loop of phase 2.
4159  */
4160  if ((block= keycache->file_blocks[idx]))
4161  {
4162  total_found++;
4163  found++;
4164  if (flush_key_blocks_int(keycache, block->hash_link->file,
4165  FLUSH_RELEASE))
4166  DBUG_RETURN(1);
4167  }
4168  }
4169 
4170  } while (found);
4171 
4172  /*
4173  If any clean block has been found, we may have waited for it to
4174  become free. In this case it could be possible that another clean
4175  block became dirty. This is possible if the write request existed
4176  before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
4177  */
4178  } while (total_found);
4179 
4180 #ifndef DBUG_OFF
4181  /* Now no blocks should exist any more. */
4182  for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
4183  {
4184  DBUG_ASSERT(!keycache->changed_blocks[idx]);
4185  DBUG_ASSERT(!keycache->file_blocks[idx]);
4186  }
4187 #endif
4188 
4189  DBUG_RETURN(0);
4190 }
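/*
  Editor's note (hedged sketch of the calling pattern, assuming only
  what this file shows): flush_all_key_blocks() expects cache_lock to
  be held on entry and releases it only inside waits, so a caller such
  as resize_key_cache() wraps it roughly like this:
*/
static int sketch_flush_whole_cache(KEY_CACHE *keycache)
{
  int error;
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  error= flush_all_key_blocks(keycache);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return error;
}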
4191 
4192 
4193 /*
4194  Reset the counters of a key cache.
4195 
4196  SYNOPSIS
4197  reset_key_cache_counters()
4198  name the name of a key cache
4199  key_cache pointer to the key cache to be reset
4200 
4201  DESCRIPTION
4202  This procedure is used by process_key_caches() to reset the counters of all
4203  currently used key caches, both the default one and the named ones.
4204 
4205  RETURN
4206  0 on success (always, because it cannot fail)
4207 */
4208 
4209 int reset_key_cache_counters(const char *name __attribute__((unused)),
4210  KEY_CACHE *key_cache)
4211 {
4212  DBUG_ENTER("reset_key_cache_counters");
4213  if (!key_cache->key_cache_inited)
4214  {
4215  DBUG_PRINT("info", ("Key cache %s not initialized.", name));
4216  DBUG_RETURN(0);
4217  }
4218  DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
4219 
4220  key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
4221  key_cache->global_cache_r_requests= 0; /* Key_read_requests */
4222  key_cache->global_cache_read= 0; /* Key_reads */
4223  key_cache->global_cache_w_requests= 0; /* Key_write_requests */
4224  key_cache->global_cache_write= 0; /* Key_writes */
4225  DBUG_RETURN(0);
4226 }
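/*
  Editor's note (hedged usage sketch): dflt_key_cache is the default
  cache declared in keycache.h; resetting its counters directly would
  look like this. The server resets all caches, default and named,
  through process_key_caches() instead.
*/
static void sketch_reset_default_counters(void)
{
  (void) reset_key_cache_counters("default", dflt_key_cache);
}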
4227 
4228 
4229 #ifndef DBUG_OFF
4230 /*
4231  Test if disk-cache is ok
4232 */
4233 static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
4234  const char *where __attribute__((unused)),
4235  my_bool lock __attribute__((unused)))
4236 {
4237  /* TODO */
4238 }
4239 #endif
4240 
4241 #if defined(KEYCACHE_TIMEOUT)
4242 
4243 #define KEYCACHE_DUMP_FILE "keycache_dump.txt"
4244 #define MAX_QUEUE_LEN 100
4245 
4246 
4247 static void keycache_dump(KEY_CACHE *keycache)
4248 {
4249  FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
4250  struct st_my_thread_var *last;
4251  struct st_my_thread_var *thread= my_thread_var;
4252  BLOCK_LINK *block;
4253  HASH_LINK *hash_link;
4254  KEYCACHE_PAGE *page;
4255  uint i;
4256 
4257  fprintf(keycache_dump_file, "thread:%u\n", thread->id);
4258 
4259  i=0;
4260  thread=last=keycache->waiting_for_hash_link.last_thread;
4261  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
4262  if (thread)
4263  do
4264  {
4265  thread=thread->next;
4266  page= (KEYCACHE_PAGE *) thread->opt_info;
4267  fprintf(keycache_dump_file,
4268  "thread:%u, (file,filepos)=(%u,%lu)\n",
4269  thread->id,(uint) page->file,(ulong) page->filepos);
4270  if (++i == MAX_QUEUE_LEN)
4271  break;
4272  }
4273  while (thread != last);
4274 
4275  i=0;
4276  thread=last=keycache->waiting_for_block.last_thread;
4277  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
4278  if (thread)
4279  do
4280  {
4281  thread=thread->next;
4282  hash_link= (HASH_LINK *) thread->opt_info;
4283  fprintf(keycache_dump_file,
4284  "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
4285  thread->id, (uint) HASH_LINK_NUMBER(hash_link),
4286  (uint) hash_link->file,(ulong) hash_link->diskpos);
4287  if (++i == MAX_QUEUE_LEN)
4288  break;
4289  }
4290  while (thread != last);
4291 
4292  for (i=0 ; i< keycache->blocks_used ; i++)
4293  {
4294  int j, qlen;
4295  block= &keycache->block_root[i];
4296  hash_link= block->hash_link;
4297  fprintf(keycache_dump_file,
4298  "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
4299  i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
4300  block->status, block->requests, block->condvar ? 1 : 0);
4301  for (j=0 ; j < 2; j++)
4302  {
4303  KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
4304  thread= last= wqueue->last_thread; qlen= 0;
4305  fprintf(keycache_dump_file, "queue #%d\n", j);
4306  if (thread)
4307  {
4308  do
4309  {
4310  thread=thread->next;
4311  fprintf(keycache_dump_file,
4312  "thread:%u\n", thread->id);
4313  if (++qlen == MAX_QUEUE_LEN)
4314  break;
4315  }
4316  while (thread != last);
4317  }
4318  }
4319  }
4320  fprintf(keycache_dump_file, "LRU chain:");
4321  block= keycache->used_last;
4322  if (block)
4323  {
4324  do
4325  {
4326  block= block->next_used;
4327  fprintf(keycache_dump_file,
4328  "block:%u, ", BLOCK_NUMBER(block));
4329  }
4330  while (block != keycache->used_last);
4331  }
4332  fprintf(keycache_dump_file, "\n");
4333 
4334  fclose(keycache_dump_file);
4335 }
4336 
4337 #endif /* defined(KEYCACHE_TIMEOUT) */
4338 
4339 #if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4340 
4341 
4342 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
4343  mysql_mutex_t *mutex)
4344 {
4345  int rc;
4346  struct timeval now; /* time when we started waiting */
4347  struct timespec timeout; /* timeout value for the wait function */
4348  struct timezone tz;
4349 #if defined(KEYCACHE_DEBUG)
4350  int cnt=0;
4351 #endif
4352 
4353  /* Get current time */
4354  gettimeofday(&now, &tz);
4355  /* Prepare timeout value */
4356  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
4357  /*
4358  timeval uses microseconds.
4359  timespec uses nanoseconds.
4360  1 microsecond = 1000 nanoseconds
4361  */
4362  timeout.tv_nsec= now.tv_usec * 1000;
4363  KEYCACHE_THREAD_TRACE_END("started waiting");
4364 #if defined(KEYCACHE_DEBUG)
4365  cnt++;
4366  if (cnt % 100 == 0)
4367  fprintf(keycache_debug_log, "waiting...\n");
4368  fflush(keycache_debug_log);
4369 #endif
4370  rc= mysql_cond_timedwait(cond, mutex, &timeout);
4371  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
4372  if (rc == ETIMEDOUT || rc == ETIME)
4373  {
4374 #if defined(KEYCACHE_DEBUG)
4375  fprintf(keycache_debug_log,"aborted by keycache timeout\n");
4376  fclose(keycache_debug_log);
4377  abort();
4378 #endif
4379  keycache_dump();
4380  }
4381 
4382 #if defined(KEYCACHE_DEBUG)
4383  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
4384 #else
4385  assert(rc != ETIMEDOUT);
4386 #endif
4387  return rc;
4388 }
4389 #else
4390 #if defined(KEYCACHE_DEBUG)
4391 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
4392  mysql_mutex_t *mutex)
4393 {
4394  int rc;
4395  KEYCACHE_THREAD_TRACE_END("started waiting");
4396  rc= mysql_cond_wait(cond, mutex);
4397  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
4398  return rc;
4399 }
4400 #endif
4401 #endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4402 
4403 #if defined(KEYCACHE_DEBUG)
4404 
4405 
4406 static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
4407 {
4408  int rc;
4409  rc= mysql_mutex_lock(mutex);
4410  KEYCACHE_THREAD_TRACE_BEGIN("");
4411  return rc;
4412 }
4413 
4414 
4415 static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
4416 {
4417  KEYCACHE_THREAD_TRACE_END("");
4418  mysql_mutex_unlock(mutex);
4419 }
4420 
4421 
4422 static int keycache_pthread_cond_signal(mysql_cond_t *cond)
4423 {
4424  int rc;
4425  KEYCACHE_THREAD_TRACE("signal");
4426  rc= mysql_cond_signal(cond);
4427  return rc;
4428 }
4429 
4430 
4431 #if defined(KEYCACHE_DEBUG_LOG)
4432 
4433 
4434 static void keycache_debug_print(const char * fmt,...)
4435 {
4436  va_list args;
4437  va_start(args,fmt);
4438  if (keycache_debug_log)
4439  {
4440  (void) vfprintf(keycache_debug_log, fmt, args);
4441  (void) fputc('\n',keycache_debug_log);
4442  }
4443  va_end(args);
4444 }
4445 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4446 
4447 #if defined(KEYCACHE_DEBUG_LOG)
4448 
4449 
4450 void keycache_debug_log_close(void)
4451 {
4452  if (keycache_debug_log)
4453  fclose(keycache_debug_log);
4454 }
4455 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4456 
4457 #endif /* defined(KEYCACHE_DEBUG) */
4458 
4459 #if !defined(DBUG_OFF)
4460 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4461 
4462 static int fail_block(BLOCK_LINK *block)
4463 {
4464  F_B_PRT("block->next_used: %lx\n", (ulong) block->next_used);
4465  F_B_PRT("block->prev_used: %lx\n", (ulong) block->prev_used);
4466  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
4467  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
4468  F_B_PRT("block->hash_link: %lx\n", (ulong) block->hash_link);
4469  F_B_PRT("block->status: %u\n", block->status);
4470  F_B_PRT("block->length: %u\n", block->length);
4471  F_B_PRT("block->offset: %u\n", block->offset);
4472  F_B_PRT("block->requests: %u\n", block->requests);
4473  F_B_PRT("block->temperature: %u\n", block->temperature);
4474  return 0; /* Let the assert fail. */
4475 }
4476 
4477 static int fail_hlink(HASH_LINK *hlink)
4478 {
4479  F_B_PRT("hlink->next: %lx\n", (ulong) hlink->next);
4480  F_B_PRT("hlink->prev: %lx\n", (ulong) hlink->prev);
4481  F_B_PRT("hlink->block: %lx\n", (ulong) hlink->block);
4482  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
4483  F_B_PRT("hlink->file: %d\n", hlink->file);
4484  return 0; /* Let the assert fail. */
4485 }
4486 
4487 static int cache_empty(KEY_CACHE *keycache)
4488 {
4489  int errcnt= 0;
4490  int idx;
4491  if (keycache->disk_blocks <= 0)
4492  return 1;
4493  for (idx= 0; idx < keycache->disk_blocks; idx++)
4494  {
4495  BLOCK_LINK *block= keycache->block_root + idx;
4496  if (block->status || block->requests || block->hash_link)
4497  {
4498  fprintf(stderr, "block index: %u\n", idx);
4499  fail_block(block);
4500  errcnt++;
4501  }
4502  }
4503  for (idx= 0; idx < keycache->hash_links; idx++)
4504  {
4505  HASH_LINK *hash_link= keycache->hash_link_root + idx;
4506  if (hash_link->requests || hash_link->block)
4507  {
4508  fprintf(stderr, "hash_link index: %u\n", idx);
4509  fail_hlink(hash_link);
4510  errcnt++;
4511  }
4512  }
4513  if (errcnt)
4514  {
4515  fprintf(stderr, "blocks: %d used: %lu\n",
4516  keycache->disk_blocks, keycache->blocks_used);
4517  fprintf(stderr, "hash_links: %d used: %d\n",
4518  keycache->hash_links, keycache->hash_links_used);
4519  fprintf(stderr, "\n");
4520  }
4521  return !errcnt;
4522 }
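/*
  Editor's note (hedged usage sketch): cache_empty() is a debug-build
  consistency check; after a complete flush one would assert, e.g.:

    DBUG_ASSERT(cache_empty(keycache));
*/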
4523 #endif
4524