MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
log0log.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2009, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; version 2 of the License.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc.,
22 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
23 
24 *****************************************************************************/
25 
26 /**************************************************/
33 #include "log0log.h"
34 
35 #ifdef UNIV_NONINL
36 #include "log0log.ic"
37 #endif
38 
39 #ifndef UNIV_HOTBACKUP
40 #include "mem0mem.h"
41 #include "buf0buf.h"
42 #include "buf0flu.h"
43 #include "srv0srv.h"
44 #include "log0recv.h"
45 #include "fil0fil.h"
46 #include "dict0boot.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "trx0sys.h"
50 #include "trx0trx.h"
51 #include "srv0mon.h"
52 
53 /*
54 General philosophy of InnoDB redo-logs:
55 
56 1) Every change to the contents of a data page must be done
57 through mtr, which in mtr_commit() writes log records
58 to the InnoDB redo log.
59 
60 2) Normally these changes are performed using a mlog_write_ulint()
61 or similar function.
62 
63 3) In some page level operations only a code number of a
64 c-function and its parameters are written to the log to
65 reduce the size of the log.
66 
67  3a) You should not add parameters to this kind of function
68  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
69 
70  3b) You should not add functionality that either changes the
71  behavior compared with the old version or depends on data
72  outside of the page. This kind of function should implement a
73  self-contained page transformation, and it should stay unchanged
74  unless you have very essential reasons to change the log
75  semantics or format.
76 
77 */
78 
79 /* Global log system variable */
80 UNIV_INTERN log_t* log_sys = NULL;
81 
82 #ifdef UNIV_PFS_RWLOCK
83 UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
84 # ifdef UNIV_LOG_ARCHIVE
85 UNIV_INTERN mysql_pfs_key_t archive_lock_key;
86 # endif
87 #endif /* UNIV_PFS_RWLOCK */
88 
89 #ifdef UNIV_PFS_MUTEX
90 UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key;
91 UNIV_INTERN mysql_pfs_key_t log_flush_order_mutex_key;
92 #endif /* UNIV_PFS_MUTEX */
93 
94 #ifdef UNIV_DEBUG
95 UNIV_INTERN ibool log_do_write = TRUE;
96 #endif /* UNIV_DEBUG */
97 
98 /* These control how often we print warnings if the last checkpoint is too
99 old */
100 UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
101 UNIV_INTERN time_t log_last_warning_time;
102 
103 #ifdef UNIV_LOG_ARCHIVE
104 /* Pointer to this variable is used as the i/o-message when we do i/o to an
105 archive */
106 UNIV_INTERN byte log_archive_io;
107 #endif /* UNIV_LOG_ARCHIVE */
108 
109 /* A margin for free space in the log buffer before a log entry is catenated */
110 #define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
111 
112 /* Margins for free space in the log buffer after a log entry is catenated */
113 #define LOG_BUF_FLUSH_RATIO 2
114 #define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
115 
116 /* Margin for the free space in the smallest log group, before a new query
117 step which modifies the database, is started */
118 
119 #define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
120 #define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
121 
122 /* This parameter controls asynchronous making of a new checkpoint; the value
123 should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
124 
125 #define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
126 
127 /* This parameter controls synchronous preflushing of modified buffer pages */
128 #define LOG_POOL_PREFLUSH_RATIO_SYNC 16
129 
130 /* The same ratio for asynchronous preflushing; this value should be less than
131 the previous */
132 #define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
133 
134 /* Extra margin, in addition to one log file, used in archiving */
135 #define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
136 
137 /* This parameter controls asynchronous writing to the archive */
138 #define LOG_ARCHIVE_RATIO_ASYNC 16
139 
140 /* Codes used in unlocking flush latches */
141 #define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
142 #define LOG_UNLOCK_FLUSH_LOCK 2
143 
144 /* States of an archiving operation */
145 #define LOG_ARCHIVE_READ 1
146 #define LOG_ARCHIVE_WRITE 2
147 
148 /******************************************************/
150 static
151 void
152 log_io_complete_checkpoint(void);
153 /*============================*/
154 #ifdef UNIV_LOG_ARCHIVE
155 /******************************************************/
157 static
158 void
159 log_io_complete_archive(void);
160 /*=========================*/
161 #endif /* UNIV_LOG_ARCHIVE */
162 
163 /****************************************************************/
167 static
168 lsn_t
169 log_buf_pool_get_oldest_modification(void)
170 /*======================================*/
171 {
172  lsn_t lsn;
173 
174  ut_ad(mutex_own(&(log_sys->mutex)));
175 
177 
178  if (!lsn) {
179 
180  lsn = log_sys->lsn;
181  }
182 
183  return(lsn);
184 }
185 
186 /************************************************************/
/* Waits until the log buffer has room for a record of up to `len` bytes
(estimated as LOG_BUF_WRITE_MARGIN + 5/4 * len) and then returns the current
log->lsn. The log mutex acquired at label `loop` is NOT released on the
successful return path visible here, so the caller is left owning it.
`len` must be smaller than half of the log buffer (ut_a below). */
190 UNIV_INTERN
191 lsn_t
/* NOTE(review): listing line 192, the declarator carrying the function name,
is absent from this listing. */
193 /*=================*/
194  ulint len)
195 {
196  log_t* log = log_sys;
197  ulint len_upper_limit;
198 #ifdef UNIV_LOG_ARCHIVE
199  ulint archived_lsn_age;
200  ulint dummy;
201 #endif /* UNIV_LOG_ARCHIVE */
202 #ifdef UNIV_DEBUG
203  ulint count = 0;
204 #endif /* UNIV_DEBUG */
205 
206  ut_a(len < log->buf_size / 2);
207 loop:
208  mutex_enter(&(log->mutex));
209  ut_ad(!recv_no_log_write);
210 
211  /* Calculate an upper limit for the space the string may take in the
212  log buffer */
213 
214  len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
215 
216  if (log->buf_free + len_upper_limit > log->buf_size) {
217 
218  mutex_exit(&(log->mutex));
219 
220  /* Not enough free space, do a synchronous flush of the log
221  buffer */
222 
/* NOTE(review): listing lines 223 and 225 are absent; the flush announced by
the comment above is not visible here. As shown, the loop would simply retry
without freeing any space - confirm against the real source. */
224 
226 
/* Debug-only guard against looping forever (count exists only under
UNIV_DEBUG). */
227  ut_ad(++count < 50);
228 
229  goto loop;
230  }
231 
232 #ifdef UNIV_LOG_ARCHIVE
233  if (log->archiving_state != LOG_ARCH_OFF) {
234 
235  archived_lsn_age = log->lsn - log->archived_lsn;
236  if (archived_lsn_age + len_upper_limit
237  > log->max_archived_lsn_age) {
238  /* Not enough free archived space in log groups: do a
239  synchronous archive write batch: */
240 
241  mutex_exit(&(log->mutex));
242 
243  ut_ad(len_upper_limit <= log->max_archived_lsn_age);
244 
245  log_archive_do(TRUE, &dummy);
246 
247  ut_ad(++count < 50);
248 
249  goto loop;
250  }
251  }
252 #endif /* UNIV_LOG_ARCHIVE */
253 
254 #ifdef UNIV_LOG_DEBUG
/* Remember where this write started. log->old_buf_free and log->old_lsn are
read again where the listing passes them to log_check_log_recs() (listing
lines 420-421). */
255  log->old_buf_free = log->buf_free;
256  log->old_lsn = log->lsn;
257 #endif
258  return(log->lsn);
259 }
260 
261 /************************************************************/
/* Copies `str_len` bytes from `str` into the log buffer at log->buf_free,
splitting the copy at log block boundaries (loop via label `part_loop`), and
advances log->lsn and log->buf_free. When a block becomes full the lsn and
buf_free additionally skip LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE and the
header of the next block is initialised. The caller must own the log mutex. */
264 UNIV_INTERN
265 void
/* NOTE(review): listing line 266, the declarator carrying the function name,
is absent; the name used by the call at listing line 449 is log_write_low. */
267 /*==========*/
268  byte* str,
269  ulint str_len)
270 {
271  log_t* log = log_sys;
272  ulint len;
273  ulint data_len;
274  byte* log_block;
275 
276  ut_ad(mutex_own(&(log->mutex)));
277 part_loop:
278  ut_ad(!recv_no_log_write);
279  /* Calculate a part length */
280 
281  data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
282 
283  if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
284 
285  /* The string fits within the current log block */
286 
287  len = str_len;
288  } else {
289  data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
290 
/* NOTE(review): listing lines 291-292 are absent. The line below is the tail
of an assignment (presumably to `len`, which is otherwise unset on this
branch) - confirm against the real source. */
293  - LOG_BLOCK_TRL_SIZE;
294  }
295 
296  ut_memcpy(log->buf + log->buf_free, str, len);
297 
298  str_len -= len;
299  str = str + len;
300 
301  log_block = static_cast<byte*>(
/* NOTE(review): listing line 302 (the call whose arguments follow) is
absent. */
303  log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
304 
305  log_block_set_data_len(log_block, data_len);
306 
307  if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
308  /* This block became full */
/* NOTE(review): listing line 309 is absent. */
310  log_block_set_checkpoint_no(log_block,
311  log_sys->next_checkpoint_no);
/* Account for the trailer of this block and the header of the next one. */
312  len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
313 
314  log->lsn += len;
315 
316  /* Initialize the next block header */
317  log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
318  } else {
319  log->lsn += len;
320  }
321 
322  log->buf_free += len;
323 
324  ut_ad(log->buf_free <= log->buf_size);
325 
326  if (str_len > 0) {
327  goto part_loop;
328  }
329 
/* NOTE(review): listing line 330 is absent. */
331 }
332 
333 /************************************************************/
/* Finishes a log write: fixes up the first-record-group field of the current
log block if it is still zero, raises log->check_flush_or_checkpoint when the
buffer or the checkpoint age crosses its limits, prints a rate-limited error
when the checkpoint age reaches the log group capacity, and returns the
current lsn. The caller must own the log mutex; it is not released here. */
336 UNIV_INTERN
337 lsn_t
/* NOTE(review): listing line 338, the declarator carrying the function name,
is absent; the name used by the call at listing line 454 is log_close. */
339 /*===========*/
340 {
341  byte* log_block;
342  ulint first_rec_group;
343  lsn_t oldest_lsn;
344  lsn_t lsn;
345  log_t* log = log_sys;
346  lsn_t checkpoint_age;
347 
348  ut_ad(mutex_own(&(log->mutex)));
349  ut_ad(!recv_no_log_write);
350 
351  lsn = log->lsn;
352 
353  log_block = static_cast<byte*>(
/* NOTE(review): listing line 354 (the call whose arguments follow) is
absent. */
355  log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
356 
357  first_rec_group = log_block_get_first_rec_group(log_block);
358 
359  if (first_rec_group == 0) {
360  /* We initialized a new log block which was not written
361  full by the current mtr: the next mtr log record group
362  will start within this block at the offset data_len */
363 
/* NOTE(review): listing line 364 (the head of the call whose arguments
follow) is absent. */
365  log_block, log_block_get_data_len(log_block));
366  }
367 
368  if (log->buf_free > log->max_buf_free) {
369 
370  log->check_flush_or_checkpoint = TRUE;
371  }
372 
373  checkpoint_age = lsn - log->last_checkpoint_lsn;
374 
375  if (checkpoint_age >= log->log_group_capacity) {
376  /* TODO: split btr_store_big_rec_extern_fields() into small
377  steps so that we can release all latches in the middle, and
378  call log_free_check() to ensure we never write over log written
379  after the latest checkpoint. In principle, we should split all
380  big_rec operations, but other operations are smaller. */
381 
/* Print the first time, and afterwards only when more than 15 seconds have
passed since the previous warning. */
382  if (!log_has_printed_chkp_warning
383  || difftime(time(NULL), log_last_warning_time) > 15) {
384 
385  log_has_printed_chkp_warning = TRUE;
386  log_last_warning_time = time(NULL);
387 
388  ut_print_timestamp(stderr);
389  fprintf(stderr,
390  " InnoDB: ERROR: the age of the last"
391  " checkpoint is " LSN_PF ",\n"
392  "InnoDB: which exceeds the log group"
393  " capacity " LSN_PF ".\n"
394  "InnoDB: If you are using big"
395  " BLOB or TEXT rows, you must set the\n"
396  "InnoDB: combined size of log files"
397  " at least 10 times bigger than the\n"
398  "InnoDB: largest such row.\n",
399  checkpoint_age,
400  log->log_group_capacity);
401  }
402  }
403 
/* Nothing more to check while the checkpoint age is within the sync limit. */
404  if (checkpoint_age <= log->max_modified_age_sync) {
405 
406  goto function_exit;
407  }
408 
409  oldest_lsn = buf_pool_get_oldest_modification();
410 
411  if (!oldest_lsn
412  || lsn - oldest_lsn > log->max_modified_age_sync
413  || checkpoint_age > log->max_checkpoint_age_async) {
414 
415  log->check_flush_or_checkpoint = TRUE;
416  }
417 function_exit:
418 
419 #ifdef UNIV_LOG_DEBUG
420  log_check_log_recs(log->buf + log->old_buf_free,
421  log->buf_free - log->old_buf_free, log->old_lsn);
422 #endif
423 
424  return(lsn);
425 }
426 
427 #ifdef UNIV_LOG_ARCHIVE
428 /******************************************************/
/* Fills the remainder of the current log block with MLOG_DUMMY_RECORD bytes,
one log_write_low() call per byte, then calls log_close() and log_release()
and asserts that the lsn now sits right after a block header.
Compiled only when UNIV_LOG_ARCHIVE is defined. */
431 static
432 void
433 log_pad_current_log_block(void)
434 /*===========================*/
435 {
436  byte b = MLOG_DUMMY_RECORD;
437  ulint pad_length;
438  ulint i;
439  ib_uint64_t lsn;
440 
441  /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
/* NOTE(review): listing line 442 is absent. No statement visible here
acquires the log mutex before log_write_low(), which asserts ownership -
presumably the missing line does; confirm against the real source. */
443 
/* Bytes still free in the current block, excluding its trailer. */
444  pad_length = OS_FILE_LOG_BLOCK_SIZE
445  - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
446  - LOG_BLOCK_TRL_SIZE;
447 
448  for (i = 0; i < pad_length; i++) {
449  log_write_low(&b, 1);
450  }
451 
452  lsn = log_sys->lsn;
453 
454  log_close();
455  log_release();
456 
457  ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
458 }
459 #endif /* UNIV_LOG_ARCHIVE */
460 
461 /******************************************************/
465 UNIV_INTERN
466 lsn_t
468 /*===================*/
469  const log_group_t* group)
470 {
471  ut_ad(mutex_own(&(log_sys->mutex)));
472 
473  return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
474 }
475 
476 /******************************************************/
480 UNIV_INLINE
481 lsn_t
483 /*=======================*/
484  lsn_t offset,
486  const log_group_t* group)
487 {
488  ut_ad(mutex_own(&(log_sys->mutex)));
489 
490  return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
491 }
492 
493 /******************************************************/
497 UNIV_INLINE
498 lsn_t
500 /*=======================*/
501  lsn_t offset,
503  const log_group_t* group)
504 {
505  ut_ad(mutex_own(&(log_sys->mutex)));
506 
507  return(offset + LOG_FILE_HDR_SIZE
508  * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
509 }
510 
511 /******************************************************/
514 static
515 lsn_t
516 log_group_calc_lsn_offset(
517 /*======================*/
518  lsn_t lsn,
519  const log_group_t* group)
520 {
521  lsn_t gr_lsn;
522  lsn_t gr_lsn_size_offset;
523  lsn_t difference;
524  lsn_t group_size;
525  lsn_t offset;
526 
527  ut_ad(mutex_own(&(log_sys->mutex)));
528 
529  gr_lsn = group->lsn;
530 
531  gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, group);
532 
533  group_size = log_group_get_capacity(group);
534 
535  if (lsn >= gr_lsn) {
536 
537  difference = lsn - gr_lsn;
538  } else {
539  difference = gr_lsn - lsn;
540 
541  difference = difference % group_size;
542 
543  difference = group_size - difference;
544  }
545 
546  offset = (gr_lsn_size_offset + difference) % group_size;
547 
548  /* fprintf(stderr,
549  "Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
550  " difference is " LSN_PF "\n",
551  offset, gr_lsn_size_offset, difference);
552  */
553 
554  return(log_group_calc_real_offset(offset, group));
555 }
556 #endif /* !UNIV_HOTBACKUP */
557 
558 #ifdef UNIV_DEBUG
559 UNIV_INTERN ibool log_debug_writes = FALSE;
560 #endif /* UNIV_DEBUG */
561 
562 /*******************************************************************/
/* Locates an lsn inside a set of n_log_files equally sized log files.
Writes to *log_file_offset the byte offset within the file (with
LOG_FILE_HDR_SIZE added) and returns the index of that file.
first_header_lsn is the lsn that maps to the start of the data area of file 0.
If lsn is smaller than first_header_lsn it is first advanced by whole
multiples of the total capacity until it is not. */
565 UNIV_INTERN
566 ulint
/* NOTE(review): listing line 567, the declarator carrying the function name,
is absent from this listing. */
568 /*==================*/
569  ib_int64_t* log_file_offset,
571  ib_uint64_t first_header_lsn,
573  ib_uint64_t lsn,
575  ulint n_log_files,
577  ib_int64_t log_file_size)
579 {
/* Data bytes per file, i.e. the file size without its header. */
580  ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
581  ulint file_no;
582  ib_int64_t add_this_many;
583 
584  if (lsn < first_header_lsn) {
585  add_this_many = 1 + (first_header_lsn - lsn)
586  / (capacity * (ib_int64_t) n_log_files);
587  lsn += add_this_many
588  * capacity * (ib_int64_t) n_log_files;
589  }
590 
591  ut_a(lsn >= first_header_lsn);
592 
593  file_no = ((ulint)((lsn - first_header_lsn) / capacity))
594  % n_log_files;
595  *log_file_offset = (lsn - first_header_lsn) % capacity;
596 
597  *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
598 
599  return(file_no);
600 }
601 
602 #ifndef UNIV_HOTBACKUP
603 /********************************************************/
/* Re-anchors a log group at `lsn`: stores the offset computed by
log_group_calc_lsn_offset() in group->lsn_offset and then stores lsn in
group->lsn. The offset is computed before group->lsn is overwritten, so it
is derived from the previous anchor. */
607 UNIV_INTERN
608 void
/* NOTE(review): listing line 609, the declarator carrying the function name,
is absent from this listing. */
610 /*=================*/
611  log_group_t* group,
612  lsn_t lsn)
614 {
615  group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
616  group->lsn = lsn;
617 }
618 
619 /*****************************************************************/
624 static
625 ibool
626 log_calc_max_ages(void)
627 /*===================*/
628 {
630  lsn_t margin;
631  ulint free;
632  ibool success = TRUE;
633  lsn_t smallest_capacity;
634  lsn_t archive_margin;
635  lsn_t smallest_archive_margin;
636 
637  mutex_enter(&(log_sys->mutex));
638 
639  group = UT_LIST_GET_FIRST(log_sys->log_groups);
640 
641  ut_ad(group);
642 
643  smallest_capacity = LSN_MAX;
644  smallest_archive_margin = LSN_MAX;
645 
646  while (group) {
647  if (log_group_get_capacity(group) < smallest_capacity) {
648 
649  smallest_capacity = log_group_get_capacity(group);
650  }
651 
652  archive_margin = log_group_get_capacity(group)
653  - (group->file_size - LOG_FILE_HDR_SIZE)
654  - LOG_ARCHIVE_EXTRA_MARGIN;
655 
656  if (archive_margin < smallest_archive_margin) {
657 
658  smallest_archive_margin = archive_margin;
659  }
660 
661  group = UT_LIST_GET_NEXT(log_groups, group);
662  }
663 
664  /* Add extra safety */
665  smallest_capacity = smallest_capacity - smallest_capacity / 10;
666 
667  /* For each OS thread we must reserve so much free space in the
668  smallest log group that it can accommodate the log entries produced
669  by single query steps: running out of free log space is a serious
670  system error which requires rebooting the database. */
671 
672  free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
673  + LOG_CHECKPOINT_EXTRA_FREE;
674  if (free >= smallest_capacity / 2) {
675  success = FALSE;
676 
677  goto failure;
678  } else {
679  margin = smallest_capacity - free;
680  }
681 
682  margin = margin - margin / 10; /* Add still some extra safety */
683 
684  log_sys->log_group_capacity = smallest_capacity;
685 
686  log_sys->max_modified_age_async = margin
687  - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
688  log_sys->max_modified_age_sync = margin
689  - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
690 
691  log_sys->max_checkpoint_age_async = margin - margin
692  / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
693  log_sys->max_checkpoint_age = margin;
694 
695 #ifdef UNIV_LOG_ARCHIVE
696  log_sys->max_archived_lsn_age = smallest_archive_margin;
697 
698  log_sys->max_archived_lsn_age_async = smallest_archive_margin
699  - smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
700 #endif /* UNIV_LOG_ARCHIVE */
701 failure:
702  mutex_exit(&(log_sys->mutex));
703 
704  if (!success) {
705  fprintf(stderr,
706  "InnoDB: Error: ib_logfiles are too small"
707  " for innodb_thread_concurrency %lu.\n"
708  "InnoDB: The combined size of ib_logfiles"
709  " should be bigger than\n"
710  "InnoDB: 200 kB * innodb_thread_concurrency.\n"
711  "InnoDB: To get mysqld to start up, set"
712  " innodb_thread_concurrency in my.cnf\n"
713  "InnoDB: to a lower value, for example, to 8."
714  " After an ERROR-FREE shutdown\n"
715  "InnoDB: of mysqld you can adjust the size of"
716  " ib_logfiles, as explained in\n"
717  "InnoDB: " REFMAN "adding-and-removing.html\n"
718  "InnoDB: Cannot continue operation."
719  " Calling exit(1).\n",
720  (ulong) srv_thread_concurrency);
721 
722  exit(1);
723  }
724 
725  return(success);
726 }
727 
728 /******************************************************/
/* Allocates the global log_sys object and initialises it: creates its
mutexes, allocates the block-aligned log buffer, resets the write, flush and
checkpoint bookkeeping, creates the flush events and the checkpoint lock, and
initialises the first log block so that the lsn starts at
LOG_START_LSN + LOG_BLOCK_HDR_SIZE. */
730 UNIV_INTERN
731 void
732 log_init(void)
733 /*==========*/
734 {
735  log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t)));
736 
737  mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
738 
739  mutex_create(log_flush_order_mutex_key,
740  &log_sys->log_flush_order_mutex,
741  SYNC_LOG_FLUSH_ORDER);
742 
743  mutex_enter(&(log_sys->mutex));
744 
745  /* Start the lsn from one log block from zero: this way every
746  log record has a start lsn != zero, a fact which we will use */
747 
748  log_sys->lsn = LOG_START_LSN;
749 
750  ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
751  ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
752 
/* Over-allocate by one block so that the buffer start can be aligned to
OS_FILE_LOG_BLOCK_SIZE below; buf_ptr keeps the unaligned pointer. */
753  log_sys->buf_ptr = static_cast<byte*>(
754  mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
755 
756  log_sys->buf = static_cast<byte*>(
757  ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
758 
759  log_sys->buf_size = LOG_BUFFER_SIZE;
760 
761  log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
762  - LOG_BUF_FLUSH_MARGIN;
763  log_sys->check_flush_or_checkpoint = TRUE;
764  UT_LIST_INIT(log_sys->log_groups);
765 
766  log_sys->n_log_ios = 0;
767 
768  log_sys->n_log_ios_old = log_sys->n_log_ios;
769  log_sys->last_printout_time = time(NULL);
770  /*----------------------------*/
771 
772  log_sys->buf_next_to_write = 0;
773 
774  log_sys->write_lsn = 0;
775  log_sys->current_flush_lsn = 0;
776  log_sys->flushed_to_disk_lsn = 0;
777 
778  log_sys->written_to_some_lsn = log_sys->lsn;
779  log_sys->written_to_all_lsn = log_sys->lsn;
780 
781  log_sys->n_pending_writes = 0;
782 
783  log_sys->no_flush_event = os_event_create();
784 
785  os_event_set(log_sys->no_flush_event);
786 
787  log_sys->one_flushed_event = os_event_create();
788 
/* NOTE(review): the listing skips line 789 here; a statement may be missing
(no_flush_event is set right after creation above, one_flushed_event is not
in the visible text). */
790 
791  /*----------------------------*/
792 
793  log_sys->next_checkpoint_no = 0;
794  log_sys->last_checkpoint_lsn = log_sys->lsn;
795  log_sys->n_pending_checkpoint_writes = 0;
796 
797 
798  rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
799  SYNC_NO_ORDER_CHECK);
800 
801  log_sys->checkpoint_buf_ptr = static_cast<byte*>(
/* NOTE(review): listing line 802 (the allocation expression closing the
static_cast above) is absent. */
803 
804  log_sys->checkpoint_buf = static_cast<byte*>(
805  ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
806 
807  /*----------------------------*/
808 
809 #ifdef UNIV_LOG_ARCHIVE
810  /* Under MySQL, log archiving is always off */
811  log_sys->archiving_state = LOG_ARCH_OFF;
812  log_sys->archived_lsn = log_sys->lsn;
813  log_sys->next_archived_lsn = 0;
814 
815  log_sys->n_pending_archive_ios = 0;
816 
817  rw_lock_create(archive_lock_key, &log_sys->archive_lock,
818  SYNC_NO_ORDER_CHECK);
819 
820  log_sys->archive_buf = NULL;
821 
822  /* ut_align(
823  ut_malloc(LOG_ARCHIVE_BUF_SIZE
824  + OS_FILE_LOG_BLOCK_SIZE),
825  OS_FILE_LOG_BLOCK_SIZE); */
826  log_sys->archive_buf_size = 0;
827 
828  /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
829 
830  log_sys->archiving_on = os_event_create();
831 #endif /* UNIV_LOG_ARCHIVE */
832 
833  /*----------------------------*/
834 
/* Initialise the first block of the buffer and position the free pointer
and the lsn just after its header. */
835  log_block_init(log_sys->buf, log_sys->lsn);
836  log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
837 
838  log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
839  log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
840 
841  MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
842  log_sys->lsn - log_sys->last_checkpoint_lsn);
843 
844  mutex_exit(&(log_sys->mutex));
845 
846 #ifdef UNIV_LOG_DEBUG
847  recv_sys_create();
/* NOTE(review): the listing skips lines 848 and 852 in this debug-only
section; statements may be missing. */
849 
850  recv_sys->parse_start_lsn = log_sys->lsn;
851  recv_sys->scanned_lsn = log_sys->lsn;
853  recv_sys->recovered_lsn = log_sys->lsn;
854  recv_sys->limit_lsn = LSN_MAX;
855 #endif
856 }
857 
858 /******************************************************************/
/* Allocates a log_group_t, fills it from the arguments, allocates one
block-aligned header buffer per log file, appends the group to
log_sys->log_groups and recomputes the age limits via log_calc_max_ages().
archive_space_id is only used when UNIV_LOG_ARCHIVE is defined. */
860 UNIV_INTERN
861 void
/* NOTE(review): listing line 862, the declarator carrying the function name,
is absent from this listing. */
863 /*===========*/
864  ulint id,
865  ulint n_files,
866  lsn_t file_size,
867  ulint space_id,
870  ulint archive_space_id __attribute__((unused)))
876 {
877  ulint i;
878 
/* NOTE(review): listing line 879 is absent; the local `group` assigned below
has no visible declaration. */
880 
881  group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t)));
882 
883  group->id = id;
884  group->n_files = n_files;
885  group->file_size = file_size;
886  group->space_id = space_id;
887  group->state = LOG_GROUP_OK;
888  group->lsn = LOG_START_LSN;
889  group->lsn_offset = LOG_FILE_HDR_SIZE;
890  group->n_pending_writes = 0;
891 
/* Two parallel pointer arrays: the raw allocations and their aligned
counterparts (filled in the loop below). */
892  group->file_header_bufs_ptr = static_cast<byte**>(
893  mem_zalloc(sizeof(byte*) * n_files));
894 
/* NOTE(review): sizeof(byte**) here versus sizeof(byte*) above; the element
type of the array is byte*, so sizeof(byte*) would be the consistent
spelling. */
895  group->file_header_bufs = static_cast<byte**>(
896  mem_zalloc(sizeof(byte**) * n_files));
897 
898 #ifdef UNIV_LOG_ARCHIVE
/* NOTE(review): these two casts are to byte* although the fields are indexed
below and assigned byte* elements, which suggests the fields are byte** -
confirm whether this section still compiles with UNIV_LOG_ARCHIVE defined. */
899  group->archive_file_header_bufs_ptr = static_cast<byte*>(
900  mem_zalloc( sizeof(byte*) * n_files));
901 
902  group->archive_file_header_bufs = static_cast<byte*>(
903  mem_zalloc(sizeof(byte*) * n_files));
904 #endif /* UNIV_LOG_ARCHIVE */
905 
906  for (i = 0; i < n_files; i++) {
907  group->file_header_bufs_ptr[i] = static_cast<byte*>(
908  mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
909 
910  group->file_header_bufs[i] = static_cast<byte*>(
911  ut_align(group->file_header_bufs_ptr[i],
/* NOTE(review): listing line 912 (the remaining ut_align argument and the
closing parentheses) is absent. */
913 
914 #ifdef UNIV_LOG_ARCHIVE
915  group->archive_file_header_bufs_ptr[i] = static_cast<byte*>(
916  mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
917 
918  group->archive_file_header_bufs[i] = static_cast<byte*>(
919  ut_align(group->archive_file_header_bufs_ptr[i],
/* NOTE(review): listing line 920 (the rest of this ut_align call) is
absent. */
921 #endif /* UNIV_LOG_ARCHIVE */
922  }
923 
924 #ifdef UNIV_LOG_ARCHIVE
925  group->archive_space_id = archive_space_id;
926 
927  group->archived_file_no = 0;
928  group->archived_offset = 0;
929 #endif /* UNIV_LOG_ARCHIVE */
930 
931  group->checkpoint_buf_ptr = static_cast<byte*>(
/* NOTE(review): listing line 932 (the allocation expression) is absent. */
933 
934  group->checkpoint_buf = static_cast<byte*>(
/* NOTE(review): listing line 935 (the alignment expression) is absent. */
936 
937  UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
938 
/* log_calc_max_ages() exits the process itself on failure, as the listing
shows at lines 704-723. */
939  ut_a(log_calc_max_ages());
940 }
941 
942 /******************************************************************/
944 UNIV_INLINE
945 void
947 /*=================*/
948  ulint code)
950 {
951  ut_ad(mutex_own(&(log_sys->mutex)));
952 
953  /* NOTE that we must own the log mutex when doing the setting of the
954  events: this is because transactions will wait for these events to
955  be set, and at that moment the log flush they were waiting for must
956  have ended. If the log mutex were not reserved here, the i/o-thread
957  calling this function might be preempted for a while, and when it
958  resumed execution, it might be that a new flush had been started, and
959  this function would erroneously signal the NEW flush as completed.
960  Thus, the changes in the state of these events are performed
961  atomically in conjunction with the changes in the state of
962  log_sys->n_pending_writes etc. */
963 
964  if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
966  }
967 
968  if (code & LOG_UNLOCK_FLUSH_LOCK) {
969  os_event_set(log_sys->no_flush_event);
970  }
971 }
972 
973 /******************************************************************/
977 UNIV_INLINE
978 ulint
980 /*=============================*/
981  log_group_t* group)
982 {
983  ut_ad(mutex_own(&(log_sys->mutex)));
984 
985  if (!log_sys->one_flushed && group->n_pending_writes == 0) {
986 #ifdef UNIV_DEBUG
987  if (log_debug_writes) {
988  fprintf(stderr,
989  "Log flushed first to group %lu\n",
990  (ulong) group->id);
991  }
992 #endif /* UNIV_DEBUG */
993  log_sys->written_to_some_lsn = log_sys->write_lsn;
994  log_sys->one_flushed = TRUE;
995 
996  return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
997  }
998 
999 #ifdef UNIV_DEBUG
1000  if (log_debug_writes && (group->n_pending_writes == 0)) {
1001 
1002  fprintf(stderr, "Log flushed to group %lu\n",
1003  (ulong) group->id);
1004  }
1005 #endif /* UNIV_DEBUG */
1006  return(0);
1007 }
1008 
1009 /******************************************************/
1012 static
1013 ulint
1014 log_sys_check_flush_completion(void)
1015 /*================================*/
1016 {
1017  ulint move_start;
1018  ulint move_end;
1019 
1020  ut_ad(mutex_own(&(log_sys->mutex)));
1021 
1022  if (log_sys->n_pending_writes == 0) {
1023 
1024  log_sys->written_to_all_lsn = log_sys->write_lsn;
1025  log_sys->buf_next_to_write = log_sys->write_end_offset;
1026 
1027  if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
1028  /* Move the log buffer content to the start of the
1029  buffer */
1030 
1031  move_start = ut_calc_align_down(
1032  log_sys->write_end_offset,
1034  move_end = ut_calc_align(log_sys->buf_free,
1036 
1037  ut_memmove(log_sys->buf, log_sys->buf + move_start,
1038  move_end - move_start);
1039  log_sys->buf_free -= move_start;
1040 
1041  log_sys->buf_next_to_write -= move_start;
1042  }
1043 
1044  return(LOG_UNLOCK_FLUSH_LOCK);
1045  }
1046 
1047  return(0);
1048 }
1049 
1050 /******************************************************/
/* Completion handler for a log i/o. The `group` argument doubles as a
message: the address of log_archive_io denotes an archive write (only with
UNIV_LOG_ARCHIVE), and a pointer with its lowest bit set denotes a checkpoint
write for the group at (pointer - 1). Any other value reaches ut_error. */
1052 UNIV_INTERN
1053 void
/* NOTE(review): listing line 1054, the declarator carrying the function
name, is absent from this listing. */
1055 /*============*/
1056  log_group_t* group)
1057 {
1058  ulint unlock;
1059 
1060 #ifdef UNIV_LOG_ARCHIVE
1061  if ((byte*) group == &log_archive_io) {
1062  /* It was an archive write */
1063 
1064  log_io_complete_archive();
1065 
1066  return;
1067  }
1068 #endif /* UNIV_LOG_ARCHIVE */
1069 
1070  if ((ulint) group & 0x1UL) {
1071  /* It was a checkpoint write */
/* Strip the tag bit to recover the real group pointer. */
1072  group = (log_group_t*)((ulint) group - 1);
1073 
1074  if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1075  && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1076 
1077  fil_flush(group->space_id);
1078  }
1079 
1080 #ifdef UNIV_DEBUG
1081  if (log_debug_writes) {
/* NOTE(review): group->id is passed to %lu without the (ulong) cast that the
other debug prints in this file use (listing lines 990, 1003, 1153). */
1082  fprintf(stderr,
1083  "Checkpoint info written to group %lu\n",
1084  group->id);
1085  }
1086 #endif /* UNIV_DEBUG */
1087  log_io_complete_checkpoint();
1088 
1089  return;
1090  }
1091 
/* NOTE(review): presumably ut_error aborts, which would make everything
below unreachable - confirm. The listing skips lines 1093-1094 after it. */
1092  ut_error;
1095  if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1096  && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
1097  && srv_flush_log_at_trx_commit != 2) {
1098 
1099  fil_flush(group->space_id);
1100  }
1101 
1102  mutex_enter(&(log_sys->mutex));
1103  ut_ad(!recv_no_log_write);
1104 
1105  ut_a(group->n_pending_writes > 0);
1106  ut_a(log_sys->n_pending_writes > 0);
1107 
1108  group->n_pending_writes--;
1109  log_sys->n_pending_writes--;
1110  MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
1111 
/* Combine the per-group and system-wide completion codes and signal the
corresponding events while still holding the mutex. */
1112  unlock = log_group_check_flush_completion(group);
1113  unlock = unlock | log_sys_check_flush_completion();
1114 
1115  log_flush_do_unlocks(unlock);
1116 
1117  mutex_exit(&(log_sys->mutex));
1118 }
1119 
1120 /******************************************************/
/* Fills in the header buffer of the nth_file-th log file of a group (group
id and the start lsn of the file) and writes it with fil_io() at offset
nth_file * group->file_size within the group's space.
The caller must own the log mutex; nth_file must be below group->n_files. */
1122 static
1123 void
1124 log_group_file_header_flush(
1125 /*========================*/
1126  log_group_t* group,
1127  ulint nth_file,
1129  lsn_t start_lsn)
1131 {
1132  byte* buf;
1133  lsn_t dest_offset;
1134 
1135  ut_ad(mutex_own(&(log_sys->mutex)));
1136  ut_ad(!recv_no_log_write);
1137  ut_a(nth_file < group->n_files);
1138 
1139  buf = *(group->file_header_bufs + nth_file);
1140 
1141  mach_write_to_4(buf + LOG_GROUP_ID, group->id);
1142  mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
1143 
1144  /* Wipe over possible label of ibbackup --restore */
/* NOTE(review): as printed, this copies 4 bytes from the literal " ", which
holds only 2 bytes (a space and the terminator). The listing has probably
collapsed a run of spaces; confirm that the real literal is at least 4
characters long. */
1145  memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
1146 
1147  dest_offset = nth_file * group->file_size;
1148 
1149 #ifdef UNIV_DEBUG
1150  if (log_debug_writes) {
1151  fprintf(stderr,
1152  "Writing log file header to group %lu file %lu\n",
1153  (ulong) group->id, (ulong) nth_file);
1154  }
1155 #endif /* UNIV_DEBUG */
1156  if (log_do_write) {
1157  log_sys->n_log_ios++;
1158 
1159  MONITOR_INC(MONITOR_LOG_IO);
1160 
/* NOTE(review): listing line 1161 is absent. */
1162 
1163  fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
1164  (ulint) (dest_offset / UNIV_PAGE_SIZE),
1165  (ulint) (dest_offset % UNIV_PAGE_SIZE),
/* NOTE(review): listing line 1166 is absent. The other fil_io() call in this
file passes a length argument before the buffer (listing line 1294), so one
is presumably missing here. */
1167  buf, group);
1168 
/* NOTE(review): listing line 1169 is absent. */
1170  }
1171 }
1172 
1173 /******************************************************/
/* Helper applied by this file to every log block before it is written
(see the loop at listing lines 1279-1281).
NOTE(review): the body, listing line 1183, is absent from this listing, so
what it stores and where cannot be stated from here. */
1177 static
1178 void
1179 log_block_store_checksum(
1180 /*=====================*/
1181  byte* block)
1182 {
1184 }
1185 
1186 /******************************************************/
/* Writes `len` bytes from `buf` to a log group, starting at the position
that corresponds to start_lsn. The write is split at log file boundaries
(loop via label `loop`); when a chunk begins right after a file header and
write_header is set, the file header is written first. write_header starts
TRUE only if new_data_offset is 0 and is forced TRUE after the first chunk.
Both len and start_lsn must be multiples of OS_FILE_LOG_BLOCK_SIZE, and the
caller must own the log mutex. */
1188 UNIV_INTERN
1189 void
/* NOTE(review): listing line 1190, the declarator carrying the function
name, is absent from this listing. */
1191 /*================*/
1192  log_group_t* group,
1193  byte* buf,
1194  ulint len,
1196  lsn_t start_lsn,
1199  ulint new_data_offset)
1203 {
1204  ulint write_len;
1205  ibool write_header;
1206  lsn_t next_offset;
1207  ulint i;
1208 
1209  ut_ad(mutex_own(&(log_sys->mutex)));
1210  ut_ad(!recv_no_log_write);
1211  ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
1212  ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
1213 
1214  if (new_data_offset == 0) {
1215  write_header = TRUE;
1216  } else {
1217  write_header = FALSE;
1218  }
1219 loop:
1220  if (len == 0) {
1221 
1222  return;
1223  }
1224 
1225  next_offset = log_group_calc_lsn_offset(start_lsn, group);
1226 
1227  if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
1228  && write_header) {
1229  /* We start to write a new log file instance in the group */
1230 
1231  ut_a(next_offset / group->file_size <= ULINT_MAX);
1232 
1233  log_group_file_header_flush(group, (ulint)
1234  (next_offset / group->file_size),
1235  start_lsn);
/* NOTE(review): listing lines 1236 and 1238 are absent. */
1237 
1239  }
1240 
/* Clip the chunk so that it does not run past the end of the current file. */
1241  if ((next_offset % group->file_size) + len > group->file_size) {
1242 
1243  /* if the above condition holds, then the below expression
1244  is < len which is ulint, so the typecast is ok */
1245  write_len = (ulint)
1246  (group->file_size - (next_offset % group->file_size));
1247  } else {
1248  write_len = len;
1249  }
1250 
1251 #ifdef UNIV_DEBUG
1252  if (log_debug_writes) {
1253 
/* NOTE(review): write_len (ulint) is passed to %lu without the (ulong) cast
used for the other arguments. */
1254  fprintf(stderr,
1255  "Writing log file segment to group %lu"
1256  " offset " LSN_PF " len %lu\n"
1257  "start lsn " LSN_PF "\n"
1258  "First block n:o %lu last block n:o %lu\n",
1259  (ulong) group->id, next_offset,
1260  write_len,
1261  start_lsn,
1262  (ulong) log_block_get_hdr_no(buf),
1263  (ulong) log_block_get_hdr_no(
1264  buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
/* NOTE(review): listing line 1265 (the head of the assertion whose tail
follows) is absent. */
1266  == log_block_convert_lsn_to_no(start_lsn));
1267 
1268  for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1269 
1270  ut_a(log_block_get_hdr_no(buf) + i
/* NOTE(review): listing line 1271 (the middle of this assertion) is
absent. */
1272  buf + i * OS_FILE_LOG_BLOCK_SIZE));
1273  }
1274  }
1275 #endif /* UNIV_DEBUG */
1276  /* Calculate the checksums for each log block and write them to
1277  the trailer fields of the log blocks */
1278 
1279  for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1280  log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
1281  }
1282 
1283  if (log_do_write) {
1284  log_sys->n_log_ios++;
1285 
1286  MONITOR_INC(MONITOR_LOG_IO);
1287 
/* NOTE(review): listing line 1288 is absent. */
1289 
1290  ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
1291 
1292  fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
1293  (ulint) (next_offset / UNIV_PAGE_SIZE),
1294  (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
1295  group);
1296 
/* NOTE(review): listing lines 1297 and 1300 are absent. */
1298 
1299  srv_stats.os_log_written.add(write_len);
1301  }
1302 
/* More data left: advance past the chunk just written and continue with the
next file, whose header must then be written. */
1303  if (write_len < len) {
1304  start_lsn += write_len;
1305  len -= write_len;
1306  buf += write_len;
1307 
1308  write_header = TRUE;
1309 
1310  goto loop;
1311  }
1312 }
1313 
1314 /******************************************************/
/* Makes sure the log has been written (and, if requested, flushed to disk) at
least up to lsn, starting a write of the log buffer if none is in progress.

lsn            LSN up to which the log must be written
wait           LOG_NO_WAIT, LOG_WAIT_ONE_GROUP or LOG_WAIT_ALL_GROUPS; only
               consulted when a write that already covers lsn is pending
flush_to_disk  nonzero if the written log must also be flushed to disk

Outline of the visible logic:
 - return at once if the relevant "written"/"flushed" LSN already covers lsn;
 - if a write is pending and covers lsn, jump to do_waits; if it does not
   cover lsn, wait on no_flush_event and retry from the top;
 - otherwise mark a write pending, write the block-aligned part of the log
   buffer to every group, release the mutex for the flush, then re-take it to
   clear the pending counters and run the completion checks.

NOTE(review): the line carrying the function name and several other lines
(embedded numbers 1321, 1342, 1344, 1436-1437, 1439, 1468, 1478, 1481-1482) are
absent from this listing. In particular the `if` that opens the early-return
block right after the declarations is missing, so the code is not complete as
shown. */
1319 UNIV_INTERN
1320 void
1322 /*============*/
1323  lsn_t lsn,
1326  ulint wait,
1328  ibool flush_to_disk)
1331 {
1332  log_group_t* group;
1333  ulint start_offset;
1334  ulint end_offset;
1335  ulint area_start;
1336  ulint area_end;
1337 #ifdef UNIV_DEBUG
1338  ulint loop_count = 0;
1339 #endif /* UNIV_DEBUG */
1340  ulint unlock;
1341 
1343 
1345  /* Recovery is running and no operations on the log files are
1346  allowed yet (the variable name .._no_ibuf_.. is misleading) */
1347 
1348  return;
1349  }
1350 
1351 loop:
1352 #ifdef UNIV_DEBUG
1353  loop_count++;
1354 
1355  ut_ad(loop_count < 5);
1356 
1357 # if 0
1358  if (loop_count > 2) {
1359  fprintf(stderr, "Log loop count %lu\n", loop_count);
1360  }
1361 # endif
1362 #endif
1363 
1364  mutex_enter(&(log_sys->mutex));
1365  ut_ad(!recv_no_log_write);
1366 
 /* Already flushed far enough: nothing to do. */
1367  if (flush_to_disk
1368  && log_sys->flushed_to_disk_lsn >= lsn) {
1369 
1370  mutex_exit(&(log_sys->mutex));
1371 
1372  return;
1373  }
1374 
 /* Already written far enough for a request that needs no flush. */
1375  if (!flush_to_disk
1376  && (log_sys->written_to_all_lsn >= lsn
1377  || (log_sys->written_to_some_lsn >= lsn
1378  && wait != LOG_WAIT_ALL_GROUPS))) {
1379 
1380  mutex_exit(&(log_sys->mutex));
1381 
1382  return;
1383  }
1384 
1385  if (log_sys->n_pending_writes > 0) {
1386  /* A write (+ possibly flush to disk) is running */
1387 
1388  if (flush_to_disk
1389  && log_sys->current_flush_lsn >= lsn) {
1390  /* The write + flush will write enough: wait for it to
1391  complete */
1392 
1393  goto do_waits;
1394  }
1395 
1396  if (!flush_to_disk
1397  && log_sys->write_lsn >= lsn) {
1398  /* The write will write enough: wait for it to
1399  complete */
1400 
1401  goto do_waits;
1402  }
1403 
1404  mutex_exit(&(log_sys->mutex));
1405 
1406  /* Wait for the write to complete and try to start a new
1407  write */
1408 
1409  os_event_wait(log_sys->no_flush_event);
1410 
1411  goto loop;
1412  }
1413 
1414  if (!flush_to_disk
1415  && log_sys->buf_free == log_sys->buf_next_to_write) {
1416  /* Nothing to write and no flush to disk requested */
1417 
1418  mutex_exit(&(log_sys->mutex));
1419 
1420  return;
1421  }
1422 
1423 #ifdef UNIV_DEBUG
1424  if (log_debug_writes) {
1425  fprintf(stderr,
1426  "Writing log from " LSN_PF " up to lsn " LSN_PF "\n",
1427  log_sys->written_to_all_lsn,
1428  log_sys->lsn);
1429  }
1430 #endif /* UNIV_DEBUG */
1431  log_sys->n_pending_writes++;
1432  MONITOR_INC(MONITOR_PENDING_LOG_WRITE);
1433 
1434  group = UT_LIST_GET_FIRST(log_sys->log_groups);
1435  group->n_pending_writes++;
1438  os_event_reset(log_sys->no_flush_event);
1440 
1441  start_offset = log_sys->buf_next_to_write;
1442  end_offset = log_sys->buf_free;
1443 
 /* Widen the range to whole log blocks. */
1444  area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
1445  area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
1446 
1447  ut_ad(area_end - area_start > 0);
1448 
1449  log_sys->write_lsn = log_sys->lsn;
1450 
1451  if (flush_to_disk) {
1452  log_sys->current_flush_lsn = log_sys->lsn;
1453  }
1454 
1455  log_sys->one_flushed = FALSE;
1456 
1457  log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
1458  log_block_set_checkpoint_no(
1459  log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1460  log_sys->next_checkpoint_no);
1461 
1462  /* Copy the last, incompletely written, log block a log block length
1463  up, so that when the flush operation writes from the log buffer, the
1464  segment to write will not be changed by writers to the log */
1465 
1466  ut_memcpy(log_sys->buf + area_end,
1467  log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1469 
1470  log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
1471  log_sys->write_end_offset = log_sys->buf_free;
1472 
1473  group = UT_LIST_GET_FIRST(log_sys->log_groups);
1474 
1475  /* Do the write to the log files */
1476 
1477  while (group) {
1479  group, log_sys->buf + area_start,
1480  area_end - area_start,
1483  start_offset - area_start);
1484 
1485  log_group_set_fields(group, log_sys->write_lsn);
1486 
1487  group = UT_LIST_GET_NEXT(log_groups, group);
1488  }
1489 
 /* The flush below runs without the log mutex held. */
1490  mutex_exit(&(log_sys->mutex));
1491 
1492  if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
1493  /* O_DSYNC means the OS did not buffer the log file at all:
1494  so we have also flushed to disk what we have written */
1495 
1496  log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1497 
1498  } else if (flush_to_disk) {
1499 
1500  group = UT_LIST_GET_FIRST(log_sys->log_groups);
1501 
1502  fil_flush(group->space_id);
1503  log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1504  }
1505 
1506  mutex_enter(&(log_sys->mutex));
1507 
1508  group = UT_LIST_GET_FIRST(log_sys->log_groups);
1509 
 /* Exactly one write (the one started above) may be outstanding. */
1510  ut_a(group->n_pending_writes == 1);
1511  ut_a(log_sys->n_pending_writes == 1);
1512 
1513  group->n_pending_writes--;
1514  log_sys->n_pending_writes--;
1515  MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
1516 
1517  unlock = log_group_check_flush_completion(group);
1518  unlock = unlock | log_sys_check_flush_completion();
1519 
1520  log_flush_do_unlocks(unlock);
1521 
1522  mutex_exit(&(log_sys->mutex));
1523 
1524  return;
1525 
 /* Reached only when a pending write already covers lsn. */
1526 do_waits:
1527  mutex_exit(&(log_sys->mutex));
1528 
1529  switch (wait) {
1530  case LOG_WAIT_ONE_GROUP:
1531  os_event_wait(log_sys->one_flushed_event);
1532  break;
1533  case LOG_WAIT_ALL_GROUPS:
1534  os_event_wait(log_sys->no_flush_event);
1535  break;
1536 #ifdef UNIV_DEBUG
1537  case LOG_NO_WAIT:
1538  break;
1539  default:
1540  ut_error;
1541 #endif /* UNIV_DEBUG */
1542  }
1543 }
1544 
1545 /****************************************************************/
/* Samples log_sys->lsn under the log mutex and then asks log_write_up_to()
to write and flush the log up to that LSN, using LOG_WAIT_ALL_GROUPS.
NOTE(review): the line carrying the function name (embedded number 1549) and
line 1554 are absent from this listing; check the pristine source. */
1547 UNIV_INTERN
1548 void
1550 /*==========================*/
1551 {
1552  lsn_t lsn;
1553 
1555  mutex_enter(&(log_sys->mutex));
1556 
1557  lsn = log_sys->lsn;
1558 
1559  mutex_exit(&(log_sys->mutex));
1560 
 /* Third argument TRUE = flush_to_disk. */
1561  log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
1562 }
1563 
1564 /****************************************************************/
/* Samples log_sys->lsn under the log mutex and then calls log_write_up_to()
for that LSN with LOG_NO_WAIT; the caller chooses whether the write is also
flushed to disk.
NOTE(review): the line carrying the function name (embedded number 1571) is
absent from this listing; check the pristine source. */
1569 UNIV_INTERN
1570 void
1572 /*==========================*/
1573  ibool flush) /* passed through as the flush_to_disk argument */
1574 {
1575  lsn_t lsn;
1576 
1577  mutex_enter(&(log_sys->mutex));
1578 
1579  lsn = log_sys->lsn;
1580 
1581  mutex_exit(&(log_sys->mutex));
1582 
1583  log_write_up_to(lsn, LOG_NO_WAIT, flush);
1584 }
1585 
1586 /********************************************************************
1587 
1588 Tries to establish a big enough margin of free space in the log buffer, such
1589 that a new log entry can be catenated without an immediate need for a flush. */
1590 static
1591 void
1592 log_flush_margin(void)
1593 /*==================*/
1594 {
1595  log_t* log = log_sys;
1596  lsn_t lsn = 0;
1597 
1598  mutex_enter(&(log->mutex));
1599 
1600  if (log->buf_free > log->max_buf_free) {
1601 
1602  if (log->n_pending_writes > 0) {
1603  /* A flush is running: hope that it will provide enough
1604  free space */
1605  } else {
1606  lsn = log->lsn;
1607  }
1608  }
1609 
1610  mutex_exit(&(log->mutex));
1611 
1612  if (lsn) {
1613  log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
1614  }
1615 }
1616 
1617 /****************************************************************/
/* Asks the buffer pool flush list to flush modified pages, passing new_oldest
as the LSN argument of buf_flush_list(), and records monitor statistics.

new_oldest  LSN argument forwarded to buf_flush_list()
return      the result of buf_flush_list(); a false result is additionally
            counted in MONITOR_FLUSH_SYNC_WAITS

NOTE(review): embedded source lines 1643, 1648 and 1654 are absent from this
listing: the body of the recv_recovery_on branch, a statement after the
buf_flush_list() call, and the opening of the final monitor macro call. The
code is not complete as shown. */
1623 static
1624 bool
1625 log_preflush_pool_modified_pages(
1626 /*=============================*/
1627  lsn_t new_oldest)
1629 {
1630  bool success;
1631  ulint n_pages;
1632 
1633  if (recv_recovery_on) {
1634  /* If the recovery is running, we must first apply all
1635  log records to their respective file pages to get the
1636  right modify lsn values to these pages: otherwise, there
1637  might be pages on disk which are not yet recovered to the
1638  current lsn, and even after calling this function, we could
1639  not know how up-to-date the disk version of the database is,
1640  and we could not make a new checkpoint on the basis of the
1641  info on the buffer pool only. */
1642 
1644  }
1645 
 /* ULINT_MAX: no limit is placed on the first argument of the flush. */
1646  success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
1647 
1649 
1650  if (!success) {
1651  MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
1652  }
1653 
1655  MONITOR_FLUSH_SYNC_TOTAL_PAGE,
1656  MONITOR_FLUSH_SYNC_COUNT,
1657  MONITOR_FLUSH_SYNC_PAGES,
1658  n_pages);
1659 
1660  return(success);
1661 }
1662 
1663 /******************************************************/
1665 static
1666 void
1667 log_complete_checkpoint(void)
1668 /*=========================*/
1669 {
1670  ut_ad(mutex_own(&(log_sys->mutex)));
1671  ut_ad(log_sys->n_pending_checkpoint_writes == 0);
1672 
1673  log_sys->next_checkpoint_no++;
1674 
1675  log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
1676  MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
1677  log_sys->lsn - log_sys->last_checkpoint_lsn);
1678 
1679  rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
1680 }
1681 
1682 /******************************************************/
1684 static
1685 void
1686 log_io_complete_checkpoint(void)
1687 /*============================*/
1688 {
1689  mutex_enter(&(log_sys->mutex));
1690 
1691  ut_ad(log_sys->n_pending_checkpoint_writes > 0);
1692 
1693  log_sys->n_pending_checkpoint_writes--;
1694  MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
1695 
1696  if (log_sys->n_pending_checkpoint_writes == 0) {
1697  log_complete_checkpoint();
1698  }
1699 
1700  mutex_exit(&(log_sys->mutex));
1701 }
1702 
1703 /*******************************************************************/
1705 static
1706 void
1707 log_checkpoint_set_nth_group_info(
1708 /*==============================*/
1709  byte* buf,
1710  ulint n,
1711  ulint file_no,
1712  ulint offset)
1713 {
1714  ut_ad(n < LOG_MAX_N_GROUPS);
1715 
1716  mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1717  + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
1718  mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1719  + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
1720 }
1721 
1722 /*******************************************************************/
/* Reads the two 4-byte fields of log group n's entry in a checkpoint info
buffer: the value at LOG_CHECKPOINT_ARCHIVED_FILE_NO into *file_no and the
value at LOG_CHECKPOINT_ARCHIVED_OFFSET into *offset. Entries are 8 bytes
apart, starting at LOG_CHECKPOINT_GROUP_ARRAY.
NOTE(review): the line carrying the function name (embedded number 1726) is
absent from this listing; check the pristine source. */
1724 UNIV_INTERN
1725 void
1727 /*==============================*/
1728  const byte* buf, /* checkpoint info buffer to read from */
1729  ulint n, /* group index; debug-asserted < LOG_MAX_N_GROUPS */
1730  ulint* file_no, /* out: stored archived file number */
1731  ulint* offset) /* out: stored archived offset */
1732 {
1733  ut_ad(n < LOG_MAX_N_GROUPS);
1734 
1735  *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1736  + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
1737  *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1738  + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
1739 }
1740 
1741 /******************************************************/
/* Builds the checkpoint info record for one log group in
group->checkpoint_buf and, when log_do_write is set, issues the write of that
record to one of the two checkpoint slots of the group's file space.

The record holds: checkpoint number, checkpoint LSN, the LSN's offset inside
the group (split into low and high 32 bits), the log buffer size, the archived
LSN (LSN_MAX when archiving is compiled out or off), one entry per log group,
and two ut_fold_binary() checksums. The caller must own the log mutex.

group  log group whose checkpoint record is written

NOTE(review): embedded source lines 1760 and 1853 are absent from this
listing; the second gap drops an argument of the fil_io() call, so the code is
not complete as shown. */
1743 static
1744 void
1745 log_group_checkpoint(
1746 /*=================*/
1747  log_group_t* group)
1748 {
1749  log_group_t* group2;
1750 #ifdef UNIV_LOG_ARCHIVE
1751  ib_uint64_t archived_lsn;
1752  ib_uint64_t next_archived_lsn;
1753 #endif /* UNIV_LOG_ARCHIVE */
1754  lsn_t lsn_offset;
1755  ulint write_offset;
1756  ulint fold;
1757  byte* buf;
1758  ulint i;
1759 
1761  ut_ad(mutex_own(&(log_sys->mutex)));
1762 #if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
1763 # error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
1764 #endif
1765 
1766  buf = group->checkpoint_buf;
1767 
1768  mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
1769  mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
1770 
 /* The 64-bit offset is stored as two separate 32-bit fields. */
1771  lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
1772  group);
1773  mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
1774  lsn_offset & 0xFFFFFFFFUL);
1775  mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32,
1776  lsn_offset >> 32);
1777 
1778  mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
1779 
1780 #ifdef UNIV_LOG_ARCHIVE
1781  if (log_sys->archiving_state == LOG_ARCH_OFF) {
1782  archived_lsn = LSN_MAX;
1783  } else {
1784  archived_lsn = log_sys->archived_lsn;
1785 
1786  if (archived_lsn != log_sys->next_archived_lsn) {
1787  next_archived_lsn = log_sys->next_archived_lsn;
1788  /* For debugging only */
1789  }
1790  }
1791 
1792  mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
1793 #else /* UNIV_LOG_ARCHIVE */
1794  mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
1795 #endif /* UNIV_LOG_ARCHIVE */
1796 
 /* Clear every group entry first, then fill in the groups that exist. */
1797  for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
1798  log_checkpoint_set_nth_group_info(buf, i, 0, 0);
1799  }
1800 
1801  group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
1802 
1803  while (group2) {
1804  log_checkpoint_set_nth_group_info(buf, group2->id,
1805 #ifdef UNIV_LOG_ARCHIVE
1806  group2->archived_file_no,
1807  group2->archived_offset
1808 #else /* UNIV_LOG_ARCHIVE */
1809  0, 0
1810 #endif /* UNIV_LOG_ARCHIVE */
1811  );
1812 
1813  group2 = UT_LIST_GET_NEXT(log_groups, group2);
1814  }
1815 
 /* Checksum 1 covers the record from its start up to the checksum field;
 checksum 2 covers from the LSN field up to the second checksum field. */
1816  fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1817  mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1818 
1819  fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1820  LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1821  mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
1822 
1823  /* We alternate the physical place of the checkpoint info in the first
1824  log file */
1825 
1826  if ((log_sys->next_checkpoint_no & 1) == 0) {
1827  write_offset = LOG_CHECKPOINT_1;
1828  } else {
1829  write_offset = LOG_CHECKPOINT_2;
1830  }
1831 
1832  if (log_do_write) {
 /* The first pending checkpoint write takes the x-lock on
 checkpoint_lock; log_complete_checkpoint() releases it. */
1833  if (log_sys->n_pending_checkpoint_writes == 0) {
1834 
1835  rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
1836  LOG_CHECKPOINT);
1837  }
1838 
1839  log_sys->n_pending_checkpoint_writes++;
1840  MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
1841 
1842  log_sys->n_log_ios++;
1843 
1844  MONITOR_INC(MONITOR_LOG_IO);
1845 
1846  /* We send as the last parameter the group machine address
1847  added with 1, as we want to distinguish between a normal log
1848  file write and a checkpoint field write */
1849 
1850  fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
1851  write_offset / UNIV_PAGE_SIZE,
1852  write_offset % UNIV_PAGE_SIZE,
1854  buf, ((byte*) group + 1));
1855 
 /* The "+ 1" tag only works if the group address has its low bit clear. */
1856  ut_ad(((ulint) group & 0x1UL) == 0);
1857  }
1858 }
1859 #endif /* !UNIV_HOTBACKUP */
1860 
1861 #ifdef UNIV_HOTBACKUP
1862 /******************************************************/
1865 UNIV_INTERN
1866 void
1867 log_reset_first_header_and_checkpoint(
1868 /*==================================*/
1869  byte* hdr_buf,
1871  ib_uint64_t start)
1874 {
1875  ulint fold;
1876  byte* buf;
1877  ib_uint64_t lsn;
1878 
1879  mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
1880  mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
1881 
1882  lsn = start + LOG_BLOCK_HDR_SIZE;
1883 
1884  /* Write the label of ibbackup --restore */
1885  strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
1886  "ibbackup ");
1887  ut_sprintf_timestamp((char*) hdr_buf
1888  + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
1889  + (sizeof "ibbackup ") - 1));
1890  buf = hdr_buf + LOG_CHECKPOINT_1;
1891 
1892  mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
1893  mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
1894 
1895  mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
1896  LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
1897  mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
1898 
1899  mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
1900 
1901  mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
1902 
1903  fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1904  mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1905 
1906  fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1907  LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1908  mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
1909 
1910  /* Starting from InnoDB-3.23.50, we should also write info on
1911  allocated size in the tablespace, but unfortunately we do not
1912  know it here */
1913 }
1914 #endif /* UNIV_HOTBACKUP */
1915 
1916 #ifndef UNIV_HOTBACKUP
1917 /******************************************************/
/* Reads OS_FILE_LOG_BLOCK_SIZE bytes from byte offset `field` of a log
group's file space into log_sys->checkpoint_buf, counting the operation in
n_log_ios and MONITOR_LOG_IO. The caller must own the log mutex.
NOTE(review): the line carrying the function name (embedded number 1921) is
absent from this listing; check the pristine source. */
1919 UNIV_INTERN
1920 void
1922 /*===========================*/
1923  log_group_t* group, /* log group to read from */
1924  ulint field) /* byte offset of the data to read within the file space */
1925 {
1926  ut_ad(mutex_own(&(log_sys->mutex)));
1927 
1928  log_sys->n_log_ios++;
1929 
1930  MONITOR_INC(MONITOR_LOG_IO);
1931 
 /* The offset is passed to fil_io() as a page number plus a byte offset
 within that page. */
1932  fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
1933  field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
1934  OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
1935 }
1936 
1937 /******************************************************/
/* Calls log_group_checkpoint() for every log group in log_sys->log_groups,
unless srv_read_only_mode is set, in which case nothing is done. The caller
must own the log mutex.
NOTE(review): the line carrying the function name (embedded number 1941) is
absent from this listing; check the pristine source. */
1939 UNIV_INTERN
1940 void
1942 /*==================================*/
1943 {
1944  log_group_t* group;
1945 
1946  ut_ad(mutex_own(&(log_sys->mutex)));
1947 
1948  if (!srv_read_only_mode) {
1949  for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
1950  group;
1951  group = UT_LIST_GET_NEXT(log_groups, group)) {
1952 
1953  log_group_checkpoint(group);
1954  }
1955  }
1956 }
1957 
1958 /******************************************************/
/* Attempts to make a checkpoint at the oldest modification LSN reported by
log_buf_pool_get_oldest_modification(). The log is first written and flushed
up to that LSN.

sync          when nonzero, wait for the checkpoint write to complete by
              briefly taking and releasing an s-lock on checkpoint_lock
write_always  when nonzero, proceed even if last_checkpoint_lsn already
              covers the oldest LSN
return        TRUE if the existing checkpoint was already recent enough or a
              new one was started here; FALSE if another checkpoint write was
              already pending

NOTE(review): the line carrying the function name and embedded source lines
1979, 1982, 1986 and 2043 are absent from this listing. The bodies of the two
`if` statements at the top are empty as shown, and whatever statement starts
the checkpoint write (between the debug print and MONITOR_INC) is not
visible. */
1964 UNIV_INTERN
1965 ibool
1967 /*===========*/
1968  ibool sync,
1970  ibool write_always)
1976 {
1977  lsn_t oldest_lsn;
1978 
1980 
1981  if (recv_recovery_is_on()) {
1983  }
1984 
1985  if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1987  }
1988 
1989  mutex_enter(&(log_sys->mutex));
1990 
1991  ut_ad(!recv_no_log_write);
1992  oldest_lsn = log_buf_pool_get_oldest_modification();
1993 
1994  mutex_exit(&(log_sys->mutex));
1995 
1996  /* Because log also contains headers and dummy log records,
1997  if the buffer pool contains no dirty buffers, oldest_lsn
1998  gets the value log_sys->lsn from the previous function,
1999  and we must make sure that the log is flushed up to that
2000  lsn. If there are dirty buffers in the buffer pool, then our
2001  write-ahead-logging algorithm ensures that the log has been flushed
2002  up to oldest_lsn. */
2003 
2004  log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
2005 
2006  mutex_enter(&(log_sys->mutex));
2007 
 /* Nothing to gain: the last checkpoint already covers oldest_lsn. */
2008  if (!write_always
2009  && log_sys->last_checkpoint_lsn >= oldest_lsn) {
2010 
2011  mutex_exit(&(log_sys->mutex));
2012 
2013  return(TRUE);
2014  }
2015 
2016  ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
2017 
2018  if (log_sys->n_pending_checkpoint_writes > 0) {
2019  /* A checkpoint write is running */
2020 
2021  mutex_exit(&(log_sys->mutex));
2022 
2023  if (sync) {
2024  /* Wait for the checkpoint write to complete */
2025  rw_lock_s_lock(&(log_sys->checkpoint_lock));
2026  rw_lock_s_unlock(&(log_sys->checkpoint_lock));
2027  }
2028 
2029  return(FALSE);
2030  }
2031 
2032  log_sys->next_checkpoint_lsn = oldest_lsn;
2033 
2034 #ifdef UNIV_DEBUG
2035  if (log_debug_writes) {
2036  fprintf(stderr, "Making checkpoint no "
2037  LSN_PF " at lsn " LSN_PF "\n",
2038  log_sys->next_checkpoint_no,
2039  oldest_lsn);
2040  }
2041 #endif /* UNIV_DEBUG */
2042 
2044 
2045  MONITOR_INC(MONITOR_NUM_CHECKPOINT);
2046 
2047  mutex_exit(&(log_sys->mutex));
2048 
2049  if (sync) {
2050  /* Wait for the checkpoint write to complete */
2051  rw_lock_s_lock(&(log_sys->checkpoint_lock));
2052  rw_lock_s_unlock(&(log_sys->checkpoint_lock));
2053  }
2054 
2055  return(TRUE);
2056 }
2057 
2058 /****************************************************************/
/* Forces a checkpoint: repeats log_preflush_pool_modified_pages(lsn) until it
reports success, then repeats a synchronous log_checkpoint() until it returns
TRUE. Both loops spin with an empty body.
NOTE(review): the line carrying the function name (embedded number 2062) is
absent from this listing; check the pristine source. */
2060 UNIV_INTERN
2061 void
2063 /*===================*/
2064  lsn_t lsn, /* LSN handed to the preflush step */
2067  ibool write_always) /* forwarded to log_checkpoint() */
2074 {
2075  /* Preflush pages synchronously */
2076 
2077  while (!log_preflush_pool_modified_pages(lsn)) {
2078  /* Flush as much as we can */
2079  }
2080 
2081  while (!log_checkpoint(TRUE, write_always)) {
2082  /* Force a checkpoint */
2083  }
2084 }
2085 
2086 /****************************************************************/
2091 static
2092 void
2093 log_checkpoint_margin(void)
2094 /*=======================*/
2095 {
2096  log_t* log = log_sys;
2097  lsn_t age;
2098  lsn_t checkpoint_age;
2099  ib_uint64_t advance;
2100  lsn_t oldest_lsn;
2101  ibool checkpoint_sync;
2102  ibool do_checkpoint;
2103  bool success;
2104 loop:
2105  checkpoint_sync = FALSE;
2106  do_checkpoint = FALSE;
2107  advance = 0;
2108 
2109  mutex_enter(&(log->mutex));
2110  ut_ad(!recv_no_log_write);
2111 
2112  if (log->check_flush_or_checkpoint == FALSE) {
2113  mutex_exit(&(log->mutex));
2114 
2115  return;
2116  }
2117 
2118  oldest_lsn = log_buf_pool_get_oldest_modification();
2119 
2120  age = log->lsn - oldest_lsn;
2121 
2122  if (age > log->max_modified_age_sync) {
2123 
2124  /* A flush is urgent: we have to do a synchronous preflush */
2125  advance = 2 * (age - log->max_modified_age_sync);
2126  }
2127 
2128  checkpoint_age = log->lsn - log->last_checkpoint_lsn;
2129 
2130  if (checkpoint_age > log->max_checkpoint_age) {
2131  /* A checkpoint is urgent: we do it synchronously */
2132 
2133  checkpoint_sync = TRUE;
2134 
2135  do_checkpoint = TRUE;
2136 
2137  } else if (checkpoint_age > log->max_checkpoint_age_async) {
2138  /* A checkpoint is not urgent: do it asynchronously */
2139 
2140  do_checkpoint = TRUE;
2141 
2142  log->check_flush_or_checkpoint = FALSE;
2143  } else {
2144  log->check_flush_or_checkpoint = FALSE;
2145  }
2146 
2147  mutex_exit(&(log->mutex));
2148 
2149  if (advance) {
2150  lsn_t new_oldest = oldest_lsn + advance;
2151 
2152  success = log_preflush_pool_modified_pages(new_oldest);
2153 
2154  /* If the flush succeeded, this thread has done its part
2155  and can proceed. If it did not succeed, there was another
2156  thread doing a flush at the same time. */
2157  if (!success) {
2158  mutex_enter(&(log->mutex));
2159 
2160  log->check_flush_or_checkpoint = TRUE;
2161 
2162  mutex_exit(&(log->mutex));
2163  goto loop;
2164  }
2165  }
2166 
2167  if (do_checkpoint) {
2168  log_checkpoint(checkpoint_sync, FALSE);
2169 
2170  if (checkpoint_sync) {
2171 
2172  goto loop;
2173  }
2174  }
2175 }
2176 
2177 /******************************************************/
/* Reads the log between start_lsn and end_lsn from a log group into buf,
issuing one fil_io() read per log file touched: a read never crosses the end
of a file, and the loop continues with the remainder until start_lsn reaches
end_lsn. The caller must own the log mutex.

type       kind of read; LOG_RECOVER makes the reads synchronous, and under
           UNIV_LOG_ARCHIVE a LOG_ARCHIVE read bumps n_pending_archive_ios
buf        destination buffer
group      log group to read from
start_lsn  LSN of the first byte to read
end_lsn    LSN one past the last byte to read

NOTE(review): the line carrying the function name (embedded number 2181) is
absent from this listing; check the pristine source. */
2179 UNIV_INTERN
2180 void
2182 /*===================*/
2183  ulint type,
2184  byte* buf,
2185  log_group_t* group,
2186  lsn_t start_lsn,
2187  lsn_t end_lsn)
2188 {
2189  ulint len;
2190  lsn_t source_offset;
2191  bool sync;
2192 
2193  ut_ad(mutex_own(&(log_sys->mutex)));
2194 
2195  sync = (type == LOG_RECOVER);
2196 loop:
2197  source_offset = log_group_calc_lsn_offset(start_lsn, group);
2198 
2199  ut_a(end_lsn - start_lsn <= ULINT_MAX);
2200  len = (ulint) (end_lsn - start_lsn);
2201 
 /* An empty range is only caught by this debug assertion. */
2202  ut_ad(len != 0);
2203 
2204  if ((source_offset % group->file_size) + len > group->file_size) {
2205 
2206  /* If the above condition is true then len (which is ulint)
2207  is > the expression below, so the typecast is ok */
2208  len = (ulint) (group->file_size -
2209  (source_offset % group->file_size));
2210  }
2211 
2212 #ifdef UNIV_LOG_ARCHIVE
2213  if (type == LOG_ARCHIVE) {
2214 
2215  log_sys->n_pending_archive_ios++;
2216  }
2217 #endif /* UNIV_LOG_ARCHIVE */
2218 
2219  log_sys->n_log_ios++;
2220 
2221  MONITOR_INC(MONITOR_LOG_IO);
2222 
2223  ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
2224 
2225  fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
2226  (ulint) (source_offset / UNIV_PAGE_SIZE),
2227  (ulint) (source_offset % UNIV_PAGE_SIZE),
2228  len, buf, NULL);
2229 
2230  start_lsn += len;
2231  buf += len;
2232 
2233  if (start_lsn != end_lsn) {
2234 
2235  goto loop;
2236  }
2237 }
2238 
2239 #ifdef UNIV_LOG_ARCHIVE
2240 /******************************************************/
/* Formats the name of an archived log file into buf as
srv_arch_dir + "ib_arch_log_" + file_no zero-padded to 10 digits.
The write is an unbounded sprintf(): buf must be large enough for
srv_arch_dir plus 22 further characters and the terminating NUL (the caller in
this file passes a 1024-byte array).
NOTE(review): the line carrying the function name (embedded number 2244) is
absent from this listing; the name is taken from the call site in this file. */
2242 UNIV_INTERN
2243 void
2245 /*=======================*/
2246  char* buf, /* out: receives the generated file name */
2247  ulint id __attribute__((unused)), /* not used by the body */
2250  ulint file_no) /* archived log file number */
2251 {
2252  sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
2253 }
2254 
2255 /******************************************************/
/* Fills in the header buffer of the nth archive file of a group (group id,
start LSN, file number, and "archive completed" = FALSE) and issues a write
of it at the start of that file in the group's archive file space. The caller
must own the log mutex.

group      log group being archived
nth_file   index into group->archive_file_header_bufs; hard-asserted to be
           below group->n_files
file_no    archived file number stored in the header
start_lsn  LSN stored as the start LSN of the file

NOTE(review): embedded source line 2292 is absent from this listing, which
drops an argument of the fil_io() call; the code is not complete as shown. */
2257 static
2258 void
2259 log_group_archive_file_header_write(
2260 /*================================*/
2261  log_group_t* group,
2262  ulint nth_file,
2264  ulint file_no,
2265  ib_uint64_t start_lsn)
2267 {
2268  byte* buf;
2269  ulint dest_offset;
2270 
2271  ut_ad(mutex_own(&(log_sys->mutex)));
2272 
2273  ut_a(nth_file < group->n_files);
2274 
2275  buf = *(group->archive_file_header_bufs + nth_file);
2276 
2277  mach_write_to_4(buf + LOG_GROUP_ID, group->id);
2278  mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
2279  mach_write_to_4(buf + LOG_FILE_NO, file_no);
2280 
2281  mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
2282 
 /* Byte offset of the nth file's first byte within the file space. */
2283  dest_offset = nth_file * group->file_size;
2284 
2285  log_sys->n_log_ios++;
2286 
2287  MONITOR_INC(MONITOR_LOG_IO);
2288 
2289  fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
2290  dest_offset / UNIV_PAGE_SIZE,
2291  dest_offset % UNIV_PAGE_SIZE,
2293  buf, &log_archive_io);
2294 }
2295 
2296 /******************************************************/
/* Marks the nth archive file of a group as completely written: sets the
"archive completed" field to TRUE and stores end_lsn in the file's header
buffer, then issues a write that starts at the LOG_FILE_ARCH_COMPLETED field
of that file's header. The caller must own the log mutex.

group     log group being archived
nth_file  index into group->archive_file_header_bufs; hard-asserted to be
          below group->n_files
end_lsn   LSN stored as the end LSN of the file

NOTE(review): embedded source line 2327 is absent from this listing, which
drops an argument of the fil_io() call; the code is not complete as shown. */
2298 static
2299 void
2300 log_group_archive_completed_header_write(
2301 /*=====================================*/
2302  log_group_t* group,
2303  ulint nth_file,
2305  ib_uint64_t end_lsn)
2306 {
2307  byte* buf;
2308  ulint dest_offset;
2309 
2310  ut_ad(mutex_own(&(log_sys->mutex)));
2311  ut_a(nth_file < group->n_files);
2312 
2313  buf = *(group->archive_file_header_bufs + nth_file);
2314 
2315  mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
2316  mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
2317 
 /* Only the tail of the header, from the completed flag on, is written. */
2318  dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
2319 
2320  log_sys->n_log_ios++;
2321 
2322  MONITOR_INC(MONITOR_LOG_IO);
2323 
2324  fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
2325  dest_offset / UNIV_PAGE_SIZE,
2326  dest_offset % UNIV_PAGE_SIZE,
2328  buf + LOG_FILE_ARCH_COMPLETED,
2329  &log_archive_io);
2330 }
2331 
2332 /******************************************************/
/* Writes the contents of log_sys->archive_buf, covering the LSN range from
log_sys->archived_lsn to log_sys->next_archived_lsn, to the archive files of
a log group. The caller must own the log mutex.

Each pass of the loop:
 - when positioned at the start of an archive file, or when the archive file
   space is empty, creates or opens the file, registers it as a node of the
   archive space and, for a new file, writes its header;
 - issues a write of at most the rest of the current file;
 - advances, counting a file as finished when the offset reaches a file
   boundary.
On exit group->next_archived_file_no and group->next_archived_offset describe
where archiving stopped.

The process is terminated with exit(1) if an archive file can be neither
created nor opened.

group  log group to archive

NOTE(review): embedded source line 2456 is absent from this listing, which
drops arguments of the data-writing fil_io() call; the code is not complete as
shown. */
2334 static
2335 void
2336 log_group_archive(
2337 /*==============*/
2338  log_group_t* group)
2339 {
2340  os_file_t file_handle;
2341  lsn_t start_lsn;
2342  lsn_t end_lsn;
2343  char name[1024];
2344  byte* buf;
2345  ulint len;
2346  ibool ret;
2347  lsn_t next_offset;
2348  ulint n_files;
2349  ulint open_mode;
2350 
2351  ut_ad(mutex_own(&(log_sys->mutex)));
2352 
2353  start_lsn = log_sys->archived_lsn;
2354 
2355  ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2356 
2357  end_lsn = log_sys->next_archived_lsn;
2358 
2359  ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2360 
2361  buf = log_sys->archive_buf;
2362 
2363  n_files = 0;
2364 
2365  next_offset = group->archived_offset;
2366 loop:
2367  if ((next_offset % group->file_size == 0)
2368  || (fil_space_get_size(group->archive_space_id) == 0)) {
2369 
2370  /* Add the file to the archive file space; create or open the
2371  file */
2372 
2373  if (next_offset % group->file_size == 0) {
2374  open_mode = OS_FILE_CREATE;
2375  } else {
2376  open_mode = OS_FILE_OPEN;
2377  }
2378 
2379  log_archived_file_name_gen(name, group->id,
2380  group->archived_file_no + n_files);
2381 
2382  file_handle = os_file_create(innodb_file_log_key,
2383  name, open_mode,
2384  OS_FILE_AIO,
2385  OS_DATA_FILE, &ret);
2386 
 /* If creation failed, retry by opening the file instead. */
2387  if (!ret && (open_mode == OS_FILE_CREATE)) {
2388  file_handle = os_file_create(
2389  innodb_file_log_key, name, OS_FILE_OPEN,
2390  OS_FILE_AIO, OS_DATA_FILE, &ret);
2391  }
2392 
2393  if (!ret) {
2394  fprintf(stderr,
2395  "InnoDB: Cannot create or open"
2396  " archive log file %s.\n"
2397  "InnoDB: Cannot continue operation.\n"
2398  "InnoDB: Check that the log archive"
2399  " directory exists,\n"
2400  "InnoDB: you have access rights to it, and\n"
2401  "InnoDB: there is space available.\n", name);
2402  exit(1);
2403  }
2404 
2405 #ifdef UNIV_DEBUG
2406  if (log_debug_writes) {
2407  fprintf(stderr, "Created archive file %s\n", name);
2408  }
2409 #endif /* UNIV_DEBUG */
2410 
 /* The handle is closed again right away; the file is then registered
 with the archive space by name. */
2411  ret = os_file_close(file_handle);
2412 
2413  ut_a(ret);
2414 
2415  /* Add the archive file as a node to the space */
2416 
2417  fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
2418  group->archive_space_id, FALSE);
2419 
2420  if (next_offset % group->file_size == 0) {
2421  log_group_archive_file_header_write(
2422  group, n_files,
2423  group->archived_file_no + n_files,
2424  start_lsn);
2425 
2426  next_offset += LOG_FILE_HDR_SIZE;
2427  }
2428  }
2429 
 /* NOTE(review): lsn_t difference assigned to ulint without a range check;
 log_group_read_log_seg asserts <= ULINT_MAX in the same situation. */
2430  len = end_lsn - start_lsn;
2431 
2432  if (group->file_size < (next_offset % group->file_size) + len) {
2433 
2434  len = group->file_size - (next_offset % group->file_size);
2435  }
2436 
2437 #ifdef UNIV_DEBUG
2438  if (log_debug_writes) {
2439  fprintf(stderr,
2440  "Archiving starting at lsn " LSN_PF ", len %lu"
2441  " to group %lu\n",
2442  start_lsn,
2443  (ulong) len, (ulong) group->id);
2444  }
2445 #endif /* UNIV_DEBUG */
2446 
2447  log_sys->n_pending_archive_ios++;
2448 
2449  log_sys->n_log_ios++;
2450 
2451  MONITOR_INC(MONITOR_LOG_IO);
2452 
2453  fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
2454  (ulint) (next_offset / UNIV_PAGE_SIZE),
2455  (ulint) (next_offset % UNIV_PAGE_SIZE),
2457  &log_archive_io);
2458 
2459  start_lsn += len;
2460  next_offset += len;
2461  buf += len;
2462 
2463  if (next_offset % group->file_size == 0) {
2464  n_files++;
2465  }
2466 
2467  if (end_lsn != start_lsn) {
2468 
2469  goto loop;
2470  }
2471 
2472  group->next_archived_file_no = group->archived_file_no + n_files;
2473  group->next_archived_offset = next_offset % group->file_size;
2474 
2475  ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
2476 }
2477 
2478 /*****************************************************/
2481 static
2482 void
2483 log_archive_groups(void)
2484 /*====================*/
2485 {
2486  log_group_t* group;
2487 
2488  ut_ad(mutex_own(&(log_sys->mutex)));
2489 
2490  group = UT_LIST_GET_FIRST(log_sys->log_groups);
2491 
2492  log_group_archive(group);
2493 }
2494 
2495 /*****************************************************/
2498 static
2499 void
2500 log_archive_write_complete_groups(void)
2501 /*===================================*/
2502 {
2503  log_group_t* group;
2504  ulint end_offset;
2505  ulint trunc_files;
2506  ulint n_files;
2507  ib_uint64_t start_lsn;
2508  ib_uint64_t end_lsn;
2509  ulint i;
2510 
2511  ut_ad(mutex_own(&(log_sys->mutex)));
2512 
2513  group = UT_LIST_GET_FIRST(log_sys->log_groups);
2514 
2515  group->archived_file_no = group->next_archived_file_no;
2516  group->archived_offset = group->next_archived_offset;
2517 
2518  /* Truncate from the archive file space all but the last
2519  file, or if it has been written full, all files */
2520 
2521  n_files = (UNIV_PAGE_SIZE
2522  * fil_space_get_size(group->archive_space_id))
2523  / group->file_size;
2524  ut_ad(n_files > 0);
2525 
2526  end_offset = group->archived_offset;
2527 
2528  if (end_offset % group->file_size == 0) {
2529 
2530  trunc_files = n_files;
2531  } else {
2532  trunc_files = n_files - 1;
2533  }
2534 
2535 #ifdef UNIV_DEBUG
2536  if (log_debug_writes && trunc_files) {
2537  fprintf(stderr,
2538  "Complete file(s) archived to group %lu\n",
2539  (ulong) group->id);
2540  }
2541 #endif /* UNIV_DEBUG */
2542 
2543  /* Calculate the archive file space start lsn */
2544  start_lsn = log_sys->next_archived_lsn
2545  - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
2546  * (group->file_size - LOG_FILE_HDR_SIZE));
2547  end_lsn = start_lsn;
2548 
2549  for (i = 0; i < trunc_files; i++) {
2550 
2551  end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
2552 
2553  /* Write a notice to the headers of archived log
2554  files that the file write has been completed */
2555 
2556  log_group_archive_completed_header_write(group, i, end_lsn);
2557  }
2558 
2559  fil_space_truncate_start(group->archive_space_id,
2560  trunc_files * group->file_size);
2561 
2562 #ifdef UNIV_DEBUG
2563  if (log_debug_writes) {
2564  fputs("Archiving writes completed\n", stderr);
2565  }
2566 #endif /* UNIV_DEBUG */
2567 }
2568 
2569 /******************************************************/
2571 static
2572 void
2573 log_archive_check_completion_low(void)
2574 /*==================================*/
2575 {
2576  ut_ad(mutex_own(&(log_sys->mutex)));
2577 
2578  if (log_sys->n_pending_archive_ios == 0
2579  && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
2580 
2581 #ifdef UNIV_DEBUG
2582  if (log_debug_writes) {
2583  fputs("Archiving read completed\n", stderr);
2584  }
2585 #endif /* UNIV_DEBUG */
2586 
2587  /* Archive buffer has now been read in: start archive writes */
2588 
2589  log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
2590 
2591  log_archive_groups();
2592  }
2593 
2594  if (log_sys->n_pending_archive_ios == 0
2595  && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
2596 
2597  log_archive_write_complete_groups();
2598 
2599  log_sys->archived_lsn = log_sys->next_archived_lsn;
2600 
2601  rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
2602  }
2603 }
2604 
2605 /******************************************************/
2607 static
2608 void
2609 log_io_complete_archive(void)
2610 /*=========================*/
2611 {
2612  log_group_t* group;
2613 
2614  mutex_enter(&(log_sys->mutex));
2615 
2616  group = UT_LIST_GET_FIRST(log_sys->log_groups);
2617 
2618  mutex_exit(&(log_sys->mutex));
2619 
2620  fil_flush(group->archive_space_id);
2621 
2622  mutex_enter(&(log_sys->mutex));
2623 
2624  ut_ad(log_sys->n_pending_archive_ios > 0);
2625 
2626  log_sys->n_pending_archive_ios--;
2627 
2628  log_archive_check_completion_low();
2629 
2630  mutex_exit(&(log_sys->mutex));
2631 }
2632 
2633 /********************************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 2638) is absent from this listing. Callers in this file invoke
log_archive_do(sync, &n_bytes), which matches this signature - confirm.

Starts one archive batch covering the log from log_sys->archived_lsn up to
at most log_sys->archive_buf_size further bytes (capped at log_sys->lsn
aligned down to a log block boundary).

Parameters:
  sync    - in: if TRUE, take and immediately release archive_lock in S
            mode before returning; the batch holds that lock in X mode
  n_bytes - out: 0 when nothing is archived, otherwise
            log_sys->archive_buf_size

Returns TRUE when archiving is off, when there is nothing to archive, or
when a batch was started; FALSE when an archive operation was already
pending. */
2636 UNIV_INTERN
2637 ibool
2639 /*===========*/
2640  ibool sync,
2641  ulint* n_bytes)
2643 {
2644  ibool calc_new_limit;
2645  ib_uint64_t start_lsn;
2646  ib_uint64_t limit_lsn;
2647 
2648  calc_new_limit = TRUE;
2649 loop:
2650  mutex_enter(&(log_sys->mutex));
2651 
/* States not listed in the switch fall through to the archiving code below */
2652  switch (log_sys->archiving_state) {
2653  case LOG_ARCH_OFF:
2654 arch_none:
2655  mutex_exit(&(log_sys->mutex));
2656 
2657  *n_bytes = 0;
2658 
2659  return(TRUE);
2660  case LOG_ARCH_STOPPED:
2661  case LOG_ARCH_STOPPING2:
2662  mutex_exit(&(log_sys->mutex));
2663 
/* Block until the archiving_on event is set, then re-evaluate the state */
2664  os_event_wait(log_sys->archiving_on);
2665 
2666  goto loop;
2667  }
2668 
2669  start_lsn = log_sys->archived_lsn;
2670 
/* The limit is computed on the first pass only; after log_write_up_to()
below the loop is re-entered with calc_new_limit == FALSE so the same
limit_lsn is kept */
2671  if (calc_new_limit) {
2672  ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
2673  limit_lsn = start_lsn + log_sys->archive_buf_size;
2674 
2675  *n_bytes = log_sys->archive_buf_size;
2676 
2677  if (limit_lsn >= log_sys->lsn) {
2678 
2679  limit_lsn = ut_uint64_align_down(
2680  log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
2681  }
2682  }
2683 
2684  if (log_sys->archived_lsn >= limit_lsn) {
2685 
2686  goto arch_none;
2687  }
2688 
/* The log must have been written to all groups up to limit_lsn before it
can be read back for archiving */
2689  if (log_sys->written_to_all_lsn < limit_lsn) {
2690 
2691  mutex_exit(&(log_sys->mutex));
2692 
2693  log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
2694 
2695  calc_new_limit = FALSE;
2696 
2697  goto loop;
2698  }
2699 
2700  if (log_sys->n_pending_archive_ios > 0) {
2701  /* An archiving operation is running */
2702 
2703  mutex_exit(&(log_sys->mutex));
2704 
2705  if (sync) {
2706  rw_lock_s_lock(&(log_sys->archive_lock));
2707  rw_lock_s_unlock(&(log_sys->archive_lock));
2708  }
2709 
2710  *n_bytes = log_sys->archive_buf_size;
2711 
2712  return(FALSE);
2713  }
2714 
/* This X lock is released in log_archive_check_completion_low() once the
write phase has finished */
2715  rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
2716 
2717  log_sys->archiving_phase = LOG_ARCHIVE_READ;
2718 
2719  log_sys->next_archived_lsn = limit_lsn;
2720 
2721 #ifdef UNIV_DEBUG
2722  if (log_debug_writes) {
2723  fprintf(stderr,
2724  "Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n",
2725  log_sys->archived_lsn, limit_lsn);
2726  }
2727 #endif /* UNIV_DEBUG */
2728 
2729  /* Read the log segment to the archive buffer */
2730 
2731  log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
2732  UT_LIST_GET_FIRST(log_sys->log_groups),
2733  start_lsn, limit_lsn);
2734 
2735  mutex_exit(&(log_sys->mutex));
2736 
2737  if (sync) {
2738  rw_lock_s_lock(&(log_sys->archive_lock));
2739  rw_lock_s_unlock(&(log_sys->archive_lock));
2740  }
2741 
2742  *n_bytes = log_sys->archive_buf_size;
2743 
2744  return(TRUE);
2745 }
2746 
2747 /****************************************************************/
2750 static
2751 void
2752 log_archive_all(void)
2753 /*=================*/
2754 {
2755  ib_uint64_t present_lsn;
2756  ulint dummy;
2757 
2758  mutex_enter(&(log_sys->mutex));
2759 
2760  if (log_sys->archiving_state == LOG_ARCH_OFF) {
2761  mutex_exit(&(log_sys->mutex));
2762 
2763  return;
2764  }
2765 
2766  present_lsn = log_sys->lsn;
2767 
2768  mutex_exit(&(log_sys->mutex));
2769 
2770  log_pad_current_log_block();
2771 
2772  for (;;) {
2773  mutex_enter(&(log_sys->mutex));
2774 
2775  if (present_lsn <= log_sys->archived_lsn) {
2776 
2777  mutex_exit(&(log_sys->mutex));
2778 
2779  return;
2780  }
2781 
2782  mutex_exit(&(log_sys->mutex));
2783 
2784  log_archive_do(TRUE, &dummy);
2785  }
2786 }
2787 
2788 /*****************************************************/
2791 static
2792 void
2793 log_archive_close_groups(
2794 /*=====================*/
2795  ibool increment_file_count)
2797 {
2798  log_group_t* group;
2799  ulint trunc_len;
2800 
2801  ut_ad(mutex_own(&(log_sys->mutex)));
2802 
2803  if (log_sys->archiving_state == LOG_ARCH_OFF) {
2804 
2805  return;
2806  }
2807 
2808  group = UT_LIST_GET_FIRST(log_sys->log_groups);
2809 
2810  trunc_len = UNIV_PAGE_SIZE
2811  * fil_space_get_size(group->archive_space_id);
2812  if (trunc_len > 0) {
2813  ut_a(trunc_len == group->file_size);
2814 
2815  /* Write a notice to the headers of archived log
2816  files that the file write has been completed */
2817 
2818  log_group_archive_completed_header_write(
2819  group, 0, log_sys->archived_lsn);
2820 
2821  fil_space_truncate_start(group->archive_space_id,
2822  trunc_len);
2823  if (increment_file_count) {
2824  group->archived_offset = 0;
2825  group->archived_file_no += 2;
2826  }
2827 
2828 #ifdef UNIV_DEBUG
2829  if (log_debug_writes) {
2830  fprintf(stderr,
2831  "Incrementing arch file no to %lu"
2832  " in log group %lu\n",
2833  (ulong) group->archived_file_no + 2,
2834  (ulong) group->id);
2835  }
2836 #endif /* UNIV_DEBUG */
2837  }
2838 }
2839 
2840 /****************************************************************/
2846 UNIV_INTERN
2847 ulint
2848 log_archive_stop(void)
2849 /*==================*/
2850 {
2851  ibool success;
2852 
2853  mutex_enter(&(log_sys->mutex));
2854 
2855  if (log_sys->archiving_state != LOG_ARCH_ON) {
2856 
2857  mutex_exit(&(log_sys->mutex));
2858 
2859  return(DB_ERROR);
2860  }
2861 
2862  log_sys->archiving_state = LOG_ARCH_STOPPING;
2863 
2864  mutex_exit(&(log_sys->mutex));
2865 
2866  log_archive_all();
2867 
2868  mutex_enter(&(log_sys->mutex));
2869 
2870  log_sys->archiving_state = LOG_ARCH_STOPPING2;
2871  os_event_reset(log_sys->archiving_on);
2872 
2873  mutex_exit(&(log_sys->mutex));
2874 
2875  /* Wait for a possible archiving operation to end */
2876 
2877  rw_lock_s_lock(&(log_sys->archive_lock));
2878  rw_lock_s_unlock(&(log_sys->archive_lock));
2879 
2880  mutex_enter(&(log_sys->mutex));
2881 
2882  /* Close all archived log files, incrementing the file count by 2,
2883  if appropriate */
2884 
2885  log_archive_close_groups(TRUE);
2886 
2887  mutex_exit(&(log_sys->mutex));
2888 
2889  /* Make a checkpoint, so that if recovery is needed, the file numbers
2890  of new archived log files will start from the right value */
2891 
2892  success = FALSE;
2893 
2894  while (!success) {
2895  success = log_checkpoint(TRUE, TRUE);
2896  }
2897 
2898  mutex_enter(&(log_sys->mutex));
2899 
2900  log_sys->archiving_state = LOG_ARCH_STOPPED;
2901 
2902  mutex_exit(&(log_sys->mutex));
2903 
2904  return(DB_SUCCESS);
2905 }
2906 
2907 /****************************************************************/
2910 UNIV_INTERN
2911 ulint
2912 log_archive_start(void)
2913 /*===================*/
2914 {
2915  mutex_enter(&(log_sys->mutex));
2916 
2917  if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
2918 
2919  mutex_exit(&(log_sys->mutex));
2920 
2921  return(DB_ERROR);
2922  }
2923 
2924  log_sys->archiving_state = LOG_ARCH_ON;
2925 
2926  os_event_set(log_sys->archiving_on);
2927 
2928  mutex_exit(&(log_sys->mutex));
2929 
2930  return(DB_SUCCESS);
2931 }
2932 
2933 /****************************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 2938) is absent from this listing - restore it from the original file.

Switches the archiving state to LOG_ARCH_OFF. While the state is neither
LOG_ARCH_STOPPED nor LOG_ARCH_OFF, calls log_archive_stop(), sleeps and
retries. Returns DB_SUCCESS once the state has been set to LOG_ARCH_OFF;
no other return path exists in this function. */
2936 UNIV_INTERN
2937 ulint
2939 /*==========================*/
2940 {
2941 loop:
2942  mutex_enter(&(log_sys->mutex));
2943 
2944  if (log_sys->archiving_state == LOG_ARCH_STOPPED
2945  || log_sys->archiving_state == LOG_ARCH_OFF) {
2946 
2947  log_sys->archiving_state = LOG_ARCH_OFF;
2948 
/* Setting the event releases waiters blocked on archiving_on */
2949  os_event_set(log_sys->archiving_on);
2950 
2951  mutex_exit(&(log_sys->mutex));
2952 
2953  return(DB_SUCCESS);
2954  }
2955 
2956  mutex_exit(&(log_sys->mutex));
2957 
/* The return value of log_archive_stop() is ignored; the state is simply
re-checked on the next iteration */
2958  log_archive_stop();
2959 
/* presumably 0.5 s (argument looks like microseconds) - TODO confirm */
2960  os_thread_sleep(500000);
2961 
2962  goto loop;
2963 }
2964 
2965 /****************************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 2970) and listing line 2981 are absent from this listing; as a result
the ut_uint64_align_down() call below is not terminated. Restore both lines
from the original file.

Turns archiving on when it is currently off: sets the state to LOG_ARCH_ON
and sets log_sys->archived_lsn from log_sys->lsn aligned down.
Returns DB_SUCCESS if the state was LOG_ARCH_OFF, DB_ERROR otherwise. */
2968 UNIV_INTERN
2969 ulint
2971 /*========================*/
2972 {
2973  mutex_enter(&(log_sys->mutex));
2974 
2975  if (log_sys->archiving_state == LOG_ARCH_OFF) {
2976 
2977  log_sys->archiving_state = LOG_ARCH_ON;
2978 
2979  log_sys->archived_lsn
2980  = ut_uint64_align_down(log_sys->lsn,
2982  mutex_exit(&(log_sys->mutex));
2983 
2984  return(DB_SUCCESS);
2985  }
2986 
2987  mutex_exit(&(log_sys->mutex));
2988 
2989  return(DB_ERROR);
2990 }
2991 
2992 /****************************************************************/
2996 static
2997 void
2998 log_archive_margin(void)
2999 /*====================*/
3000 {
3001  log_t* log = log_sys;
3002  ulint age;
3003  ibool sync;
3004  ulint dummy;
3005 loop:
3006  mutex_enter(&(log->mutex));
3007 
3008  if (log->archiving_state == LOG_ARCH_OFF) {
3009  mutex_exit(&(log->mutex));
3010 
3011  return;
3012  }
3013 
3014  age = log->lsn - log->archived_lsn;
3015 
3016  if (age > log->max_archived_lsn_age) {
3017 
3018  /* An archiving is urgent: we have to do synchronous i/o */
3019 
3020  sync = TRUE;
3021 
3022  } else if (age > log->max_archived_lsn_age_async) {
3023 
3024  /* An archiving is not urgent: we do asynchronous i/o */
3025 
3026  sync = FALSE;
3027  } else {
3028  /* No archiving required yet */
3029 
3030  mutex_exit(&(log->mutex));
3031 
3032  return;
3033  }
3034 
3035  mutex_exit(&(log->mutex));
3036 
3037  log_archive_do(sync, &dummy);
3038 
3039  if (sync == TRUE) {
3040  /* Check again that enough was written to the archive */
3041 
3042  goto loop;
3043  }
3044 }
3045 #endif /* UNIV_LOG_ARCHIVE */
3046 
3047 /********************************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3054) is absent from this listing - restore it from the original file.

Runs the flush margin and checkpoint margin checks (and, when
UNIV_LOG_ARCHIVE is defined, the archive margin check), and repeats them
for as long as log_sys->check_flush_or_checkpoint is still set afterwards. */
3052 UNIV_INTERN
3053 void
3055 /*===================*/
3056 {
3057 loop:
3058  log_flush_margin();
3059 
3060  log_checkpoint_margin();
3061 
3062 #ifdef UNIV_LOG_ARCHIVE
3063  log_archive_margin();
3064 #endif /* UNIV_LOG_ARCHIVE */
3065 
/* The flag is read under the log mutex */
3066  mutex_enter(&(log_sys->mutex));
3067  ut_ad(!recv_no_log_write);
3068 
3069  if (log_sys->check_flush_or_checkpoint) {
3070 
3071  mutex_exit(&(log_sys->mutex));
3072 
3073  goto loop;
3074  }
3075 
3076  mutex_exit(&(log_sys->mutex));
3077 }
3078 
3079 /****************************************************************/
3084 UNIV_INTERN
3085 void
3087 /*=======================================*/
3088 {
3089  lsn_t lsn;
3090  ulint arch_log_no;
3091  ulint count = 0;
3092  ulint total_trx;
3093  ulint pending_io;
3094  enum srv_thread_type active_thd;
3095  const char* thread_name;
3096  ibool server_busy;
3097 
3098  ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
3099 
3100  /* Wait until the master thread and all other operations are idle: our
3101  algorithm only works if the server is idle at shutdown */
3102 
3104 loop:
3105  os_thread_sleep(100000);
3106 
3107  count++;
3108 
3109  /* We need the monitor threads to stop before we proceed with
3110  a shutdown. */
3111 
3112  thread_name = srv_any_background_threads_are_active();
3113 
3114  if (thread_name != NULL) {
3115  /* Print a message every 60 seconds if we are waiting
3116  for the monitor thread to exit. Master and worker
3117  threads check will be done later. */
3118 
3119  if (srv_print_verbose_log && count > 600) {
3120  ib_logf(IB_LOG_LEVEL_INFO,
3121  "Waiting for %s to exit", thread_name);
3122  count = 0;
3123  }
3124 
3125  goto loop;
3126  }
3127 
3128  /* Check that there are no longer transactions, except for
3129  PREPARED ones. We need this wait even for the 'very fast'
3130  shutdown, because the InnoDB layer may have committed or
3131  prepared transactions and we don't want to lose them. */
3132 
3133  total_trx = trx_sys_any_active_transactions();
3134 
3135  if (total_trx > 0) {
3136 
3137  if (srv_print_verbose_log && count > 600) {
3138  ib_logf(IB_LOG_LEVEL_INFO,
3139  "Waiting for %lu active transactions to finish",
3140  (ulong) total_trx);
3141 
3142  count = 0;
3143  }
3144 
3145  goto loop;
3146  }
3147 
3148  /* Check that the background threads are suspended */
3149 
3150  active_thd = srv_get_active_thread_type();
3151 
3152  if (active_thd != SRV_NONE) {
3153 
3154  if (active_thd == SRV_PURGE) {
3155  srv_purge_wakeup();
3156  }
3157 
3158  /* The srv_lock_timeout_thread, srv_error_monitor_thread
3159  and srv_monitor_thread should already exit by now. The
3160  only threads to be suspended are the master threads
3161  and worker threads (purge threads). Print the thread
3162  type if any of such threads not in suspended mode */
3163  if (srv_print_verbose_log && count > 600) {
3164  const char* thread_type = "<null>";
3165 
3166  switch (active_thd) {
3167  case SRV_NONE:
3168  /* This shouldn't happen because we've
3169  already checked for this case before
3170  entering the if(). We handle it here
3171  to avoid a compiler warning. */
3172  ut_error;
3173  case SRV_WORKER:
3174  thread_type = "worker threads";
3175  break;
3176  case SRV_MASTER:
3177  thread_type = "master thread";
3178  break;
3179  case SRV_PURGE:
3180  thread_type = "purge thread";
3181  break;
3182  }
3183 
3184  ib_logf(IB_LOG_LEVEL_INFO,
3185  "Waiting for %s to be suspended",
3186  thread_type);
3187  count = 0;
3188  }
3189 
3190  goto loop;
3191  }
3192 
3193  /* At this point only page_cleaner should be active. We wait
3194  here to let it complete the flushing of the buffer pools
3195  before proceeding further. */
3197  count = 0;
3198  while (buf_page_cleaner_is_active) {
3199  ++count;
3200  os_thread_sleep(100000);
3201  if (srv_print_verbose_log && count > 600) {
3202  ib_logf(IB_LOG_LEVEL_INFO,
3203  "Waiting for page_cleaner to "
3204  "finish flushing of buffer pool");
3205  count = 0;
3206  }
3207  }
3208 
3209  mutex_enter(&log_sys->mutex);
3210  server_busy = log_sys->n_pending_checkpoint_writes
3211 #ifdef UNIV_LOG_ARCHIVE
3212  || log_sys->n_pending_archive_ios
3213 #endif /* UNIV_LOG_ARCHIVE */
3214  || log_sys->n_pending_writes;
3215  mutex_exit(&log_sys->mutex);
3216 
3217  if (server_busy) {
3218  if (srv_print_verbose_log && count > 600) {
3219  ib_logf(IB_LOG_LEVEL_INFO,
3220  "Pending checkpoint_writes: %lu. "
3221  "Pending log flush writes: %lu",
3222  (ulong) log_sys->n_pending_checkpoint_writes,
3223  (ulong) log_sys->n_pending_writes);
3224  count = 0;
3225  }
3226  goto loop;
3227  }
3228 
3229  pending_io = buf_pool_check_no_pending_io();
3230 
3231  if (pending_io) {
3232  if (srv_print_verbose_log && count > 600) {
3233  ib_logf(IB_LOG_LEVEL_INFO,
3234  "Waiting for %lu buffer page I/Os to complete",
3235  (ulong) pending_io);
3236  count = 0;
3237  }
3238 
3239  goto loop;
3240  }
3241 
3242 #ifdef UNIV_LOG_ARCHIVE
3243  log_archive_all();
3244 #endif /* UNIV_LOG_ARCHIVE */
3245  if (srv_fast_shutdown == 2) {
3246  if (!srv_read_only_mode) {
3247  ib_logf(IB_LOG_LEVEL_INFO,
3248  "MySQL has requested a very fast shutdown "
3249  "without flushing the InnoDB buffer pool to "
3250  "data files. At the next mysqld startup "
3251  "InnoDB will do a crash recovery!");
3252 
3253  /* In this fastest shutdown we do not flush the
3254  buffer pool:
3255 
3256  it is essentially a 'crash' of the InnoDB server.
3257  Make sure that the log is all flushed to disk, so
3258  that we can recover all committed transactions in
3259  a crash recovery. We must not write the lsn stamps
3260  to the data files, since at a startup InnoDB deduces
3261  from the stamps if the previous shutdown was clean. */
3262 
3264 
3265  /* Check that the background threads stay suspended */
3266  thread_name = srv_any_background_threads_are_active();
3267 
3268  if (thread_name != NULL) {
3269  ib_logf(IB_LOG_LEVEL_WARN,
3270  "Background thread %s woke up "
3271  "during shutdown", thread_name);
3272  goto loop;
3273  }
3274  }
3275 
3277 
3279 
3280  thread_name = srv_any_background_threads_are_active();
3281 
3282  ut_a(!thread_name);
3283 
3284  return;
3285  }
3286 
3287  if (!srv_read_only_mode) {
3288  log_make_checkpoint_at(LSN_MAX, TRUE);
3289  }
3290 
3291  mutex_enter(&log_sys->mutex);
3292 
3293  lsn = log_sys->lsn;
3294 
3295  if (lsn != log_sys->last_checkpoint_lsn
3296 #ifdef UNIV_LOG_ARCHIVE
3297  || (srv_log_archive_on
3298  && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
3299 #endif /* UNIV_LOG_ARCHIVE */
3300  ) {
3301 
3302  mutex_exit(&log_sys->mutex);
3303 
3304  goto loop;
3305  }
3306 
3307  arch_log_no = 0;
3308 
3309 #ifdef UNIV_LOG_ARCHIVE
3310  UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
3311 
3312  if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
3313 
3314  arch_log_no--;
3315  }
3316 
3317  log_archive_close_groups(TRUE);
3318 #endif /* UNIV_LOG_ARCHIVE */
3319 
3320  mutex_exit(&log_sys->mutex);
3321 
3322  /* Check that the background threads stay suspended */
3323  thread_name = srv_any_background_threads_are_active();
3324  if (thread_name != NULL) {
3325  ib_logf(IB_LOG_LEVEL_WARN,
3326  "Background thread %s woke up during shutdown",
3327  thread_name);
3328 
3329  goto loop;
3330  }
3331 
3332  if (!srv_read_only_mode) {
3335  }
3336 
3337  /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
3338  pool: therefore it is essential that the buffer pool has been
3339  completely flushed to disk! (We do not call fil_write... if the
3340  'very fast' shutdown is enabled.) */
3341 
3342  if (!buf_all_freed()) {
3343 
3344  if (srv_print_verbose_log && count > 600) {
3345  ib_logf(IB_LOG_LEVEL_INFO,
3346  "Waiting for dirty buffer pages to be flushed");
3347  count = 0;
3348  }
3349 
3350  goto loop;
3351  }
3352 
3354 
3355  /* Make some checks that the server really is quiet */
3357  ut_a(type == SRV_NONE);
3358 
3359  bool freed = buf_all_freed();
3360  ut_a(freed);
3361 
3362  ut_a(lsn == log_sys->lsn);
3363 
3364  if (lsn < srv_start_lsn) {
3365  ib_logf(IB_LOG_LEVEL_ERROR,
3366  "Log sequence number at shutdown " LSN_PF " "
3367  "is lower than at startup " LSN_PF "!",
3368  lsn, srv_start_lsn);
3369  }
3370 
3372 
3373  if (!srv_read_only_mode) {
3374  fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
3375 
3377  }
3378 
3380 
3381  /* Make some checks that the server really is quiet */
3382  type = srv_get_active_thread_type();
3383  ut_a(type == SRV_NONE);
3384 
3385  freed = buf_all_freed();
3386  ut_a(freed);
3387 
3388  ut_a(lsn == log_sys->lsn);
3389 }
3390 
3391 #ifdef UNIV_LOG_DEBUG
3392 /******************************************************/
/* Debug-only check (compiled under UNIV_LOG_DEBUG) of the log records in a
buffer: copies the block-aligned range covering [buf, buf + len) into a
freshly allocated, block-aligned scratch buffer, scans it, and asserts that
the scan ended exactly at buf_start_lsn + len and that
recv_sys->recovered_lsn equals the scanned lsn.
The caller must hold log_sys->mutex (checked by a debug assertion).

Parameters:
  buf           - in: start of the log record data
  len           - in: length of the data in bytes; 0 returns immediately
  buf_start_lsn - in: lsn corresponding to buf
Always returns TRUE.

NOTE(review): listing lines 3427 and 3431 are absent from this listing, so
the opening of the scan call and the end of the ut_uint64_align_down()
argument are missing below - restore them from the original file. */
3395 UNIV_INTERN
3396 ibool
3397 log_check_log_recs(
3398 /*===============*/
3399  const byte* buf,
3402  ulint len,
3403  ib_uint64_t buf_start_lsn)
3404 {
3405  ib_uint64_t contiguous_lsn;
3406  ib_uint64_t scanned_lsn;
3407  const byte* start;
3408  const byte* end;
3409  byte* buf1;
3410  byte* scan_buf;
3411 
3412  ut_ad(mutex_own(&(log_sys->mutex)));
3413 
3414  if (len == 0) {
3415 
3416  return(TRUE);
3417  }
3418 
/* Widen the range to whole log blocks */
3419  start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
3420  end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
3421 
/* One extra block is allocated so that scan_buf can be aligned inside buf1 */
3422  buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
3423  scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
3424 
3425  ut_memcpy(scan_buf, start, end - start);
3426 
3428  - (recv_n_pool_free_frames * srv_buf_pool_instances))
3429  * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
3430  ut_uint64_align_down(buf_start_lsn,
3432  &contiguous_lsn, &scanned_lsn);
3433 
3434  ut_a(scanned_lsn == buf_start_lsn + len);
3435  ut_a(recv_sys->recovered_lsn == scanned_lsn);
3436 
3437  mem_free(buf1);
3438 
3439  return(TRUE);
3440 }
3441 #endif /* UNIV_LOG_DEBUG */
3442 
3443 /******************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3448) is absent from this listing - restore it from the original file.

Reads the current lsn without waiting for the log mutex.
Parameters:
  lsn - out: set to log_sys->lsn when the mutex could be acquired;
        left untouched otherwise
Returns TRUE if the mutex was acquired and *lsn was set, FALSE otherwise. */
3446 UNIV_INTERN
3447 ibool
3449 /*=========*/
3450  lsn_t* lsn)
3451 {
/* The code treats a zero return from mutex_enter_nowait() as success */
3452  if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
3453  *lsn = log_sys->lsn;
3454 
3455  mutex_exit(&(log_sys->mutex));
3456 
3457  return(TRUE);
3458  }
3459 
3460  return(FALSE);
3461 }
3462 
3463 /******************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3467) is absent from this listing - restore it from the original file.

Prints log statistics to a stream while holding log_sys->mutex: the current
lsn, the flushed-to-disk lsn, the oldest modification lsn of the buffer
pool, the last checkpoint lsn, the pending write counters, and the log i/o
count together with its rate since the previous printout. Afterwards the
rate baseline (n_log_ios_old, last_printout_time) is reset.
Parameters:
  file - in: stream to print to */
3465 UNIV_INTERN
3466 void
3468 /*======*/
3469  FILE* file)
3470 {
3471  double time_elapsed;
3472  time_t current_time;
3473 
3474  mutex_enter(&(log_sys->mutex));
3475 
3476  fprintf(file,
3477  "Log sequence number " LSN_PF "\n"
3478  "Log flushed up to " LSN_PF "\n"
3479  "Pages flushed up to " LSN_PF "\n"
3480  "Last checkpoint at " LSN_PF "\n",
3481  log_sys->lsn,
3482  log_sys->flushed_to_disk_lsn,
3483  log_buf_pool_get_oldest_modification(),
3484  log_sys->last_checkpoint_lsn);
3485 
3486  current_time = time(NULL);
3487 
3488  time_elapsed = difftime(current_time,
3489  log_sys->last_printout_time);
3490 
/* Avoid dividing by zero (or a negative interval) in the rate below */
3491  if (time_elapsed <= 0) {
3492  time_elapsed = 1;
3493  }
3494 
3495  fprintf(file,
3496  "%lu pending log writes, %lu pending chkp writes\n"
3497  "%lu log i/o's done, %.2f log i/o's/second\n",
3498  (ulong) log_sys->n_pending_writes,
3499  (ulong) log_sys->n_pending_checkpoint_writes,
3500  (ulong) log_sys->n_log_ios,
3501  ((double)(log_sys->n_log_ios - log_sys->n_log_ios_old)
3502  / time_elapsed));
3503 
3504  log_sys->n_log_ios_old = log_sys->n_log_ios;
3505  log_sys->last_printout_time = current_time;
3506 
3507  mutex_exit(&(log_sys->mutex));
3508 }
3509 
3510 /**********************************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3514) is absent from this listing - restore it from the original file.

Resets the baseline used for the log i/o rate: records the current i/o
count and the current time. No mutex is taken in this function. */
3512 UNIV_INTERN
3513 void
3515 /*===================*/
3516 {
3517  log_sys->n_log_ios_old = log_sys->n_log_ios;
3518  log_sys->last_printout_time = time(NULL);
3519 }
3520 
3521 /********************************************************/
3523 static
3524 void
3525 log_group_close(
3526 /*===========*/
3527  log_group_t* group) /* in,own: log group to close */
3528 {
3529  ulint i;
3530 
3531  for (i = 0; i < group->n_files; i++) {
3532  mem_free(group->file_header_bufs_ptr[i]);
3533 #ifdef UNIV_LOG_ARCHIVE
3534  mem_free(group->archive_file_header_bufs_ptr[i]);
3535 #endif /* UNIV_LOG_ARCHIVE */
3536  }
3537 
3539  mem_free(group->file_header_bufs);
3540 
3541 #ifdef UNIV_LOG_ARCHIVE
3542  mem_free(group->archive_file_header_bufs_ptr);
3543  mem_free(group->archive_file_header_bufs);
3544 #endif /* UNIV_LOG_ARCHIVE */
3545 
3546  mem_free(group->checkpoint_buf_ptr);
3547 
3548  mem_free(group);
3549 }
3550 
3551 /********************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3555) is absent from this listing - restore it from the original file.

Removes every log group from log_sys->log_groups and frees it with
log_group_close(). */
3553 UNIV_INTERN
3554 void
3556 /*=====================*/
3557 {
3558  log_group_t* group;
3559 
3560  group = UT_LIST_GET_FIRST(log_sys->log_groups);
3561 
3562  while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
3563  log_group_t* prev_group = group;
3564 
/* Fetch the successor before the current node is unlinked and freed */
3565  group = UT_LIST_GET_NEXT(log_groups, group);
3566  UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
3567 
3568  log_group_close(prev_group);
3569  }
3570 }
3571 
3572 /********************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3576) and listing line 3579 (the first statement of the body) are
absent from this listing - restore them from the original file.

Releases the resources held by log_sys: the log buffer and the checkpoint
buffer (their pointers are reset to NULL), the no_flush and one_flushed
events, the checkpoint lock and the log mutex. With UNIV_LOG_ARCHIVE the
archive lock and the archiving_on event are released too. Finally the
recovery system is closed. The log_sys object itself is not freed here. */
3574 UNIV_INTERN
3575 void
3577 /*==============*/
3578 {
3580 
3581  mem_free(log_sys->buf_ptr);
3582  log_sys->buf_ptr = NULL;
3583  log_sys->buf = NULL;
3584  mem_free(log_sys->checkpoint_buf_ptr);
3585  log_sys->checkpoint_buf_ptr = NULL;
3586  log_sys->checkpoint_buf = NULL;
3587 
3588  os_event_free(log_sys->no_flush_event);
3589  os_event_free(log_sys->one_flushed_event);
3590 
3591  rw_lock_free(&log_sys->checkpoint_lock);
3592 
3593  mutex_free(&log_sys->mutex);
3594 
3595 #ifdef UNIV_LOG_ARCHIVE
3596  rw_lock_free(&log_sys->archive_lock);
/* Fix: this used to call os_event_create(), which allocated a new event
during teardown and never released archiving_on. The event that the archive
code waits on, sets and resets is freed instead. */
3597  os_event_free(log_sys->archiving_on);
3598 #endif /* UNIV_LOG_ARCHIVE */
3599 
3600 #ifdef UNIV_LOG_DEBUG
3601  recv_sys_debug_free();
3602 #endif
3603 
3604  recv_sys_close();
3605 }
3606 
3607 /********************************************************/
/* NOTE(review): the declarator line carrying the function name (listing
line 3611) and listing line 3615 (the first statement inside the if) are
absent from this listing - restore them from the original file.

Frees the log_sys object, if one exists, and resets the global pointer to
NULL so that a repeated call does nothing. */
3609 UNIV_INTERN
3610 void
3612 /*==============*/
3613 {
3614  if (log_sys != NULL) {
3616  mem_free(log_sys);
3617 
3618  log_sys = NULL;
3619  }
3620 }
3621 #endif /* !UNIV_HOTBACKUP */