MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trx0trx.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "trx0trx.h"
27 
28 #ifdef UNIV_NONINL
29 #include "trx0trx.ic"
30 #endif
31 
32 #include "trx0undo.h"
33 #include "trx0rseg.h"
34 #include "log0log.h"
35 #include "que0que.h"
36 #include "lock0lock.h"
37 #include "trx0roll.h"
38 #include "usr0sess.h"
39 #include "read0read.h"
40 #include "srv0srv.h"
41 #include "srv0start.h"
42 #include "btr0sea.h"
43 #include "os0proc.h"
44 #include "trx0xa.h"
45 #include "trx0rec.h"
46 #include "trx0purge.h"
47 #include "ha_prototypes.h"
48 #include "srv0mon.h"
49 #include "ut0vec.h"
50 
51 #include<set>
52 
/** Ordered set of table ids. trx_resurrect_table_locks() in this file
collects the table ids read from undo records into one of these. */
54 typedef std::set<table_id_t> table_id_set;
55 
/** Dummy session object. The allocator below that wraps trx_create()
stores this pointer in trx->sess. Initialised to NULL here. */
57 UNIV_INTERN sess_t* trx_dummy_sess = NULL;
58 
59 #ifdef UNIV_PFS_MUTEX
60 /* Performance schema key under which trx_create() registers trx_t::mutex */
61 UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
62 /* Performance schema key under which trx_create() registers trx_t::undo_mutex */
63 UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
64 #endif /* UNIV_PFS_MUTEX */
65 
66 /*************************************************************/
/** Copies an error message into the transaction's detailed_error buffer.
ut_strlcpy() is given sizeof(trx->detailed_error) as the size limit.
NOTE(review): the line carrying the function name (listing line 70) is
missing from this listing; presumably trx_set_detailed_error() -- confirm.
@param trx	transaction whose detailed_error buffer is written
@param msg	message text to copy */
68 UNIV_INTERN
69 void
71 /*===================*/
72  trx_t* trx,
73  const char* msg)
74 {
75  ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
76 }
77 
78 /*************************************************************/
/** Takes a transaction and a FILE handle; the only surviving statement
fragment passes sizeof(trx->detailed_error) as a size argument.
NOTE(review): listing lines 83 (function name) and 88 (start of the call)
are missing here. Presumably this fills trx->detailed_error from the
contents of `file` -- confirm against the upstream source.
@param trx	transaction object
@param file	file handle supplied by the caller */
81 UNIV_INTERN
82 void
84 /*=============================*/
85  trx_t* trx,
86  FILE* file)
87 {
89  sizeof(trx->detailed_error));
90 }
91 
92 /****************************************************************/
/** Allocates a zero-filled trx_t with mem_zalloc() and sets its initial
field values: both mutexes are created, the state is
TRX_STATE_NOT_STARTED, the isolation level is TRX_ISO_REPEATABLE_READ,
trx->no is TRX_ID_MAX, and the autoinc_locks and lock.table_locks
vectors are created, each on its own dedicated heap.
NOTE(review): listing lines 123, 129, 131, 134 and 136 are missing from
this listing, so some initialisations are not visible here.
@return the newly created transaction object */
97 static
98 trx_t*
99 trx_create(void)
100 /*============*/
101 {
102  trx_t* trx;
103  mem_heap_t* heap;
104  ib_alloc_t* heap_alloc;
105 
106  trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
107 
108  mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
109 
110  trx->magic_n = TRX_MAGIC_N;
111 
112  trx->state = TRX_STATE_NOT_STARTED;
113 
114  trx->isolation_level = TRX_ISO_REPEATABLE_READ;
115 
116  trx->no = TRX_ID_MAX;
117 
118  trx->support_xa = TRUE;
119 
120  trx->check_foreigns = TRUE;
121  trx->check_unique_secondary = TRUE;
122 
124 
125  mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
126 
127  trx->error_state = DB_SUCCESS;
128 
130 
/* NOTE(review): the start of this call (listing line 131) is missing;
   presumably it creates trx->lock.lock_heap -- confirm. */
132  256, MEM_HEAP_FOR_LOCK_HEAP);
133 
135 
137 
138  trx->xid.formatID = -1;
139 
140  trx->op_info = "";
141 
/* Dedicated heap and allocator for the autoinc_locks vector. */
142  heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
143  heap_alloc = ib_heap_allocator_create(heap);
144 
145  /* Remember to free the vector explicitly in trx_free(). */
146  trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
147 
148  /* Remember to free the vector explicitly in trx_free(). */
149  heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
150  heap_alloc = ib_heap_allocator_create(heap);
151 
152  trx->lock.table_locks = ib_vector_create(
153  heap_alloc, sizeof(void**), 32);
154 
155  return(trx);
156 }
157 
158 /********************************************************************/
/** Creates a transaction with trx_create() and sets trx_dummy_sess as its
session. The transaction is not added to any trx_sys list here.
NOTE(review): the function-name line (listing line 163) is missing from
this listing; presumably trx_allocate_for_background() -- confirm.
@return the new transaction object */
161 UNIV_INTERN
162 trx_t*
164 /*=============================*/
165 {
166  trx_t* trx;
167 
168  trx = trx_create();
169 
170  trx->sess = trx_dummy_sess;
171 
172  return(trx);
173 }
174 
175 /********************************************************************/
/** Obtains a transaction object and links it at the head of
trx_sys->mysql_trx_list while holding trx_sys->mutex.
NOTE(review): listing lines 180 (function name) and 185 (the statement
that assigns `trx`) are missing from this listing; presumably this is
trx_allocate_for_mysql() and the transaction comes from the background
allocator above -- confirm.
@return the transaction object that was added to mysql_trx_list */
178 UNIV_INTERN
179 trx_t*
181 /*========================*/
182 {
183  trx_t* trx;
184 
186 
187  mutex_enter(&trx_sys->mutex);
188 
/* The flag is only maintained in debug builds (ut_d). */
189  ut_d(trx->in_mysql_trx_list = TRUE);
190  UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
191 
192  mutex_exit(&trx_sys->mutex);
193 
194  return(trx);
195 }
196 
197 /********************************************************************/
/** Releases a transaction object: frees both of its mutexes, frees the
autoinc_locks and lock.table_locks vectors, and finally mem_free()s the
struct itself. Before doing so it asserts that the magic number is
intact, that the transaction is in none of the ro/rw/mysql lists (debug
only), that it is not waiting for a lock, holds no search latch, has
dict_operation_lock_mode == 0, and has an empty trx_locks list.
NOTE(review): the bodies of the three `if` blocks (listing lines 213, 224
and 230) are missing from this listing; presumably they free
undo_no_arr, lock.lock_heap and global_read_view_heap -- confirm.
@param trx	transaction object to free */
199 static
200 void
201 trx_free(
202 /*=====*/
203  trx_t* trx)
204 {
205  ut_a(trx->magic_n == TRX_MAGIC_N);
206  ut_ad(!trx->in_ro_trx_list);
207  ut_ad(!trx->in_rw_trx_list);
208  ut_ad(!trx->in_mysql_trx_list);
209 
210  mutex_free(&trx->undo_mutex);
211 
212  if (trx->undo_no_arr != NULL) {
214  }
215 
216  ut_a(trx->lock.wait_lock == NULL);
217  ut_a(trx->lock.wait_thr == NULL);
218 
219  ut_a(!trx->has_search_latch);
220 
221  ut_a(trx->dict_operation_lock_mode == 0);
222 
223  if (trx->lock.lock_heap) {
225  }
226 
227  ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
228 
229  if (trx->global_read_view_heap) {
231  }
232 
233  ut_a(ib_vector_is_empty(trx->autoinc_locks));
234  /* We allocated a dedicated heap for the vector. */
235  ib_vector_free(trx->autoinc_locks);
236 
237  if (trx->lock.table_locks != NULL) {
238  /* We allocated a dedicated heap for the vector. */
239  ib_vector_free(trx->lock.table_locks);
240  }
241 
242  mutex_free(&trx->mutex);
243 
244  mem_free(trx);
245 }
246 
247 /********************************************************************/
/** Frees a transaction object after sanity checks. Two conditions are
reported through ib_logf() at error level without aborting: the
transaction is still declared to be inside InnoDB, or its MySQL table
counters are non-zero. It then asserts that the transaction is in state
TRX_STATE_NOT_STARTED with no insert/update undo log and no read view,
and hands it to trx_free().
NOTE(review): listing lines 251 (function name) and 266 (last statement
of the first `if`) are missing from this listing; presumably this is
trx_free_for_background() -- confirm.
@param trx	transaction object to free */
249 UNIV_INTERN
250 void
252 /*====================*/
253  trx_t* trx)
254 {
255  if (trx->declared_to_be_inside_innodb) {
256 
257  ib_logf(IB_LOG_LEVEL_ERROR,
258  "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
259  "to be processing inside InnoDB", trx, trx->id);
260 
261  trx_print(stderr, trx, 600);
262  putc('\n', stderr);
263 
264  /* This is an error but not a fatal error. We must keep
265  the counters like srv_conc_n_threads accurate. */
267  }
268 
269  if (trx->n_mysql_tables_in_use != 0
270  || trx->mysql_n_tables_locked != 0) {
271 
272  ib_logf(IB_LOG_LEVEL_ERROR,
273  "MySQL is freeing a thd though "
274  "trx->n_mysql_tables_in_use is %lu and "
275  "trx->mysql_n_tables_locked is %lu.",
276  (ulong) trx->n_mysql_tables_in_use,
277  (ulong) trx->mysql_n_tables_locked);
278 
/* Also dump the raw bytes of the trx struct for diagnosis. */
279  trx_print(stderr, trx, 600);
280  ut_print_buf(stderr, trx, sizeof(trx_t));
281  putc('\n', stderr);
282  }
283 
284  ut_a(trx->state == TRX_STATE_NOT_STARTED);
285  ut_a(trx->insert_undo == NULL);
286  ut_a(trx->update_undo == NULL);
287  ut_a(trx->read_view == NULL);
288 
289  trx_free(trx);
290 }
291 
292 /********************************************************************/
/** Frees a transaction that is in state TRX_STATE_PREPARED. The caller
must hold trx_sys->mutex (checked by a debug assertion). The transaction
must not be read-only; it is unlinked from trx_sys->rw_trx_list and then
passed to trx_free().
NOTE(review): listing lines 296 (function name), 305, 307 and 315 are
missing from this listing, so the cleanup done before the list removal
and the statement that undoes trx_resurrect_table_locks() are not
visible. Presumably this is trx_free_prepared() -- confirm.
@param trx	prepared transaction to free */
294 UNIV_INTERN
295 void
297 /*==============*/
298  trx_t* trx)
299 {
300  ut_ad(mutex_own(&trx_sys->mutex));
301 
302  ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
303  ut_a(trx->magic_n == TRX_MAGIC_N);
304 
306 
308 
309  ut_a(!trx->read_only);
310 
311  UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
312  ut_d(trx->in_rw_trx_list = FALSE);
313 
314  /* Undo trx_resurrect_table_locks(). */
316 
317  trx_free(trx);
318 }
319 
320 /********************************************************************/
/** Unlinks a transaction from trx_sys->mysql_trx_list while holding
trx_sys->mutex; in debug builds the transaction lists are validated
before the mutex is released.
NOTE(review): listing lines 324 (function name) and 338 (the final
statement after the mutex is released) are missing from this listing;
presumably this is trx_free_for_mysql() and the last statement frees
the transaction -- confirm.
@param trx	transaction currently in mysql_trx_list */
322 UNIV_INTERN
323 void
325 /*===============*/
326  trx_t* trx)
327 {
328  mutex_enter(&trx_sys->mutex);
329 
330  ut_ad(trx->in_mysql_trx_list);
331  ut_d(trx->in_mysql_trx_list = FALSE);
332  UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
333 
334  ut_ad(trx_sys_validate_trx_list());
335 
336  mutex_exit(&trx_sys->mutex);
337 
339 }
340 
341 /****************************************************************/
/** Inserts a recovered, non-read-only transaction into
trx_sys->rw_trx_list at the position determined by its id. The list is
scanned from the front for the first element whose id is not greater
than trx->id, and trx is placed immediately before that element; if no
such element exists, trx is appended at the end. Debug assertions
require that ids are never equal.
NOTE(review): listing lines 359 and 385 are missing from this listing;
line 385 is presumably the opening of a UT_LIST_INSERT_AFTER() call
whose arguments survive on the next line -- confirm.
@param trx	recovered transaction to insert */
346 static
347 void
348 trx_list_rw_insert_ordered(
349 /*=======================*/
350  trx_t* trx)
351 {
352  trx_t* trx2;
353 
354  ut_ad(!trx->read_only);
355 
356  ut_d(trx->start_file = __FILE__);
357  ut_d(trx->start_line = __LINE__);
358 
360  ut_ad(!trx->in_ro_trx_list);
361  ut_ad(!trx->in_rw_trx_list);
362  ut_ad(trx->state != TRX_STATE_NOT_STARTED);
363  ut_ad(trx->is_recovered);
364 
/* Find the first list element whose id is not greater than ours. */
365  for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
366  trx2 != NULL;
367  trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
368 
369  assert_trx_in_rw_list(trx2);
370 
371  if (trx->id >= trx2->id) {
372 
373  ut_ad(trx->id > trx2->id);
374  break;
375  }
376  }
377 
378  if (trx2 != NULL) {
/* Step back to the predecessor so that trx goes in front of the
   element found above. */
379  trx2 = UT_LIST_GET_PREV(trx_list, trx2);
380 
381  if (trx2 == NULL) {
/* No predecessor: trx becomes the new list head. */
382  UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
383  ut_d(trx_sys->rw_max_trx_id = trx->id);
384  } else {
386  trx_list, trx_sys->rw_trx_list, trx2, trx);
387  }
388  } else {
/* Every existing element has a larger id (or the list is empty). */
389  UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
390  }
391 
392  ut_ad(!trx->in_rw_trx_list);
393  ut_d(trx->in_rw_trx_list = TRUE);
394 }
395 
396 /****************************************************************/
/** Collects the ids of all tables referenced by the records of one undo
log of a recovered transaction, then processes each distinct table.
Nothing is done when the transaction is in state
TRX_STATE_COMMITTED_IN_MEMORY or the undo log is empty. The undo
records are read from the top record backwards with
trx_undo_get_prev_rec() inside one mini-transaction.
NOTE(review): listing lines 447, 461, 464, 467 and 472 are missing from
this listing. Presumably line 447 parses the undo record header, line
461 opens the table by id, and line 472 takes the resurrected IX table
lock mentioned in the DBUG_PRINT below -- confirm against upstream.
@param trx	recovered transaction that owns the undo log
@param undo	trx->insert_undo or trx->update_undo (debug-asserted) */
398 static
399 void
400 trx_resurrect_table_locks(
401 /*======================*/
402  trx_t* trx,
403  const trx_undo_t* undo)
404 {
405  mtr_t mtr;
406  page_t* undo_page;
407  trx_undo_rec_t* undo_rec;
408  table_id_set tables;
409 
410  ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
411 
412  if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
413  || undo->empty) {
414  return;
415  }
416 
417  mtr_start(&mtr);
418  /* trx_rseg_mem_create() may have acquired an X-latch on this
419  page, so we cannot acquire an S-latch. */
420  undo_page = trx_undo_page_get(
421  undo->space, undo->zip_size, undo->top_page_no, &mtr);
422  undo_rec = undo_page + undo->top_offset;
423 
424  do {
425  ulint type;
426  ulint cmpl_info;
427  bool updated_extern;
428  undo_no_t undo_no;
429  table_id_t table_id;
430 
431  page_t* undo_rec_page = page_align(undo_rec);
432 
/* When the walk has moved to a different page, release the latch
   on the page that was being read before. */
433  if (undo_rec_page != undo_page) {
434  if (!mtr_memo_release(&mtr,
435  buf_block_align(undo_page),
436  MTR_MEMO_PAGE_X_FIX)) {
437  /* The page of the previous undo_rec
438  should have been latched by
439  trx_undo_page_get() or
440  trx_undo_get_prev_rec(). */
441  ut_ad(0);
442  }
443 
444  undo_page = undo_rec_page;
445  }
446 
448  undo_rec, &type, &cmpl_info,
449  &updated_extern, &undo_no, &table_id);
/* The set keeps one entry per distinct table id. */
450  tables.insert(table_id);
451 
452  undo_rec = trx_undo_get_prev_rec(
453  undo_rec, undo->hdr_page_no,
454  undo->hdr_offset, false, &mtr);
455  } while (undo_rec);
456 
457  mtr_commit(&mtr);
458 
459  for (table_id_set::const_iterator i = tables.begin();
460  i != tables.end(); i++) {
462  *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
/* Tables whose .ibd file is missing are closed under
   dict_sys->mutex and skipped. */
463  if (table->ibd_file_missing
465  mutex_enter(&dict_sys->mutex);
466  dict_table_close(table, TRUE, FALSE);
468  mutex_exit(&dict_sys->mutex);
469  continue;
470  }
471 
473 
474  DBUG_PRINT("ib_trx",
475  ("resurrect" TRX_ID_FMT
476  " table '%s' IX lock from %s undo",
477  trx->id, table->name,
478  undo == trx->insert_undo
479  ? "insert" : "update"));
480 
481  dict_table_close(table, FALSE, FALSE);
482  }
483  }
484 }
485 
486 /****************************************************************/
/** Builds a transaction object for an insert undo log found at startup.
The rollback segment, XID, transaction id and insert_undo pointer are
taken from the arguments and the transaction is marked as recovered.
The state is derived from undo->state: TRX_UNDO_ACTIVE gives
TRX_STATE_ACTIVE with trx->no = TRX_ID_MAX; TRX_UNDO_PREPARED gives
TRX_STATE_PREPARED, or TRX_STATE_ACTIVE when srv_force_recovery is
non-zero; any other undo state gives TRX_STATE_COMMITTED_IN_MEMORY. In
the non-active cases trx->no is set to trx->id.
NOTE(review): listing lines 499 (the statement that allocates `trx`),
524-525 and 554 are missing from this listing.
@param undo	insert undo log being resurrected
@param rseg	rollback segment that contains the undo log
@return the resurrected transaction object */
490 static
491 trx_t*
492 trx_resurrect_insert(
493 /*=================*/
494  trx_undo_t* undo,
495  trx_rseg_t* rseg)
496 {
497  trx_t* trx;
498 
500 
501  trx->rseg = rseg;
502  trx->xid = undo->xid;
503  trx->id = undo->trx_id;
504  trx->insert_undo = undo;
505  trx->is_recovered = TRUE;
506 
507  /* This is single-threaded startup code, we do not need the
508  protection of trx->mutex or trx_sys->mutex here. */
509 
510  if (undo->state != TRX_UNDO_ACTIVE) {
511 
512  /* Prepared transactions are left in the prepared state
513  waiting for a commit or abort decision from MySQL */
514 
515  if (undo->state == TRX_UNDO_PREPARED) {
516 
517  fprintf(stderr,
518  "InnoDB: Transaction " TRX_ID_FMT " was in the"
519  " XA prepared state.\n", trx->id);
520 
521  if (srv_force_recovery == 0) {
522 
523  trx->state = TRX_STATE_PREPARED;
526  } else {
527  fprintf(stderr,
528  "InnoDB: Since innodb_force_recovery"
529  " > 0, we will rollback it anyway.\n");
530 
531  trx->state = TRX_STATE_ACTIVE;
532  }
533  } else {
534  trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
535  }
536 
537  /* We give a dummy value for the trx no; this should have no
538  relevance since purge is not interested in committed
539  transaction numbers, unless they are in the history
540  list, in which case it looks the number from the disk based
541  undo log structure */
542 
543  trx->no = trx->id;
544  } else {
545  trx->state = TRX_STATE_ACTIVE;
546 
547  /* A running transaction always has the number
548  field inited to TRX_ID_MAX */
549 
550  trx->no = TRX_ID_MAX;
551  }
552 
553  if (undo->dict_operation) {
555  trx->table_id = undo->table_id;
556  }
557 
/* Continue undo numbering after the top record of the log. */
558  if (!undo->empty) {
559  trx->undo_no = undo->top_undo_no + 1;
560  }
561 
562  return(trx);
563 }
564 
565 /****************************************************************/
/** Sets the state of a resurrected transaction whose update undo log is
not in state TRX_UNDO_ACTIVE. For TRX_UNDO_PREPARED the transaction
becomes TRX_STATE_PREPARED, or TRX_STATE_ACTIVE when srv_force_recovery
is non-zero, and a message is written to stderr. For any other undo
state the transaction becomes TRX_STATE_COMMITTED_IN_MEMORY.
NOTE(review): listing lines 585-586 (the body of the
TRX_STATE_NOT_STARTED branch) are missing from this listing.
@param trx	transaction being resurrected
@param undo	its update undo log */
568 static
569 void
570 trx_resurrect_update_in_prepared_state(
571 /*===================================*/
572  trx_t* trx,
573  const trx_undo_t* undo)
574 {
575  /* This is single-threaded startup code, we do not need the
576  protection of trx->mutex or trx_sys->mutex here. */
577 
578  if (undo->state == TRX_UNDO_PREPARED) {
579  fprintf(stderr,
580  "InnoDB: Transaction " TRX_ID_FMT
581  " was in the XA prepared state.\n", trx->id);
582 
583  if (srv_force_recovery == 0) {
584  if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
587  } else {
588  ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
589  }
590 
591  trx->state = TRX_STATE_PREPARED;
592  } else {
593  fprintf(stderr,
594  "InnoDB: Since innodb_force_recovery"
595  " > 0, we will rollback it anyway.\n");
596 
597  trx->state = TRX_STATE_ACTIVE;
598  }
599  } else {
600  trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
601  }
602 }
603 
604 /****************************************************************/
/** Attaches an update undo log found at startup to a transaction object
and derives the transaction state from it. The rollback segment, XID,
id and update_undo pointer are overwritten from the arguments and the
transaction is marked as recovered. A non-active undo log is handled by
trx_resurrect_update_in_prepared_state() and trx->no is set to trx->id;
an active one gives TRX_STATE_ACTIVE and trx->no = TRX_ID_MAX.
trx->undo_no is only raised, never lowered, so a value already set from
another undo log of the same transaction is kept when it is larger.
NOTE(review): listing line 641 (first statement of the dict_operation
branch) is missing from this listing.
@param trx	transaction to update (newly created or already listed)
@param undo	update undo log being resurrected
@param rseg	rollback segment that contains the undo log */
607 static
608 void
609 trx_resurrect_update(
610 /*=================*/
611  trx_t* trx,
612  trx_undo_t* undo,
613  trx_rseg_t* rseg)
614 {
615  trx->rseg = rseg;
616  trx->xid = undo->xid;
617  trx->id = undo->trx_id;
618  trx->update_undo = undo;
619  trx->is_recovered = TRUE;
620 
621  /* This is single-threaded startup code, we do not need the
622  protection of trx->mutex or trx_sys->mutex here. */
623 
624  if (undo->state != TRX_UNDO_ACTIVE) {
625  trx_resurrect_update_in_prepared_state(trx, undo);
626 
627  /* We give a dummy value for the trx number */
628 
629  trx->no = trx->id;
630 
631  } else {
632  trx->state = TRX_STATE_ACTIVE;
633 
634  /* A running transaction always has the number field inited to
635  TRX_ID_MAX */
636 
637  trx->no = TRX_ID_MAX;
638  }
639 
640  if (undo->dict_operation) {
642  trx->table_id = undo->table_id;
643  }
644 
645  if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
646 
647  trx->undo_no = undo->top_undo_no + 1;
648  }
649 }
650 
651 /****************************************************************/
/** Scans every rollback segment slot in trx_sys->rseg_array and
resurrects a transaction for each undo log found. Insert undo logs
always produce a new transaction via trx_resurrect_insert(). For update
undo logs the rw_trx_list is searched by id first, so that an update
log belonging to a transaction already resurrected from its insert log
is attached to that same object. Every new transaction is inserted into
the rw list in order, and table locks are resurrected for each undo
log.
NOTE(review): listing lines 659 (function name), 664, 666-667 and 708
(the statement that creates `trx` when none was found) are missing from
this listing; presumably this is trx_lists_init_at_db_start() --
confirm. */
657 UNIV_INTERN
658 void
660 /*============================*/
661 {
662  ulint i;
663 
665 
668 
669  /* Look from the rollback segments if there exist undo logs for
670  transactions */
671 
672  for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
673  trx_undo_t* undo;
674  trx_rseg_t* rseg;
675 
676  rseg = trx_sys->rseg_array[i];
677 
/* Unused slots are skipped. */
678  if (rseg == NULL) {
679  continue;
680  }
681 
682  /* Resurrect transactions that were doing inserts. */
683  for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
684  undo != NULL;
685  undo = UT_LIST_GET_NEXT(undo_list, undo)) {
686  trx_t* trx;
687 
688  trx = trx_resurrect_insert(undo, rseg);
689 
690  trx_list_rw_insert_ordered(trx);
691 
692  trx_resurrect_table_locks(trx, undo);
693  }
694 
695  /* Resurrect transactions that were doing updates. */
696  for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
697  undo != NULL;
698  undo = UT_LIST_GET_NEXT(undo_list, undo)) {
699  trx_t* trx;
700  ibool trx_created;
701 
702  /* Check the trx_sys->rw_trx_list first. */
703  mutex_enter(&trx_sys->mutex);
704  trx = trx_get_rw_trx_by_id(undo->trx_id);
705  mutex_exit(&trx_sys->mutex);
706 
707  if (trx == NULL) {
709  trx_created = TRUE;
710  } else {
711  trx_created = FALSE;
712  }
713 
714  trx_resurrect_update(trx, undo, rseg);
715 
/* A transaction found in the list above is already linked. */
716  if (trx_created) {
717  trx_list_rw_insert_ordered(trx);
718  }
719 
720  trx_resurrect_table_locks(trx, undo);
721  }
722  }
723 }
724 
725 /******************************************************************/
/** Picks a rollback segment from trx_sys->rseg_array in approximately
round-robin order. A function-static counter is post-incremented on
every call and reduced modulo max_undo_logs to get the starting slot.
From there the array is scanned forward, wrapping to slot 0 when a NULL
slot is reached, until an acceptable segment is found. A segment in
space 0 is rejected when n_tablespaces > 0 and slot 1 is populated.
The counter is updated without any synchronisation visible in this
function.
NOTE(review): listing line 739 is missing from this listing; it is the
condition that opens the early-return block below, in which NULL is
returned after asserting max_undo_logs == ULONG_UNDEFINED.
@param max_undo_logs	number of slots to cycle through; asserted to be
			in the range 1..TRX_SYS_N_RSEGS
@param n_tablespaces	number of undo tablespaces
@return the assigned rollback segment, or NULL on the early-return path */
728 static
729 trx_rseg_t*
730 trx_assign_rseg_low(
731 /*================*/
732  ulong max_undo_logs,
733  ulint n_tablespaces)
734 {
735  ulint i;
736  trx_rseg_t* rseg;
737  static ulint latest_rseg = 0;
738 
740  ut_a(max_undo_logs == ULONG_UNDEFINED);
741  return(NULL);
742  }
743 
744  /* This breaks true round robin but that should be OK. */
745 
746  ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
747 
748  i = latest_rseg++;
749  i %= max_undo_logs;
750 
751  /* Note: The assumption here is that there can't be any gaps in
752  the array. Once we implement more flexible rollback segment
753  management this may not hold. The assertion checks for that case. */
754 
755  ut_a(trx_sys->rseg_array[0] != NULL);
756 
757  /* Skip the system tablespace if we have more than one tablespace
758  defined for rollback segments. We want all UNDO records to be in
759  the non-system tablespaces. */
760 
761  do {
762  rseg = trx_sys->rseg_array[i];
763  ut_a(rseg == NULL || i == rseg->id);
764 
/* Wrap to slot 0 at the first empty slot, otherwise advance. */
765  i = (rseg == NULL) ? 0 : i + 1;
766 
767  } while (rseg == NULL
768  || (rseg->space == 0
769  && n_tablespaces > 0
770  && trx_sys->rseg_array[1] != NULL));
771 
772  return(rseg);
773 }
774 
775 /****************************************************************/
/** Assigns a rollback segment to a transaction that has none yet. The
transaction must be flagged read-only and must have trx->rseg unset
(both asserted). The segment comes from trx_assign_rseg_low() using the
srv_undo_logs and srv_undo_tablespaces settings.
NOTE(review): listing lines 780 (function name), 783 and 787-788 are
missing from this listing; presumably this is trx_assign_rseg() --
confirm.
@param trx	read-only transaction that needs a rollback segment */
778 UNIV_INTERN
779 void
781 /*============*/
782  trx_t* trx)
784 {
785  ut_a(trx->rseg == 0);
786  ut_a(trx->read_only);
789 
790  trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
791 }
792 
793 /****************************************************************/
/** Starts a transaction that is in state TRX_STATE_NOT_STARTED. It
decides whether the transaction is read-only, assigns a rollback
segment to read-write transactions, and then, under trx_sys->mutex,
sets the state to TRX_STATE_ACTIVE, allocates a new transaction id and
links the transaction into ro_trx_list or rw_trx_list. Read-only
transactions for which trx_is_autocommit_non_locking() is true are not
linked into any list. The start time is recorded after the mutex is
released.
NOTE(review): listing lines 810, 814, 833, 862 and 864 are missing from
this listing. Line 810 presumably sets trx->auto_commit, and line 814
is the rest of the read_only expression -- confirm.
@param trx	transaction to start */
795 static
796 void
797 trx_start_low(
798 /*==========*/
799  trx_t* trx)
800 {
801  ut_ad(trx->rseg == NULL);
802 
803  ut_ad(trx->start_file != 0);
804  ut_ad(trx->start_line != 0);
805  ut_ad(!trx->is_recovered);
806  ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
807  ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
808 
809  /* Check whether it is an AUTOCOMMIT SELECT */
811 
812  trx->read_only =
813  (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
815 
/* A transaction that is not auto-commit is counted as one that will
   lock; an auto-commit transaction with will_lock == 0 is forced to
   read-only. */
816  if (!trx->auto_commit) {
817  ++trx->will_lock;
818  } else if (trx->will_lock == 0) {
819  trx->read_only = TRUE;
820  }
821 
822  if (!trx->read_only) {
823  trx->rseg = trx_assign_rseg_low(
824  srv_undo_logs, srv_undo_tablespaces);
825  }
826 
827  /* The initial value for trx->no: TRX_ID_MAX is used in
828  read_view_open_now: */
829 
830  trx->no = TRX_ID_MAX;
831 
832  ut_a(ib_vector_is_empty(trx->autoinc_locks));
834 
835  mutex_enter(&trx_sys->mutex);
836 
837  /* If this transaction came from trx_allocate_for_mysql(),
838  trx->in_mysql_trx_list would hold. In that case, the trx->state
839  change must be protected by the trx_sys->mutex, so that
840  lock_print_info_all_transactions() will have a consistent view. */
841 
842  trx->state = TRX_STATE_ACTIVE;
843 
844  trx->id = trx_sys_get_new_trx_id();
845 
846  ut_ad(!trx->in_rw_trx_list);
847  ut_ad(!trx->in_ro_trx_list);
848 
849  if (trx->read_only) {
850 
851  /* Note: The trx_sys_t::ro_trx_list doesn't really need to
852  be ordered, we should exploit this using a list type that
853  doesn't need a list wide lock to increase concurrency. */
854 
855  if (!trx_is_autocommit_non_locking(trx)) {
856  UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
857  ut_d(trx->in_ro_trx_list = TRUE);
858  }
859  } else {
860 
861  ut_ad(trx->rseg != NULL
863 
/* The newly allocated id goes to the head of the rw list. */
865  UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
866  ut_d(trx->in_rw_trx_list = TRUE);
867  ut_d(trx_sys->rw_max_trx_id = trx->id);
868  }
869 
870  ut_ad(trx_sys_validate_trx_list());
871 
872  mutex_exit(&trx_sys->mutex);
873 
874  trx->start_time = ut_time();
875 
876  MONITOR_INC(MONITOR_TRX_ACTIVE);
877 }
878 
879 /****************************************************************/
/** Assigns the serialisation number trx->no from
trx_sys_get_new_trx_id() while holding trx_sys->mutex. The caller must
already hold the mutex of trx->rseg (checked by a debug assertion).
When rseg->last_page_no is FIL_NULL, an {rseg, trx->no} element is also
pushed onto the purge_sys->ib_bh heap under purge_sys->bh_mutex.
trx_sys->mutex is released on both paths before returning.
@param trx	transaction that is committing */
881 static
882 void
883 trx_serialisation_number_get(
884 /*=========================*/
885  trx_t* trx)
886 {
887  trx_rseg_t* rseg;
888 
889  rseg = trx->rseg;
890 
891  ut_ad(mutex_own(&rseg->mutex));
892 
893  mutex_enter(&trx_sys->mutex);
894 
895  trx->no = trx_sys_get_new_trx_id();
896 
897  /* If the rollback segment is not empty then the
898  new trx_t::no can't be less than any trx_t::no
899  already in the rollback segment. User threads only
900  produce events when a rollback segment is empty. */
901 
902  if (rseg->last_page_no == FIL_NULL) {
903  void* ptr;
904  rseg_queue_t rseg_queue;
905 
906  rseg_queue.rseg = rseg;
907  rseg_queue.trx_no = trx->no;
908 
/* bh_mutex is taken before trx_sys->mutex is released below. */
909  mutex_enter(&purge_sys->bh_mutex);
910 
911  /* This is to reduce the pressure on the trx_sys_t::mutex
912  though in reality it should make very little (read no)
913  difference because this code path is only taken when the
914  rbs is empty. */
915 
916  mutex_exit(&trx_sys->mutex);
917 
918  ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
919  ut_a(ptr);
920 
921  mutex_exit(&purge_sys->bh_mutex);
922  } else {
923  mutex_exit(&trx_sys->mutex);
924  }
925 }
926 
927 /****************************************************************/
/** Moves the transaction's undo logs to their finished state inside the
given mini-transaction. rseg->mutex is acquired on both branches of the
first `if` and released after the insert undo log, if any, has been
handled. When an update undo log exists, the serialisation number is
assigned first and trx_undo_update_cleanup() is called with the header
page returned by trx_undo_set_state_at_finish(). Finally, if
trx->mysql_log_file_name is a non-empty string, the binlog name and
offset are passed on and the pointer is reset to NULL.
NOTE(review): listing lines 990 and 993 are missing from this listing,
so the name of the function that receives the binlog position and its
remaining arguments are not visible.
@param trx	transaction being committed
@param mtr	mini-transaction in which the undo log changes are made */
930 static __attribute__((nonnull))
931 void
932 trx_write_serialisation_history(
933 /*============================*/
934  trx_t* trx,
935  mtr_t* mtr)
936 {
937  trx_rseg_t* rseg;
938 
939  rseg = trx->rseg;
940 
941  /* Change the undo log segment states from TRX_UNDO_ACTIVE
942  to some other state: these modifications to the file data
943  structure define the transaction as committed in the file
944  based domain, at the serialization point of the log sequence
945  number lsn obtained below. */
946 
947  if (trx->update_undo != NULL) {
948  page_t* undo_hdr_page;
949  trx_undo_t* undo = trx->update_undo;
950 
951  /* We have to hold the rseg mutex because update
952  log headers have to be put to the history list in the
953  (serialisation) order of the UNDO trx number. This is
954  required for the purge in-memory data structures too. */
955 
956  mutex_enter(&rseg->mutex);
957 
958  /* Assign the transaction serialisation number and also
959  update the purge min binary heap if this is the first
960  UNDO log being written to the assigned rollback segment. */
961 
962  trx_serialisation_number_get(trx);
963 
964  /* It is not necessary to obtain trx->undo_mutex here
965  because only a single OS thread is allowed to do the
966  transaction commit for this transaction. */
967 
968  undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
969 
970  trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
971  } else {
972  mutex_enter(&rseg->mutex);
973  }
974 
975  if (trx->insert_undo != NULL) {
976  trx_undo_set_state_at_finish(trx->insert_undo, mtr);
977  }
978 
979  mutex_exit(&rseg->mutex);
980 
981  MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
982 
983  /* Update the latest MySQL binlog name and offset info
984  in trx sys header if MySQL binlogging is on or the database
985  server is a MySQL replication slave */
986 
987  if (trx->mysql_log_file_name
988  && trx->mysql_log_file_name[0] != '\0') {
989 
991  trx->mysql_log_file_name,
992  trx->mysql_log_offset,
994 
995  trx->mysql_log_file_name = NULL;
996  }
997 }
998 
999 /********************************************************************
1000 Finalize a transaction containing updates for an FTS table. */
/** Hands the list of added document ids of one FTS transaction table to
the table's add work queue (fts->add_wq), unless the BG_THREAD_STOP bit
is set in fts->fts_status. The status bit is read while holding
fts->bg_threads_mutex, which is released before the queue is touched.
After queuing, ftt->added_doc_ids is set to NULL because the queue now
owns the list. */
1001 static __attribute__((nonnull))
1002 void
1003 trx_finalize_for_fts_table(
1004 /*=======================*/
1005  fts_trx_table_t* ftt) /* in: FTS trx table */
1006 {
1007  fts_t* fts = ftt->table->fts;
1008  fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
1009 
1010  mutex_enter(&fts->bg_threads_mutex);
1011 
1012  if (fts->fts_status & BG_THREAD_STOP) {
1013  /* The table is about to be dropped, no use
1014  adding anything to its work queue. */
1015 
1016  mutex_exit(&fts->bg_threads_mutex);
1017  } else {
1018  mem_heap_t* heap;
1019  mutex_exit(&fts->bg_threads_mutex);
1020 
1021  ut_a(fts->add_wq);
1022 
/* The heap passed to the work queue is the one stored as the
   argument of the doc id list's own allocator. */
1023  heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
1024 
1025  ib_wqueue_add(fts->add_wq, doc_ids, heap);
1026 
1027  /* fts_trx_table_t no longer owns the list. */
1028  ftt->added_doc_ids = NULL;
1029  }
1030 }
1031 
1032 /******************************************************************/
/** Finishes the FTS part of a transaction. On commit, every table in the
last savepoint of trx->fts_trx that has added document ids is passed to
trx_finalize_for_fts_table(). In all cases the FTS transaction state is
then freed with fts_trx_free() and trx->fts_trx is set to NULL.
@param trx		transaction; trx->fts_trx is dereferenced when
			is_commit is true
@param is_commit	true if the transaction was committed, false if
			it was rolled back */
1034 static __attribute__((nonnull))
1035 void
1036 trx_finalize_for_fts(
1037 /*=================*/
1038  trx_t* trx,
1039  bool is_commit)
1041 {
1042  if (is_commit) {
1043  const ib_rbt_node_t* node;
1044  ib_rbt_t* tables;
1045  fts_savepoint_t* savepoint;
1046 
1047  savepoint = static_cast<fts_savepoint_t*>(
1048  ib_vector_last(trx->fts_trx->savepoints));
1049 
1050  tables = savepoint->tables;
1051 
/* Visit every node of the savepoint's red-black tree of tables. */
1052  for (node = rbt_first(tables);
1053  node;
1054  node = rbt_next(tables, node)) {
1055  fts_trx_table_t** ftt;
1056 
1057  ftt = rbt_value(fts_trx_table_t*, node);
1058 
1059  if ((*ftt)->added_doc_ids) {
1060  trx_finalize_for_fts_table(*ftt);
1061  }
1062  }
1063  }
1064 
1065  fts_trx_free(trx->fts_trx);
1066  trx->fts_trx = NULL;
1067 }
1068 
1069 /**********************************************************************/
/** Writes, and possibly flushes, the redo log up to the given lsn
according to srv_flush_log_at_trx_commit: 0 does nothing; 1 writes the
log and requests a flush unless srv_unix_file_flush_method is
SRV_UNIX_NOSYNC; 2 writes the log without requesting a flush. Any other
value hits ut_error.
@param lsn	log sequence number up to which the log is written */
1072 static
1073 void
1074 trx_flush_log_if_needed_low(
1075 /*========================*/
1076  lsn_t lsn)
1078 {
1079  switch (srv_flush_log_at_trx_commit) {
1080  case 0:
1081  /* Do nothing */
1082  break;
1083  case 1:
1084  /* Write the log and optionally flush it to disk */
1085  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1086  srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
1087  break;
1088  case 2:
1089  /* Write the log but do not flush it to disk */
1090  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1091 
1092  break;
1093  default:
1094  ut_error;
1095  }
1096 }
1097 
1098 /**********************************************************************/
/** Wrapper around trx_flush_log_if_needed_low() that sets trx->op_info to
"flushing log" for the duration of the call and resets it to the empty
string afterwards.
@param lsn	log sequence number up to which the log is written
@param trx	transaction whose op_info is updated */
1101 static __attribute__((nonnull))
1102 void
1103 trx_flush_log_if_needed(
1104 /*====================*/
1105  lsn_t lsn,
1107  trx_t* trx)
1108 {
1109  trx->op_info = "flushing log";
1110  trx_flush_log_if_needed_low(lsn);
1111  trx->op_info = "";
1112 }
1113 
1114 /****************************************************************/
/** Completes a commit in the in-memory data structures and resets the
transaction object for reuse. There are two paths. An auto-commit
non-locking transaction is in no trx_sys list, so its state is set to
TRX_STATE_NOT_STARTED without taking trx_sys->mutex. Any other
transaction is removed from ro_trx_list or rw_trx_list and its state is
changed under trx_sys->mutex. The read view is then dropped. If lsn is
non-zero the redo log is flushed according to the durability settings,
or the flush is deferred by setting must_flush_log_later, and
trx->commit_lsn is recorded. Last, savepoints are freed, per-transaction
fields are reset, and any FTS state is finalized.
NOTE(review): listing lines 1122-1123, 1156 and 1206 are missing from
this listing; line 1206 is the body of the insert_undo branch.
@param trx	transaction being committed
@param lsn	commit lsn; the log handling block is skipped when 0 */
1116 static __attribute__((nonnull))
1117 void
1118 trx_commit_in_memory(
1119 /*=================*/
1120  trx_t* trx,
1121  lsn_t lsn)
1124 {
1125  trx->must_flush_log_later = FALSE;
1126 
1127  if (trx_is_autocommit_non_locking(trx)) {
1128  ut_ad(trx->read_only);
1129  ut_a(!trx->is_recovered);
1130  ut_ad(trx->rseg == NULL);
1131  ut_ad(!trx->in_ro_trx_list);
1132  ut_ad(!trx->in_rw_trx_list);
1133 
1134  /* Note: We are asserting without holding the lock mutex. But
1135  that is OK because this transaction is not waiting and cannot
1136  be rolled back and no new locks can (or should not) be added
1137  because it is flagged as a non-locking read-only transaction. */
1138 
1139  ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1140 
1141  /* This state change is not protected by any mutex, therefore
1142  there is an inherent race here around state transition during
1143  printouts. We ignore this race for the sake of efficiency.
1144  However, the trx_sys_t::mutex will protect the trx_t instance
1145  and it cannot be removed from the mysql_trx_list and freed
1146  without first acquiring the trx_sys_t::mutex. */
1147 
1148  ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1149 
1150  trx->state = TRX_STATE_NOT_STARTED;
1151 
1152  read_view_remove(trx->global_read_view, false);
1153 
1154  MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
1155  } else {
1157 
1158  /* Remove the transaction from the list of active
1159  transactions now that it no longer holds any user locks. */
1160 
1161  ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1162 
1163  mutex_enter(&trx_sys->mutex);
1164 
1165  assert_trx_in_list(trx);
1166 
1167  if (trx->read_only) {
1168  UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
1169  ut_d(trx->in_ro_trx_list = FALSE);
1170  MONITOR_INC(MONITOR_TRX_RO_COMMIT);
1171  } else {
1172  UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1173  ut_d(trx->in_rw_trx_list = FALSE);
1174  MONITOR_INC(MONITOR_TRX_RW_COMMIT);
1175  }
1176 
1177  /* If this transaction came from trx_allocate_for_mysql(),
1178  trx->in_mysql_trx_list would hold. In that case, the
1179  trx->state change must be protected by trx_sys->mutex, so that
1180  lock_print_info_all_transactions() will have a consistent
1181  view. */
1182 
1183  trx->state = TRX_STATE_NOT_STARTED;
1184 
1185  /* We already own the trx_sys_t::mutex, by doing it here we
1186  avoid a potential context switch later. */
1187  read_view_remove(trx->global_read_view, true);
1188 
1189  ut_ad(trx_sys_validate_trx_list());
1190 
1191  mutex_exit(&trx_sys->mutex);
1192  }
1193 
/* Empty the heap backing the global read view rather than freeing
   it, then forget the view. */
1194  if (trx->global_read_view != NULL) {
1195 
1196  mem_heap_empty(trx->global_read_view_heap);
1197 
1198  trx->global_read_view = NULL;
1199  }
1200 
1201  trx->read_view = NULL;
1202 
1203  if (lsn) {
1204  if (trx->insert_undo != NULL) {
1205 
1207  }
1208 
1209  /* NOTE that we could possibly make a group commit more
1210  efficient here: call os_thread_yield here to allow also other
1211  trxs to come to commit! */
1212 
1213  /*-------------------------------------*/
1214 
1215  /* Depending on the my.cnf options, we may now write the log
1216  buffer to the log files, making the transaction durable if
1217  the OS does not crash. We may also flush the log files to
1218  disk, making the transaction durable also at an OS crash or a
1219  power outage.
1220 
1221  The idea in InnoDB's group commit is that a group of
1222  transactions gather behind a trx doing a physical disk write
1223  to log files, and when that physical write has been completed,
1224  one of those transactions does a write which commits the whole
1225  group. Note that this group commit will only bring benefit if
1226  there are > 2 users in the database. Then at least 2 users can
1227  gather behind one doing the physical log write to disk.
1228 
1229  If we are calling trx_commit() under prepare_commit_mutex, we
1230  will delay possible log write and flush to a separate function
1231  trx_commit_complete_for_mysql(), which is only called when the
1232  thread has released the mutex. This is to make the
1233  group commit algorithm to work. Otherwise, the prepare_commit
1234  mutex would serialize all commits and prevent a group of
1235  transactions from gathering. */
1236 
1237  if (trx->flush_log_later) {
1238  /* Do nothing yet */
1239  trx->must_flush_log_later = TRUE;
1240  } else if (srv_flush_log_at_trx_commit == 0
1241  || thd_requested_durability(trx->mysql_thd)
1242  == HA_IGNORE_DURABILITY) {
1243  /* Do nothing */
1244  } else {
1245  trx_flush_log_if_needed(lsn, trx);
1246  }
1247 
1248  trx->commit_lsn = lsn;
1249  }
1250 
1251  /* undo_no is non-zero if we're doing the final commit. */
/* This must be read before trx->undo_no is reset to 0 below. */
1252  bool not_rollback = trx->undo_no != 0;
1253  /* Free all savepoints, starting from the first. */
1254  trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
1255  trx_roll_savepoints_free(trx, savep);
1256 
1257  trx->rseg = NULL;
1258  trx->undo_no = 0;
1259  trx->last_sql_stat_start.least_undo_no = 0;
1260 
1261  trx->ddl = false;
1262 #ifdef UNIV_DEBUG
1263  ut_ad(trx->start_file != 0);
1264  ut_ad(trx->start_line != 0);
1265  trx->start_file = 0;
1266  trx->start_line = 0;
1267 #endif /* UNIV_DEBUG */
1268 
1269  trx->will_lock = 0;
1270  trx->read_only = FALSE;
1271  trx->auto_commit = FALSE;
1272 
1273  if (trx->fts_trx) {
1274  trx_finalize_for_fts(trx, not_rollback);
1275  }
1276 
1277  ut_ad(trx->lock.wait_thr == NULL);
1278  ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1279  ut_ad(!trx->in_ro_trx_list);
1280  ut_ad(!trx->in_rw_trx_list);
1281 
1282  trx->dict_operation = TRX_DICT_OP_NONE;
1283 
1284  trx->error_state = DB_SUCCESS;
1285 
1286  /* trx->in_mysql_trx_list would hold between
1287  trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
1288  hold for recovered transactions or system transactions. */
1289 }
1290 
1291 /****************************************************************/
/* Commits a transaction together with an optional mini-transaction.
When mtr is non-NULL the serialisation history is written, the
mini-transaction is committed and its end LSN is handed to
trx_commit_in_memory(); when mtr is NULL, LSN 0 is handed over instead.
The debug assertion below requires mtr to be non-NULL exactly when the
transaction has an insert or update undo log.
NOTE(review): the line carrying the function name and two interior lines
appear to be missing from this listing; trx_commit() calls
trx_commit_low(trx, mtr) with a matching signature -- confirm against the
original source.
@param trx	transaction being committed
@param mtr	active mini-transaction, or NULL */
1293 UNIV_INTERN
1294 void
1296 /*===========*/
1297  trx_t* trx,
1298  mtr_t* mtr)
1300 {
1301  lsn_t lsn;
1302 
1304  ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1305  ut_ad(!mtr || mtr->state == MTR_ACTIVE);
1306  ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
1307 
1308  /* undo_no is non-zero if we're doing the final commit. */
1309  if (trx->fts_trx && trx->undo_no != 0) {
1310  dberr_t error;
1311 
1313 
1314  error = fts_commit(trx);
1315 
1316  /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
1317  instead of dying. This is a possible scenario if there
1318  is a crash between insert to DELETED table committing
1319  and transaction committing. The fix would be able to
1320  return error from this function */
1321  if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
1322  /* FTS-FIXME: once we can return values from this
1323  function, we should do so and signal an error
1324  instead of just dying. */
1325 
1326  ut_error;
1327  }
1328  }
1329 
1330  if (mtr) {
1331  trx_write_serialisation_history(trx, mtr);
1332  /* The following call commits the mini-transaction, making the
1333  whole transaction committed in the file-based world, at this
1334  log sequence number. The transaction becomes 'durable' when
1335  we write the log to disk, but in the logical sense the commit
1336  in the file-based data structures (undo logs etc.) happens
1337  here.
1338 
1339  NOTE that transaction numbers, which are assigned only to
1340  transactions with an update undo log, do not necessarily come
1341  in exactly the same order as commit lsn's, if the transactions
1342  have different rollback segments. To get exactly the same
1343  order we should hold the kernel mutex up to this point,
1344  adding to the contention of the kernel mutex. However, if
1345  a transaction T2 is able to see modifications made by
1346  a transaction T1, T2 will always get a bigger transaction
1347  number and a bigger commit lsn than T1. */
1348 
1349  /*--------------*/
1350  mtr_commit(mtr);
1351  /*--------------*/
1352  lsn = mtr->end_lsn;
1353  } else {
1354  /* Nothing was written to the redo log for this transaction. */
1354  lsn = 0;
1355  }
1356 
1357  trx_commit_in_memory(trx, lsn);
1358 }
1359 
1360 /****************************************************************/
1362 UNIV_INTERN
1363 void
1364 trx_commit(
1365 /*=======*/
1366  trx_t* trx)
1367 {
1368  mtr_t local_mtr;
1369  mtr_t* mtr;
1370 
1371  if (trx->insert_undo || trx->update_undo) {
1372  mtr = &local_mtr;
1373  mtr_start(mtr);
1374  } else {
1375  mtr = NULL;
1376  }
1377 
1378  trx_commit_low(trx, mtr);
1379 }
1380 
1381 /****************************************************************/
/* Resets a recovered transaction: clears its rollback segment pointer and
undo number, removes it from trx_sys->rw_trx_list under trx_sys->mutex, and
finally sets its state to TRX_STATE_NOT_STARTED.
NOTE(review): the function-name line, the body of the insert_undo branch and
at least one further statement appear to be missing from this listing --
confirm against the original source.
@param trx	recovered transaction (asserted via trx->is_recovered) */
1385 UNIV_INTERN
1386 void
1388 /*======================*/
1389  trx_t* trx)
1390 {
1391  ut_ad(trx->is_recovered);
1392 
1393  if (trx->insert_undo != NULL) {
1394 
1396  }
1397 
1398  trx->rseg = NULL;
1399  trx->undo_no = 0;
1401 
1402  mutex_enter(&trx_sys->mutex);
1403 
1404  ut_a(!trx->read_only);
1405 
1406  UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1407 
1408  assert_trx_in_rw_list(trx);
1409  ut_d(trx->in_rw_trx_list = FALSE);
1410 
1411  mutex_exit(&trx_sys->mutex);
1412 
1413  /* Change the transaction state without mutex protection, now
1414  that it no longer is in the trx_list. Recovered transactions
1415  are never placed in the mysql_trx_list. */
1416  ut_ad(trx->is_recovered);
1417  ut_ad(!trx->in_ro_trx_list);
1418  ut_ad(!trx->in_rw_trx_list);
1419  ut_ad(!trx->in_mysql_trx_list);
1420  trx->state = TRX_STATE_NOT_STARTED;
1421 }
1422 
1423 /********************************************************************/
/* Returns the read view of an ACTIVE transaction, assigning one first if
the transaction does not have one yet. A newly assigned view is also stored
in trx->global_read_view.
NOTE(review): the function-name line and the start of the statement that
assigns trx->read_view appear to be missing from this listing -- confirm
against the original source.
@param trx	active transaction
@return trx->read_view */
1428 UNIV_INTERN
1429 read_view_t*
1431 /*=================*/
1432  trx_t* trx)
1433 {
1434  ut_ad(trx->state == TRX_STATE_ACTIVE);
1435 
1436  if (trx->read_view != NULL) {
1437  return(trx->read_view);
1438  }
1439 
1440  /* NOTE(review): after the early return above, trx->read_view is
1440  necessarily NULL here, so this test looks redundant. */
1440  if (!trx->read_view) {
1441 
1443  trx->id, trx->global_read_view_heap);
1444 
1445  trx->global_read_view = trx->read_view;
1446  }
1447 
1448  return(trx->read_view);
1449 }
1450 
1451 /****************************************************************/
/* Prepares a transaction for commit or rollback. A NOT_STARTED transaction
is started first. If the transaction is in a lock wait, the waiting query
thread is marked QUE_THR_SUSPENDED and trx->lock.wait_thr is cleared. A
transaction in TRX_STATE_COMMITTED_IN_MEMORY triggers ut_error.
NOTE(review): the function-name line and one statement inside the lock-wait
branch appear to be missing from this listing -- confirm against the
original source.
@param trx	transaction */
1453 UNIV_INTERN
1454 void
1456 /*===========================*/
1457  trx_t* trx)
1458 {
1459  /* We are reading trx->state without holding trx_sys->mutex
1460  here, because the commit or rollback should be invoked for a
1461  running (or recovered prepared) transaction that is associated
1462  with the current thread. */
1463 
1464  switch (trx->state) {
1465  case TRX_STATE_NOT_STARTED:
1466  trx_start_low(trx);
1467  /* fall through */
1468  case TRX_STATE_ACTIVE:
1469  case TRX_STATE_PREPARED:
1470  /* If the trx is in a lock wait state, moves the waiting
1471  query thread to the suspended state */
1472 
1473  if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1474 
1475  ut_a(trx->lock.wait_thr != NULL);
1476  trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
1477  trx->lock.wait_thr = NULL;
1478 
1480  }
1481 
1482  ut_a(trx->lock.n_active_thrs == 1);
1483  return;
1484  case TRX_STATE_COMMITTED_IN_MEMORY:
1485  break;
1486  }
1487 
1488  /* Only reached for a state that must not occur here. */
1488  ut_error;
1489 }
1490 
1491 /*********************************************************************/
/* Allocates a commit_node_t from the given memory heap, sets its node type
to QUE_NODE_COMMIT and its state to COMMIT_NODE_SEND, and returns it.
NOTE(review): the return-type and function-name lines appear to be missing
from this listing -- confirm against the original source.
@param heap	memory heap the node is allocated from
@return the newly initialised commit node */
1494 UNIV_INTERN
1497 /*===================*/
1498  mem_heap_t* heap)
1499 {
1500  commit_node_t* node;
1501 
1502  node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
1503  node->common.type = QUE_NODE_COMMIT;
1504  node->state = COMMIT_NODE_SEND;
1505 
1506  return(node);
1507 }
1508 
1509 /***********************************************************/
/* Executes one step of a commit node in a query graph. In the
COMMIT_NODE_SEND state the node moves to COMMIT_NODE_WAIT, the thread's
transaction is committed with trx_commit(), and NULL is returned. In the
COMMIT_NODE_WAIT state the node is reset to COMMIT_NODE_SEND and control
is passed to the parent node.
NOTE(review): the function-name line and several statements around the
trx_commit() call appear to be missing from this listing -- confirm against
the original source.
@param thr	query thread whose run_node is the commit node
@return thr with run_node set to the parent, or NULL after committing */
1512 UNIV_INTERN
1513 que_thr_t*
1515 /*============*/
1516  que_thr_t* thr)
1517 {
1518  commit_node_t* node;
1519 
1520  node = static_cast<commit_node_t*>(thr->run_node);
1521 
1522  ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1523 
1523  /* Arriving from the parent node restarts the node's state machine. */
1524  if (thr->prev_node == que_node_get_parent(node)) {
1525  node->state = COMMIT_NODE_SEND;
1526  }
1527 
1528  if (node->state == COMMIT_NODE_SEND) {
1529  trx_t* trx;
1530 
1531  node->state = COMMIT_NODE_WAIT;
1532 
1533  trx = thr_get_trx(thr);
1534 
1535  ut_a(trx->lock.wait_thr == NULL);
1537 
1539 
1541 
1542  trx_commit(trx);
1543 
1544  ut_ad(trx->lock.wait_thr == NULL);
1545 
1547 
1548  thr = NULL;
1549  } else {
1550  ut_ad(node->state == COMMIT_NODE_WAIT);
1551 
1552  node->state = COMMIT_NODE_SEND;
1553 
1554  thr->run_node = que_node_get_parent(node);
1555  }
1556 
1557  return(thr);
1558 }
1559 
1560 /**********************************************************************/
/* Commits a transaction, starting it first if it has not been started.
For NOT_STARTED, ACTIVE and PREPARED transactions the commit is performed,
the MONITOR_TRX_ACTIVE counter is decremented and DB_SUCCESS is returned.
A transaction in TRX_STATE_COMMITTED_IN_MEMORY triggers ut_error.
NOTE(review): the function-name line appears to be missing from this
listing -- confirm against the original source.
@param trx	transaction; must not be NULL
@return DB_SUCCESS on the normal path */
1563 UNIV_INTERN
1564 dberr_t
1566 /*=================*/
1567  trx_t* trx)
1568 {
1569  /* Because we do not do the commit by sending an Innobase
1570  sig to the transaction, we must here make sure that trx has been
1571  started. */
1572 
1573  ut_a(trx);
1574 
1575  switch (trx->state) {
1576  case TRX_STATE_NOT_STARTED:
1577  /* Update the info whether we should skip XA steps that eat
1578  CPU time.
1579 
1580  For the duration of the transaction trx->support_xa is
1581  not reread from thd so any changes in the value take
1582  effect in the next transaction. This is to avoid a
1583  scenario where some undo log records generated by a
1584  transaction contain XA information and other undo log
1585  records, generated by the same transaction do not. */
1586  trx->support_xa = thd_supports_xa(trx->mysql_thd);
1587 
1588  ut_d(trx->start_file = __FILE__);
1589  ut_d(trx->start_line = __LINE__);
1590 
1591  trx_start_low(trx);
1592  /* fall through */
1593  case TRX_STATE_ACTIVE:
1594  case TRX_STATE_PREPARED:
1595  trx->op_info = "committing";
1596  trx_commit(trx);
1597  MONITOR_DEC(MONITOR_TRX_ACTIVE);
1598  trx->op_info = "";
1599  return(DB_SUCCESS);
1600  case TRX_STATE_COMMITTED_IN_MEMORY:
1601  break;
1602  }
1603  ut_error;
1604  /* Presumably unreachable after ut_error; kept so that every path
1604  of this non-void function has a return statement. */
1604  return(DB_CORRUPTION);
1605 }
1606 
1607 /**********************************************************************/
/* Performs the log flush that was postponed at commit time: if
trx->must_flush_log_later is set (and durability was not waived), the log is
flushed up to trx->commit_lsn via trx_flush_log_if_needed() and the flag is
cleared.
NOTE(review): the function-name line and the middle of the early-return
condition appear to be missing from this listing -- confirm against the
original source.
@param trx	transaction; must not be NULL */
1610 UNIV_INTERN
1611 void
1613 /*==========================*/
1614  trx_t* trx)
1615 {
1616  ut_a(trx);
1617 
1618  if (!trx->must_flush_log_later
1620  == HA_IGNORE_DURABILITY) {
1621  return;
1622  }
1623 
1624  trx_flush_log_if_needed(trx->commit_lsn, trx);
1625 
1626  trx->must_flush_log_later = FALSE;
1627 }
1628 
1629 /**********************************************************************/
/* State-dependent bookkeeping on a transaction. For a NOT_STARTED
transaction trx->undo_no is reset to 0 before falling through to the ACTIVE
handling; PREPARED and COMMITTED_IN_MEMORY transactions trigger ut_error.
NOTE(review): the function-name line, the first statement of the ACTIVE
case and the body of the fts_trx branch appear to be missing from this
listing, so the main effect of the function is not visible here -- confirm
against the original source.
@param trx	transaction; must not be NULL */
1631 UNIV_INTERN
1632 void
1634 /*==================*/
1635  trx_t* trx)
1636 {
1637  ut_a(trx);
1638 
1639  switch (trx->state) {
1640  case TRX_STATE_PREPARED:
1641  case TRX_STATE_COMMITTED_IN_MEMORY:
1642  break;
1643  case TRX_STATE_NOT_STARTED:
1644  trx->undo_no = 0;
1645  /* fall through */
1646  case TRX_STATE_ACTIVE:
1648 
1649  if (trx->fts_trx) {
1651  }
1652 
1653  return;
1654  }
1655 
1656  ut_error;
1657 }
1658 
1659 /**********************************************************************/
1662 UNIV_INTERN
1663 void
1665 /*==========*/
1666  FILE* f,
1668  const trx_t* trx,
1670  ulint max_query_len,
1673  ulint n_rec_locks,
1675  ulint n_trx_locks,
1677  ulint heap_size)
1679 {
1680  ibool newline;
1681  const char* op_info;
1682 
1683  ut_ad(mutex_own(&trx_sys->mutex));
1684 
1685  fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
1686 
1687  /* trx->state cannot change from or to NOT_STARTED while we
1688  are holding the trx_sys->mutex. It may change from ACTIVE to
1689  PREPARED or COMMITTED. */
1690  switch (trx->state) {
1691  case TRX_STATE_NOT_STARTED:
1692  fputs(", not started", f);
1693  goto state_ok;
1694  case TRX_STATE_ACTIVE:
1695  fprintf(f, ", ACTIVE %lu sec",
1696  (ulong) difftime(time(NULL), trx->start_time));
1697  goto state_ok;
1698  case TRX_STATE_PREPARED:
1699  fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1700  (ulong) difftime(time(NULL), trx->start_time));
1701  goto state_ok;
1702  case TRX_STATE_COMMITTED_IN_MEMORY:
1703  fputs(", COMMITTED IN MEMORY", f);
1704  goto state_ok;
1705  }
1706  fprintf(f, ", state %lu", (ulong) trx->state);
1707  ut_ad(0);
1708 state_ok:
1709 
1710  /* prevent a race condition */
1711  op_info = trx->op_info;
1712 
1713  if (*op_info) {
1714  putc(' ', f);
1715  fputs(op_info, f);
1716  }
1717 
1718  if (trx->is_recovered) {
1719  fputs(" recovered trx", f);
1720  }
1721 
1722  if (trx->declared_to_be_inside_innodb) {
1723  fprintf(f, ", thread declared inside InnoDB %lu",
1724  (ulong) trx->n_tickets_to_enter_innodb);
1725  }
1726 
1727  putc('\n', f);
1728 
1729  if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
1730  fprintf(f, "mysql tables in use %lu, locked %lu\n",
1731  (ulong) trx->n_mysql_tables_in_use,
1732  (ulong) trx->mysql_n_tables_locked);
1733  }
1734 
1735  newline = TRUE;
1736 
1737  /* trx->lock.que_state of an ACTIVE transaction may change
1738  while we are not holding trx->mutex. We perform a dirty read
1739  for performance reasons. */
1740 
1741  switch (trx->lock.que_state) {
1742  case TRX_QUE_RUNNING:
1743  newline = FALSE; break;
1744  case TRX_QUE_LOCK_WAIT:
1745  fputs("LOCK WAIT ", f); break;
1746  case TRX_QUE_ROLLING_BACK:
1747  fputs("ROLLING BACK ", f); break;
1748  case TRX_QUE_COMMITTING:
1749  fputs("COMMITTING ", f); break;
1750  default:
1751  fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
1752  }
1753 
1754  if (n_trx_locks > 0 || heap_size > 400) {
1755  newline = TRUE;
1756 
1757  fprintf(f, "%lu lock struct(s), heap size %lu,"
1758  " %lu row lock(s)",
1759  (ulong) n_trx_locks,
1760  (ulong) heap_size,
1761  (ulong) n_rec_locks);
1762  }
1763 
1764  if (trx->has_search_latch) {
1765  newline = TRUE;
1766  fputs(", holds adaptive hash latch", f);
1767  }
1768 
1769  if (trx->undo_no != 0) {
1770  newline = TRUE;
1771  fprintf(f, ", undo log entries "TRX_ID_FMT, trx->undo_no);
1772  }
1773 
1774  if (newline) {
1775  putc('\n', f);
1776  }
1777 
1778  if (trx->mysql_thd != NULL) {
1779  innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
1780  }
1781 }
1782 
1783 /**********************************************************************/
/* Prints information about a transaction by delegating to trx_print_low().
The caller must already hold both the lock mutex and trx_sys->mutex, as
asserted below.
NOTE(review): the function-name line and the remaining arguments of the
trx_print_low() call appear to be missing from this listing -- confirm
against the original source.
@param f		output stream
@param trx		transaction
@param max_query_len	forwarded to trx_print_low() */
1787 UNIV_INTERN
1788 void
1790 /*==============*/
1791  FILE* f,
1792  const trx_t* trx,
1793  ulint max_query_len)
1795 {
1796  ut_ad(lock_mutex_own());
1797  ut_ad(mutex_own(&trx_sys->mutex));
1798 
1799  trx_print_low(f, trx, max_query_len,
1803 }
1804 
1805 /**********************************************************************/
1808 UNIV_INTERN
1809 void
1810 trx_print(
1811 /*======*/
1812  FILE* f,
1813  const trx_t* trx,
1814  ulint max_query_len)
1816 {
1817  ulint n_rec_locks;
1818  ulint n_trx_locks;
1819  ulint heap_size;
1820 
1821  lock_mutex_enter();
1822  n_rec_locks = lock_number_of_rows_locked(&trx->lock);
1823  n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
1824  heap_size = mem_heap_get_size(trx->lock.lock_heap);
1825  lock_mutex_exit();
1826 
1827  mutex_enter(&trx_sys->mutex);
1828  trx_print_low(f, trx, max_query_len,
1829  n_rec_locks, n_trx_locks, heap_size);
1830  mutex_exit(&trx_sys->mutex);
1831 }
1832 
1833 #ifdef UNIV_DEBUG
1834 /**********************************************************************/
1838 UNIV_INTERN
1839 ibool
1840 trx_assert_started(
1841 /*===============*/
1842  const trx_t* trx)
1843 {
1844  ut_ad(mutex_own(&trx_sys->mutex));
1845 
1846  /* Non-locking autocommits should not hold any locks and this
1847  function is only called from the locking code. */
1848  assert_trx_in_list(trx);
1849 
1850  /* trx->state can change from or to NOT_STARTED while we are holding
1851  trx_sys->mutex for non-locking autocommit selects but not for other
1852  types of transactions. It may change from ACTIVE to PREPARED. Unless
1853  we are holding lock_sys->mutex, it may also change to COMMITTED. */
1854 
1855  switch (trx->state) {
1856  case TRX_STATE_PREPARED:
1857  return(TRUE);
1858 
1859  case TRX_STATE_ACTIVE:
1860  case TRX_STATE_COMMITTED_IN_MEMORY:
1861  return(TRUE);
1862 
1863  case TRX_STATE_NOT_STARTED:
1864  break;
1865  }
1866 
1867  ut_error;
1868  return(FALSE);
1869 }
1870 #endif /* UNIV_DEBUG */
1871 
1872 /*******************************************************************/
/* Compares the "weight" of two transactions. If exactly one of them is
flagged as having edited non-transactional tables, that one is treated as
the heavier; otherwise the result is TRX_WEIGHT(a) >= TRX_WEIGHT(b).
NOTE(review): the function-name line and the second half of both
*_notrans_edit expressions appear to be missing from this listing, as is
part of the disabled debug print -- confirm against the original source.
@param a	first transaction
@param b	second transaction
@return TRUE if a is considered at least as heavy as b */
1877 UNIV_INTERN
1878 ibool
1880 /*==========*/
1881  const trx_t* a,
1882  const trx_t* b)
1883 {
1884  ibool a_notrans_edit;
1885  ibool b_notrans_edit;
1886 
1887  /* If mysql_thd is NULL for a transaction we assume that it has
1888  not edited non-transactional tables. */
1889 
1890  a_notrans_edit = a->mysql_thd != NULL
1892 
1893  b_notrans_edit = b->mysql_thd != NULL
1895 
1896  if (a_notrans_edit != b_notrans_edit) {
1897 
1898  return(a_notrans_edit);
1899  }
1900 
1901  /* Either both had edited non-transactional tables or both had
1902  not, we fall back to comparing the number of altered/locked
1903  rows. */
1904 
1905 #if 0
1906  fprintf(stderr,
1907  "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
1908  __func__,
1911 #endif
1912 
1913  return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
1914 }
1915 
1916 /****************************************************************/
/* Moves an ACTIVE, non-recovered transaction to TRX_STATE_PREPARED.
If the transaction has undo logs, their state change is made inside a
mini-transaction under the rollback segment mutex, and the log is then
flushed as far as the mini-transaction's end LSN via
trx_flush_log_if_needed(). Without undo logs no log flush is attempted.
NOTE(review): the calls that change the insert/update undo log state and
one statement next to the state assignment appear to be missing from this
listing -- confirm against the original source.
@param trx	transaction to prepare */
1918 static
1919 void
1920 trx_prepare(
1921 /*========*/
1922  trx_t* trx)
1923 {
1924  trx_rseg_t* rseg;
1925  lsn_t lsn;
1926  mtr_t mtr;
1927 
1928  rseg = trx->rseg;
1929  /* Only fresh user transactions can be prepared.
1930  Recovered transactions cannot. */
1931  ut_a(!trx->is_recovered);
1932 
1933  if (trx->insert_undo != NULL || trx->update_undo != NULL) {
1934 
1935  mtr_start(&mtr);
1936 
1937  /* Change the undo log segment states from TRX_UNDO_ACTIVE
1938  to TRX_UNDO_PREPARED: these modifications to the file data
1939  structure define the transaction as prepared in the
1940  file-based world, at the serialization point of lsn. */
1941 
1942  mutex_enter(&rseg->mutex);
1943 
1944  if (trx->insert_undo != NULL) {
1945 
1946  /* It is not necessary to obtain trx->undo_mutex here
1947  because only a single OS thread is allowed to do the
1948  transaction prepare for this transaction. */
1949 
1951  &mtr);
1952  }
1953 
1954  if (trx->update_undo) {
1956  trx, trx->update_undo, &mtr);
1957  }
1958 
1959  mutex_exit(&rseg->mutex);
1960 
1961  /*--------------*/
1962  mtr_commit(&mtr); /* This mtr commit makes the
1963  transaction prepared in the file-based
1964  world */
1965  /*--------------*/
1966  lsn = mtr.end_lsn;
1967  ut_ad(lsn);
1968  } else {
1969  /* LSN 0 doubles as the "nothing to flush" marker tested below. */
1969  lsn = 0;
1970  }
1971 
1972  /*--------------------------------------*/
1973  ut_a(trx->state == TRX_STATE_ACTIVE);
1974  mutex_enter(&trx_sys->mutex);
1975  trx->state = TRX_STATE_PREPARED;
1977  mutex_exit(&trx_sys->mutex);
1978  /*--------------------------------------*/
1979 
1980  if (lsn) {
1981  /* Depending on the my.cnf options, we may now write the log
1982  buffer to the log files, making the prepared state of the
1983  transaction durable if the OS does not crash. We may also
1984  flush the log files to disk, making the prepared state of the
1985  transaction durable also at an OS crash or a power outage.
1986 
1987  The idea in InnoDB's group prepare is that a group of
1988  transactions gather behind a trx doing a physical disk write
1989  to log files, and when that physical write has been completed,
1990  one of those transactions does a write which prepares the whole
1991  group. Note that this group prepare will only bring benefit if
1992  there are > 2 users in the database. Then at least 2 users can
1993  gather behind one doing the physical log write to disk.
1994 
1995  TODO: find out if MySQL holds some mutex when calling this.
1996  That would spoil our group prepare algorithm. */
1997 
1998  trx_flush_log_if_needed(lsn, trx);
1999  }
2000 }
2001 
2002 /**********************************************************************/
/* Prepares a transaction, starting it first if necessary, and sets
trx->op_info to "preparing" for the duration of the call to trx_prepare().
NOTE(review): the function-name line appears to be missing from this
listing -- confirm against the original source.
@param trx	transaction to prepare */
2004 UNIV_INTERN
2005 void
2007 /*==================*/
2008  trx_t* trx)
2009 {
2010  trx_start_if_not_started_xa(trx);
2011 
2012  trx->op_info = "preparing";
2013 
2014  trx_prepare(trx);
2015 
2016  trx->op_info = "";
2017 }
2018 
2019 /**********************************************************************/
2023 UNIV_INTERN
2024 int
2026 /*==================*/
2027  XID* xid_list,
2028  ulint len)
2029 {
2030  const trx_t* trx;
2031  ulint count = 0;
2032 
2033  ut_ad(xid_list);
2034  ut_ad(len);
2035 
2036  /* We should set those transactions which are in the prepared state
2037  to the xid_list */
2038 
2039  mutex_enter(&trx_sys->mutex);
2040 
2041  for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2042  trx != NULL;
2043  trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2044 
2045  assert_trx_in_rw_list(trx);
2046 
2047  /* The state of a read-write transaction cannot change
2048  from or to NOT_STARTED while we are holding the
2049  trx_sys->mutex. It may change to PREPARED, but not if
2050  trx->is_recovered. It may also change to COMMITTED. */
2051  if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
2052  xid_list[count] = trx->xid;
2053 
2054  if (count == 0) {
2055  ut_print_timestamp(stderr);
2056  fprintf(stderr,
2057  " InnoDB: Starting recovery for"
2058  " XA transactions...\n");
2059  }
2060 
2061  ut_print_timestamp(stderr);
2062  fprintf(stderr,
2063  " InnoDB: Transaction " TRX_ID_FMT " in"
2064  " prepared state after recovery\n",
2065  trx->id);
2066 
2067  ut_print_timestamp(stderr);
2068  fprintf(stderr,
2069  " InnoDB: Transaction contains changes"
2070  " to "TRX_ID_FMT" rows\n",
2071  trx->undo_no);
2072 
2073  count++;
2074 
2075  if (count == len) {
2076  break;
2077  }
2078  }
2079  }
2080 
2081  mutex_exit(&trx_sys->mutex);
2082 
2083  if (count > 0){
2084  ut_print_timestamp(stderr);
2085  fprintf(stderr,
2086  " InnoDB: %d transactions in prepared state"
2087  " after recovery\n",
2088  int (count));
2089  }
2090 
2091  return(int (count));
2092 }
2093 
2094 /*******************************************************************/
2100 static __attribute__((nonnull, warn_unused_result))
2101 trx_t*
2102 trx_get_trx_by_xid_low(
2103 /*===================*/
2104  const XID* xid)
2106 {
2107  trx_t* trx;
2108 
2109  ut_ad(mutex_own(&trx_sys->mutex));
2110 
2111  for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2112  trx != NULL;
2113  trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2114 
2115  assert_trx_in_rw_list(trx);
2116 
2117  /* Compare two X/Open XA transaction id's: their
2118  length should be the same and binary comparison
2119  of gtrid_length+bqual_length bytes should be
2120  the same */
2121 
2122  if (trx->is_recovered
2123  && trx_state_eq(trx, TRX_STATE_PREPARED)
2124  && xid->gtrid_length == trx->xid.gtrid_length
2125  && xid->bqual_length == trx->xid.bqual_length
2126  && memcmp(xid->data, trx->xid.data,
2127  xid->gtrid_length + xid->bqual_length) == 0) {
2128 
2129  /* Invalidate the XID, so that subsequent calls
2130  will not find it. */
2131  memset(&trx->xid, 0, sizeof(trx->xid));
2132  trx->xid.formatID = -1;
2133  break;
2134  }
2135  }
2136 
2137  return(trx);
2138 }
2139 
2140 /*******************************************************************/
/* Looks up a transaction by its X/Open XA identifier. Acquires
trx_sys->mutex around a call to trx_get_trx_by_xid_low(), which on a match
also invalidates the XID stored in the transaction.
NOTE(review): the function-name line appears to be missing from this
listing -- confirm against the original source.
@param xid	identifier to look up; NULL yields NULL
@return the matching transaction, or NULL if none was found */
2146 UNIV_INTERN
2147 trx_t*
2149 /*===============*/
2150  const XID* xid)
2151 {
2152  trx_t* trx;
2153 
2154  if (xid == NULL) {
2155 
2156  return(NULL);
2157  }
2158 
2159  mutex_enter(&trx_sys->mutex);
2160 
2161  /* Recovered/Resurrected transactions are always only on the
2162  trx_sys_t::rw_trx_list. */
2163  trx = trx_get_trx_by_xid_low(xid);
2164 
2165  mutex_exit(&trx_sys->mutex);
2166 
2167  return(trx);
2168 }
2169 
2170 /*************************************************************/
/* Starts the transaction if it is in TRX_STATE_NOT_STARTED, refreshing
trx->support_xa from the session first; does nothing for an ACTIVE
transaction. PREPARED and COMMITTED_IN_MEMORY transactions trigger
ut_error.
NOTE(review): the function-name line appears to be missing from this
listing -- confirm against the original source.
@param trx	transaction */
2172 UNIV_INTERN
2173 void
2175 /*============================*/
2176  trx_t* trx)
2177 {
2178  switch (trx->state) {
2179  case TRX_STATE_NOT_STARTED:
2180 
2181  /* Update the info whether we should skip XA steps
2182  that eat CPU time.
2183 
2184  For the duration of the transaction trx->support_xa is
2185  not reread from thd so any changes in the value take
2186  effect in the next transaction. This is to avoid a
2187  scenario where some undo generated by a transaction,
2188  has XA stuff, and other undo, generated by the same
2189  transaction, doesn't. */
2190  trx->support_xa = thd_supports_xa(trx->mysql_thd);
2191 
2192  trx_start_low(trx);
2193  /* fall through */
2194  case TRX_STATE_ACTIVE:
2195  return;
2196  case TRX_STATE_PREPARED:
2197  case TRX_STATE_COMMITTED_IN_MEMORY:
2198  break;
2199  }
2200 
2201  ut_error;
2202 }
2203 
2204 /*************************************************************/
/* Starts the transaction if it is in TRX_STATE_NOT_STARTED; does nothing
for an ACTIVE transaction. PREPARED and COMMITTED_IN_MEMORY transactions
trigger ut_error. Unlike the XA variant, trx->support_xa is not refreshed
here.
NOTE(review): the function-name line appears to be missing from this
listing -- confirm against the original source.
@param trx	transaction */
2206 UNIV_INTERN
2207 void
2209 /*=========================*/
2210  trx_t* trx)
2211 {
2212  switch (trx->state) {
2213  case TRX_STATE_NOT_STARTED:
2214  trx_start_low(trx);
2215  /* fall through */
2216  case TRX_STATE_ACTIVE:
2217  return;
2218  case TRX_STATE_PREPARED:
2219  case TRX_STATE_COMMITTED_IN_MEMORY:
2220  break;
2221  }
2222 
2223  ut_error;
2224 }
2225 
2226 /*************************************************************/
/* Starts a transaction for a data dictionary (DDL) operation. A
NOT_STARTED transaction is tagged with the dictionary operation, marked
as one that will lock (will_lock = 1) and as DDL, and then started. An
already ACTIVE transaction is only marked as DDL. PREPARED and
COMMITTED_IN_MEMORY transactions trigger ut_error.
NOTE(review): the function-name line and one line in the ACTIVE case appear
to be missing from this listing -- confirm against the original source.
@param trx	transaction
@param op	dictionary operation type passed to trx_set_dict_operation() */
2228 UNIV_INTERN
2229 void
2231 /*==================*/
2232  trx_t* trx,
2233  trx_dict_op_t op)
2234 {
2235  switch (trx->state) {
2236  case TRX_STATE_NOT_STARTED:
2237  /* Flag this transaction as a dictionary operation, so that
2238  the data dictionary will be locked in crash recovery. */
2239 
2240  trx_set_dict_operation(trx, op);
2241 
2242  /* Ensure it is not flagged as an auto-commit-non-locking
2243  transaction. */
2244  trx->will_lock = 1;
2245 
2246  trx->ddl = true;
2247 
2248  trx_start_low(trx);
2249  return;
2250 
2251  case TRX_STATE_ACTIVE:
2252  /* We have this start if not started idiom, therefore we
2253  can't add stronger checks here. */
2254  trx->ddl = true;
2255 
2257  ut_ad(trx->will_lock > 0);
2258  return;
2259  case TRX_STATE_PREPARED:
2260  case TRX_STATE_COMMITTED_IN_MEMORY:
2261  break;
2262  }
2263 
2264  ut_error;
2265 }
2266