MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rpl_slave.cc
Go to the documentation of this file.
1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software Foundation,
14  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
15 
16 
27 #include "sql_priv.h"
28 #include "my_global.h"
29 #include "rpl_slave.h"
30 #include "sql_parse.h" // execute_init_command
31 #include "sql_table.h" // mysql_rm_table
32 #include "rpl_mi.h"
33 #include "rpl_rli.h"
34 #include "rpl_filter.h"
35 #include "rpl_info_factory.h"
36 #include "transaction.h"
37 #include <thr_alarm.h>
38 #include <my_dir.h>
39 #include <sql_common.h>
40 #include <errmsg.h>
41 #include <mysqld_error.h>
42 #include <mysys_err.h>
43 #include "rpl_handler.h"
44 #include "rpl_info_dummy.h"
45 #include <signal.h>
46 #include <mysql.h>
47 #include <myisam.h>
48 
49 #include "sql_base.h" // close_thread_tables
50 #include "tztime.h" // struct Time_zone
51 #include "log_event.h" // Rotate_log_event,
52  // Create_file_log_event,
53  // Format_description_log_event
54 #include "dynamic_ids.h"
55 #include "rpl_rli_pdb.h"
56 #include "global_threads.h"
57 
58 #ifdef HAVE_REPLICATION
59 
60 #include "rpl_tblmap.h"
61 #include "debug_sync.h"
62 
63 using std::min;
64 using std::max;
65 
66 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
67 
68 #define MAX_SLAVE_RETRY_PAUSE 5
69 /*
70  a parameter of sql_slave_killed() to defer the killed status
71 */
72 #define SLAVE_WAIT_GROUP_DONE 60
73 bool use_slave_mask = 0;
74 MY_BITMAP slave_error_mask;
75 char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
76 
77 static unsigned long stop_wait_timeout;
78 char* slave_load_tmpdir = 0;
79 Master_info *active_mi= 0;
80 my_bool replicate_same_server_id;
81 ulonglong relay_log_space_limit = 0;
82 
83 const char *relay_log_index= 0;
84 const char *relay_log_basename= 0;
85 
86 /*
87  MTS load-balancing parameter.
88  Max length of one MTS Worker queue. The value also determines the size
89  of Relay_log_info::gaq (see @c slave_start_workers()).
90  It can be set to any value in [1, ULONG_MAX - 1] range.
91 */
92 const ulong mts_slave_worker_queue_len_max= 16384;
93 
94 /*
95  Statistics go to the error log every # of seconds when --log-warnings > 1
96 */
97 const long mts_online_stat_period= 60 * 2;
98 
99 
100 /*
101  MTS load-balancing parameter.
102  Time unit in microsecs to sleep by MTS Coordinator to avoid extra thread
103  signalling in the case of Worker queues are close to be filled up.
104 */
105 const ulong mts_coordinator_basic_nap= 5;
106 
107 /*
108  MTS load-balancing parameter.
109  Percent of Worker queue size at which Worker is considered to become
110  hungry.
111 
112  C enqueues --+ . underrun level
113  V "
114  +----------+-+------------------+--------------+
115  | empty |.|::::::::::::::::::|xxxxxxxxxxxxxx| ---> Worker dequeues
116  +----------+-+------------------+--------------+
117 
118  As in the diagram above, enqueuing into the x-ed area would indicate
119  actual underrunning by the Worker.
120 */
121 const ulong mts_worker_underrun_level= 10;
122 
123 Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret);
124 bool append_item_to_jobs(slave_job_item *job_item,
125  Slave_worker *w, Relay_log_info *rli);
126 
127 /*
128  When slave thread exits, we need to remember the temporary tables so we
129  can re-use them on slave start.
130 
131  TODO: move the vars below under Master_info
132 */
133 
134 int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
135 
136 static pthread_key(Master_info*, RPL_MASTER_INFO);
137 
/*
  Situations in which the slave I/O thread attempts a reconnect.
  Used as the first index of reconnect_messages[][].
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,    /* failed registration on master */
  SLAVE_RECON_ACT_DUMP= 1,   /* failed binlog dump request */
  SLAVE_RECON_ACT_EVENT= 2,  /* failed master event read */
  SLAVE_RECON_ACT_MAX
};

/*
  Kinds of message available for each reconnect action.
  Used as the second index of reconnect_messages[][].
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};

/*
  Message table indexed by [reconnect action][message kind].

  The SLAVE_RECON_MSG_FAILED entries are format strings taking two '%s'
  arguments (log name and position). Long literals are split with adjacent
  string literal concatenation, so the resulting text does not depend on
  how continuation lines are indented.
*/
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waiting to reconnect after a failed "
    "registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, "
    "log '%s' at position %s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at position %s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, "
    "log '%s' at position %s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from "
    "failed read"
  }
};
188 
/*
  Result codes whose names distinguish a successful apply-and-update-position
  step from the three ways it can fail (apply, position update, job append).
  Values are consecutive starting at 0; the _MAX entry counts them.
*/
enum enum_slave_apply_event_and_update_pos_retval
{
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK= 0,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_MAX
};
197 
198 
199 static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
200 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
201 static bool wait_for_relay_log_space(Relay_log_info* rli);
202 static inline bool io_slave_killed(THD* thd,Master_info* mi);
203 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
204 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
205 static void print_slave_skip_errors(void);
206 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
207 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
208  bool suppress_warnings);
209 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
210  bool reconnect, bool suppress_warnings);
211 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
212 static int get_master_uuid(MYSQL *mysql, Master_info *mi);
213 int io_thread_init_commands(MYSQL *mysql, Master_info *mi);
214 static Log_event* next_event(Relay_log_info* rli);
215 static int queue_event(Master_info* mi,const char* buf,ulong event_len);
216 static void set_stop_slave_wait_timeout(unsigned long wait_timeout);
217 static int terminate_slave_thread(THD *thd,
218  mysql_mutex_t *term_lock,
219  mysql_cond_t *term_cond,
220  volatile uint *slave_running,
221  bool need_lock_term);
222 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
223 int slave_worker_exec_job(Slave_worker * w, Relay_log_info *rli);
224 static int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2);
225 /*
226  Function to set the slave's max_allowed_packet based on the value
227  of slave_max_allowed_packet.
228 
229  @in_param thd Thread handler for slave
230  @in_param mysql MySQL connection handle
231 */
232 
233 static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
234 {
235  DBUG_ENTER("set_slave_max_allowed_packet");
236  // thd and mysql must be valid
237  DBUG_ASSERT(thd && mysql);
238 
239  thd->variables.max_allowed_packet= slave_max_allowed_packet;
240  thd->net.max_packet_size= slave_max_allowed_packet;
241  /*
242  Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
243  thread and the mysql->option max_allowed_packet, since a
244  replication event can become this much larger than
245  the corresponding packet (query) sent from client to master.
246  */
247  thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
248  /*
249  Skipping the setting of mysql->net.max_packet size to slave
250  max_allowed_packet since this is done during mysql_real_connect.
251  */
252  mysql->options.max_allowed_packet=
253  slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
254  DBUG_VOID_RETURN;
255 }
256 
257 /*
258  Find out which replications threads are running
259 
260  SYNOPSIS
261  init_thread_mask()
262  mask Return value here
263  mi master_info for slave
264  inverse If set, returns which threads are not running
265 
266  IMPLEMENTATION
267  Get a bit mask for which threads are running so that we can later restart
268  these threads.
269 
270  RETURN
271  mask If inverse == 0, running threads
272  If inverse == 1, stopped threads
273 */
274 
275 void init_thread_mask(int* mask, Master_info* mi, bool inverse)
276 {
277  bool set_io = mi->slave_running, set_sql = mi->rli->slave_running;
278  register int tmp_mask=0;
279  DBUG_ENTER("init_thread_mask");
280 
281  if (set_io)
282  tmp_mask |= SLAVE_IO;
283  if (set_sql)
284  tmp_mask |= SLAVE_SQL;
285  if (inverse)
286  tmp_mask^= (SLAVE_IO | SLAVE_SQL);
287  *mask = tmp_mask;
288  DBUG_VOID_RETURN;
289 }
290 
291 
292 /*
293  lock_slave_threads()
294 */
295 
296 void lock_slave_threads(Master_info* mi)
297 {
298  DBUG_ENTER("lock_slave_threads");
299 
300  //TODO: see if we can do this without dual mutex
301  mysql_mutex_lock(&mi->run_lock);
302  mysql_mutex_lock(&mi->rli->run_lock);
303  DBUG_VOID_RETURN;
304 }
305 
306 
307 /*
308  unlock_slave_threads()
309 */
310 
311 void unlock_slave_threads(Master_info* mi)
312 {
313  DBUG_ENTER("unlock_slave_threads");
314 
315  //TODO: see if we can do this without dual mutex
316  mysql_mutex_unlock(&mi->rli->run_lock);
317  mysql_mutex_unlock(&mi->run_lock);
318  DBUG_VOID_RETURN;
319 }
320 
#ifdef HAVE_PSI_INTERFACE
/* Performance schema instrumentation keys for the slave threads. */
static PSI_thread_key key_thread_slave_io;
static PSI_thread_key key_thread_slave_sql;
static PSI_thread_key key_thread_slave_worker;

/* Key / name / flags triples handed to mysql_thread_register(). */
static PSI_thread_info all_slave_threads[]=
{
  { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
  { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL},
  { &key_thread_slave_worker, "slave_worker", PSI_FLAG_GLOBAL}
};

/* Register every entry of all_slave_threads under the "sql" category. */
static void init_slave_psi_keys(void)
{
  const int n_keys= array_elements(all_slave_threads);

  mysql_thread_register("sql", all_slave_threads, n_keys);
}
#endif /* HAVE_PSI_INTERFACE */
340 
341 /* Initialize slave structures */
342 
343 int init_slave()
344 {
345  DBUG_ENTER("init_slave");
346  int error= 0;
347  int thread_mask= SLAVE_SQL | SLAVE_IO;
348  Relay_log_info* rli= NULL;
349 
350 #ifdef HAVE_PSI_INTERFACE
351  init_slave_psi_keys();
352 #endif
353 
354  /*
355  This is called when mysqld starts. Before client connections are
356  accepted. However bootstrap may conflict with us if it does START SLAVE.
357  So it's safer to take the lock.
358  */
359  mysql_mutex_lock(&LOCK_active_mi);
360 
361  if (pthread_key_create(&RPL_MASTER_INFO, NULL))
362  DBUG_RETURN(1);
363 
364  if ((error= Rpl_info_factory::create_coordinators(opt_mi_repository_id, &active_mi,
365  opt_rli_repository_id, &rli)))
366  {
367  sql_print_error("Failed to create or recover replication info repository.");
368  error= 1;
369  goto err;
370  }
371 
372  /*
373  This is the startup routine and as such we try to
374  configure both the SLAVE_SQL and SLAVE_IO.
375  */
376  if (global_init_info(active_mi, true, thread_mask))
377  {
378  sql_print_error("Failed to initialize the master info structure");
379  error= 1;
380  goto err;
381  }
382 
383  DBUG_PRINT("info", ("init group master %s %lu group relay %s %lu event %s %lu\n",
384  rli->get_group_master_log_name(),
385  (ulong) rli->get_group_master_log_pos(),
386  rli->get_group_relay_log_name(),
387  (ulong) rli->get_group_relay_log_pos(),
388  rli->get_event_relay_log_name(),
389  (ulong) rli->get_event_relay_log_pos()));
390 
391  /* If server id is not set, start_slave_thread() will say it */
392  if (active_mi->host[0] && !opt_skip_slave_start)
393  {
394  /* same as in start_slave() cache the global var values into rli's members */
395  active_mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
396  active_mi->rli->checkpoint_group= opt_mts_checkpoint_group;
397  if (start_slave_threads(true/*need_lock_slave=true*/,
398  false/*wait_for_start=false*/,
399  active_mi,
400  thread_mask))
401  {
402  sql_print_error("Failed to create slave threads");
403  error= 1;
404  goto err;
405  }
406  }
407 
408 err:
409  mysql_mutex_unlock(&LOCK_active_mi);
410  if (error)
411  sql_print_information("Check error log for additional messages. "
412  "You will not be able to start replication until "
413  "the issue is resolved and the server restarted.");
414  DBUG_RETURN(error);
415 }
416 
417 /*
418  Updates the master info based on the information stored in the
419  relay info and ignores relay logs previously retrieved by the IO
420  thread, which thus starts fetching again based on to the
421  master_log_pos and master_log_name. Eventually, the old
422  relay logs will be purged by the normal purge mechanism.
423 
424  In the feature, we should improve this routine in order to avoid throwing
425  away logs that are safely stored in the disk. Note also that this recovery
426  routine relies on the correctness of the relay-log.info and only tolerates
427  coordinate problems in master.info.
428 
429  In this function, there is no need for a mutex as the caller
430  (i.e. init_slave) already has one acquired.
431 
432  Specifically, the following structures are updated:
433 
434  1 - mi->master_log_pos <-- rli->group_master_log_pos
435  2 - mi->master_log_name <-- rli->group_master_log_name
436  3 - It moves the relay log to the new relay log file, by
437  rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
438  rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
439  rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
440  rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
441 
442  If there is an error, it returns (1), otherwise returns (0).
443  */
444 int init_recovery(Master_info* mi, const char** errmsg)
445 {
446  DBUG_ENTER("init_recovery");
447 
448  int error= 0;
449  Relay_log_info *rli= mi->rli;
450  char *group_master_log_name= NULL;
451 
452  if (rli->recovery_parallel_workers)
453  {
454  /*
455  This is not idempotent and a crash after this function and before
456  the recovery is actually done may lead the system to an inconsistent
457  state.
458 
459  This may happen because the gap is not persitent stored anywhere
460  and eventually old relay log files will be removed and further
461  calculations on the gaps will be impossible.
462 
463  We need to improve this. /Alfranio.
464  */
465  error= mts_recovery_groups(rli);
466  if (rli->mts_recovery_group_cnt)
467  {
468  error= 1;
469  sql_print_error("--relay-log-recovery cannot be executed when the slave "
470  "was stopped with an error or killed in MTS mode; "
471  "consider using RESET SLAVE or restart the server "
472  "with --relay-log-recovery = 0 followed by "
473  "START SLAVE UNTIL SQL_AFTER_MTS_GAPS");
474  }
475  }
476 
477  group_master_log_name= const_cast<char *>(rli->get_group_master_log_name());
478  if (!error && group_master_log_name[0])
479  {
480  mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
481  rli->get_group_master_log_pos()));
482  mi->set_master_log_name(rli->get_group_master_log_name());
483 
484  sql_print_warning("Recovery from master pos %ld and file %s.",
485  (ulong) mi->get_master_log_pos(), mi->get_master_log_name());
486 
487  rli->set_group_relay_log_name(rli->relay_log.get_log_fname());
488  rli->set_event_relay_log_name(rli->relay_log.get_log_fname());
489  rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
490  rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
491  }
492 
493  /*
494  Clear the retrieved GTID set so that events that are written partially
495  will be fetched again.
496  */
497  global_sid_lock->wrlock();
498  (const_cast<Gtid_set *>(rli->get_gtid_set()))->clear();
499  global_sid_lock->unlock();
500  DBUG_RETURN(error);
501 }
502 
503 int global_init_info(Master_info* mi, bool ignore_if_no_info, int thread_mask)
504 {
505  DBUG_ENTER("init_info");
506  DBUG_ASSERT(mi != NULL && mi->rli != NULL);
507  int init_error= 0;
508  enum_return_check check_return= ERROR_CHECKING_REPOSITORY;
509  THD *thd= current_thd;
510 
511  /*
512  We need a mutex while we are changing master info parameters to
513  keep other threads from reading bogus info
514  */
515  mysql_mutex_lock(&mi->data_lock);
516  mysql_mutex_lock(&mi->rli->data_lock);
517 
518  /*
519  When info tables are used and autocommit= 0 we force a new
520  transaction start to avoid table access deadlocks when START SLAVE
521  is executed after RESET SLAVE.
522  */
523  if (thd && thd->in_multi_stmt_transaction_mode() &&
524  (opt_mi_repository_id == INFO_REPOSITORY_TABLE ||
525  opt_rli_repository_id == INFO_REPOSITORY_TABLE))
526  if (trans_begin(thd))
527  {
528  init_error= 1;
529  goto end;
530  }
531 
532  /*
533  This takes care of the startup dependency between the master_info
534  and relay_info. It initializes the master info if the SLAVE_IO
535  thread is being started and the relay log info if either the
536  SLAVE_SQL thread is being started or was not initialized as it is
537  required by the SLAVE_IO thread.
538  */
539  check_return= mi->check_info();
540  if (check_return == ERROR_CHECKING_REPOSITORY)
541  goto end;
542 
543  if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
544  {
545  if ((thread_mask & SLAVE_IO) != 0 && mi->mi_init_info())
546  init_error= 1;
547  }
548 
549  check_return= mi->rli->check_info();
550  if (check_return == ERROR_CHECKING_REPOSITORY)
551  goto end;
552  if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
553  {
554  if (((thread_mask & SLAVE_SQL) != 0 || !(mi->rli->inited))
555  && mi->rli->rli_init_info())
556  init_error= 1;
557  }
558 
559 end:
560  /*
561  When info tables are used and autocommit= 0 we force transaction
562  commit to avoid table access deadlocks when START SLAVE is executed
563  after RESET SLAVE.
564  */
565  if (thd && thd->in_multi_stmt_transaction_mode() &&
566  (opt_mi_repository_id == INFO_REPOSITORY_TABLE ||
567  opt_rli_repository_id == INFO_REPOSITORY_TABLE))
568  if (trans_commit(thd))
569  init_error= 1;
570 
571  mysql_mutex_unlock(&mi->rli->data_lock);
572  mysql_mutex_unlock(&mi->data_lock);
573  DBUG_RETURN(check_return == ERROR_CHECKING_REPOSITORY || init_error);
574 }
575 
576 void end_info(Master_info* mi)
577 {
578  DBUG_ENTER("end_info");
579  DBUG_ASSERT(mi != NULL && mi->rli != NULL);
580 
581  /*
582  The previous implementation was not acquiring locks. We do the same here.
583  However, this is quite strange.
584  */
585  mi->end_info();
586  mi->rli->end_info();
587 
588  DBUG_VOID_RETURN;
589 }
590 
591 int remove_info(Master_info* mi)
592 {
593  int error= 1;
594  DBUG_ENTER("remove_info");
595  DBUG_ASSERT(mi != NULL && mi->rli != NULL);
596 
597  /*
598  The previous implementation was not acquiring locks.
599  We do the same here. However, this is quite strange.
600  */
601  /*
602  Reset errors (the idea is that we forget about the
603  old master).
604  */
605  mi->clear_error();
606  mi->rli->clear_error();
607  mi->rli->clear_until_condition();
608  mi->rli->clear_sql_delay();
609 
610  mi->end_info();
611  mi->rli->end_info();
612 
613  if (mi->remove_info() || Rpl_info_factory::reset_workers(mi->rli) ||
614  mi->rli->remove_info())
615  goto err;
616 
617  error= 0;
618 
619 err:
620  DBUG_RETURN(error);
621 }
622 
623 int flush_master_info(Master_info* mi, bool force)
624 {
625  DBUG_ENTER("flush_master_info");
626  DBUG_ASSERT(mi != NULL && mi->rli != NULL);
627  /*
628  The previous implementation was not acquiring locks.
629  We do the same here. However, this is quite strange.
630  */
631  /*
632  With the appropriate recovery process, we will not need to flush
633  the content of the current log.
634 
635  For now, we flush the relay log BEFORE the master.info file, because
636  if we crash, we will get a duplicate event in the relay log at restart.
637  If we change the order, there might be missing events.
638 
639  If we don't do this and the slave server dies when the relay log has
640  some parts (its last kilobytes) in memory only, with, say, from master's
641  position 100 to 150 in memory only (not on disk), and with position 150
642  in master.info, there will be missing information. When the slave restarts,
643  the I/O thread will fetch binlogs from 150, so in the relay log we will
644  have "[0, 100] U [150, infinity[" and nobody will notice it, so the SQL
645  thread will jump from 100 to 150, and replication will silently break.
646  */
647  mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
648 
649  mysql_mutex_lock(log_lock);
650 
651  int err= (mi->rli->flush_current_log() ||
652  mi->flush_info(force));
653 
654  mysql_mutex_unlock(log_lock);
655 
656  DBUG_RETURN (err);
657 }
658 
663 static void print_slave_skip_errors(void)
664 {
665  /*
666  To be safe, we want 10 characters of room in the buffer for a number
667  plus terminators. Also, we need some space for constant strings.
668  10 characters must be sufficient for a number plus {',' | '...'}
669  plus a NUL terminator. That is a max 6 digit number.
670  */
671  const size_t MIN_ROOM= 10;
672  DBUG_ENTER("print_slave_skip_errors");
673  DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
674  DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
675 
676  if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
677  {
678  /* purecov: begin tested */
679  memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
680  /* purecov: end */
681  }
682  else if (bitmap_is_set_all(&slave_error_mask))
683  {
684  /* purecov: begin tested */
685  memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
686  /* purecov: end */
687  }
688  else
689  {
690  char *buff= slave_skip_error_names;
691  char *bend= buff + sizeof(slave_skip_error_names);
692  int errnum;
693 
694  for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
695  {
696  if (bitmap_is_set(&slave_error_mask, errnum))
697  {
698  if (buff + MIN_ROOM >= bend)
699  break; /* purecov: tested */
700  buff= int10_to_str(errnum, buff, 10);
701  *buff++= ',';
702  }
703  }
704  if (buff != slave_skip_error_names)
705  buff--; // Remove last ','
706  if (errnum < MAX_SLAVE_ERROR)
707  {
708  /* Couldn't show all errors */
709  buff= strmov(buff, "..."); /* purecov: tested */
710  }
711  *buff=0;
712  }
713  DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
714  DBUG_VOID_RETURN;
715 }
716 
717 static void set_stop_slave_wait_timeout(unsigned long wait_timeout) {
718  stop_wait_timeout = wait_timeout;
719 }
720 
726 void set_slave_skip_errors(char** slave_skip_errors_ptr)
727 {
728  DBUG_ENTER("set_slave_skip_errors");
729  print_slave_skip_errors();
730  *slave_skip_errors_ptr= slave_skip_error_names;
731  DBUG_VOID_RETURN;
732 }
733 
737 static void init_slave_skip_errors()
738 {
739  DBUG_ENTER("init_slave_skip_errors");
740  DBUG_ASSERT(!use_slave_mask); // not already initialized
741 
742  if (bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0))
743  {
744  fprintf(stderr, "Badly out of memory, please check your system status\n");
745  exit(1);
746  }
747  use_slave_mask = 1;
748  DBUG_VOID_RETURN;
749 }
750 
751 static void add_slave_skip_errors(const uint* errors, uint n_errors)
752 {
753  DBUG_ENTER("add_slave_skip_errors");
754  DBUG_ASSERT(errors);
755  DBUG_ASSERT(use_slave_mask);
756 
757  for (uint i = 0; i < n_errors; i++)
758  {
759  const uint err_code = errors[i];
760  if (err_code < MAX_SLAVE_ERROR)
761  bitmap_set_bit(&slave_error_mask, err_code);
762  }
763  DBUG_VOID_RETURN;
764 }
765 
766 /*
767  Add errors that should be skipped for slave
768 
769  SYNOPSIS
770  add_slave_skip_errors()
771  arg List of errors numbers to be added to skip, separated with ','
772 
773  NOTES
774  Called from get_options() in mysqld.cc on start-up
775 */
776 
777 void add_slave_skip_errors(const char* arg)
778 {
779  const char *p= NULL;
780  /*
781  ALL is only valid when nothing else is provided.
782  */
783  const uchar SKIP_ALL[]= "all";
784  size_t SIZE_SKIP_ALL= strlen((const char *) SKIP_ALL) + 1;
785  /*
786  IGNORE_DDL_ERRORS can be combined with other parameters
787  but must be the first one provided.
788  */
789  const uchar SKIP_DDL_ERRORS[]= "ddl_exist_errors";
790  size_t SIZE_SKIP_DDL_ERRORS= strlen((const char *) SKIP_DDL_ERRORS);
791  DBUG_ENTER("add_slave_skip_errors");
792 
793  // initialize mask if not done yet
794  if (!use_slave_mask)
795  init_slave_skip_errors();
796 
797  for (; my_isspace(system_charset_info,*arg); ++arg)
798  /* empty */;
799  if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_ALL,
800  SKIP_ALL, SIZE_SKIP_ALL))
801  {
802  bitmap_set_all(&slave_error_mask);
803  DBUG_VOID_RETURN;
804  }
805  if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_DDL_ERRORS,
806  SKIP_DDL_ERRORS, SIZE_SKIP_DDL_ERRORS))
807  {
808  // DDL errors to be skipped for relaxed 'exist' handling
809  const uint ddl_errors[] = {
810  // error codes with create/add <schema object>
811  ER_DB_CREATE_EXISTS, ER_TABLE_EXISTS_ERROR, ER_DUP_KEYNAME,
812  ER_MULTIPLE_PRI_KEY,
813  // error codes with change/rename <schema object>
814  ER_BAD_FIELD_ERROR, ER_NO_SUCH_TABLE, ER_DUP_FIELDNAME,
815  // error codes with drop <schema object>
816  ER_DB_DROP_EXISTS, ER_BAD_TABLE_ERROR, ER_CANT_DROP_FIELD_OR_KEY
817  };
818 
819  add_slave_skip_errors(ddl_errors,
820  sizeof(ddl_errors)/sizeof(ddl_errors[0]));
821  /*
822  After processing the SKIP_DDL_ERRORS, the pointer is
823  increased to the position after the comma.
824  */
825  if (strlen(arg) > SIZE_SKIP_DDL_ERRORS + 1)
826  arg+= SIZE_SKIP_DDL_ERRORS + 1;
827  }
828  for (p= arg ; *p; )
829  {
830  long err_code;
831  if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
832  break;
833  if (err_code < MAX_SLAVE_ERROR)
834  bitmap_set_bit(&slave_error_mask,(uint)err_code);
835  while (!my_isdigit(system_charset_info,*p) && *p)
836  p++;
837  }
838  DBUG_VOID_RETURN;
839 }
840 
841 static void set_thd_in_use_temporary_tables(Relay_log_info *rli)
842 {
843  TABLE *table;
844 
845  for (table= rli->save_temporary_tables ; table ; table= table->next)
846  {
847  table->in_use= rli->info_thd;
848  if (table->file != NULL)
849  {
850  /*
851  Since we are stealing opened temporary tables from one thread to another,
852  we need to let the performance schema know that,
853  for aggregates per thread to work properly.
854  */
855  table->file->unbind_psi();
856  table->file->rebind_psi();
857  }
858  }
859 }
860 
861 int terminate_slave_threads(Master_info* mi,int thread_mask,bool need_lock_term)
862 {
863  DBUG_ENTER("terminate_slave_threads");
864 
865  if (!mi->inited)
866  DBUG_RETURN(0); /* successfully do nothing */
867  int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
868  mysql_mutex_t *sql_lock = &mi->rli->run_lock, *io_lock = &mi->run_lock;
869  mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
870  set_stop_slave_wait_timeout(rpl_stop_slave_timeout);
871 
872  if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
873  {
874  DBUG_PRINT("info",("Terminating SQL thread"));
875  mi->rli->abort_slave= 1;
876  if ((error=terminate_slave_thread(mi->rli->info_thd, sql_lock,
877  &mi->rli->stop_cond,
878  &mi->rli->slave_running,
879  need_lock_term)) &&
880  !force_all)
881  {
882  if (error == 1)
883  {
884  DBUG_RETURN(ER_STOP_SLAVE_SQL_THREAD_TIMEOUT);
885  }
886  DBUG_RETURN(error);
887  }
888  mysql_mutex_lock(log_lock);
889 
890  DBUG_PRINT("info",("Flushing relay-log info file."));
891  if (current_thd)
892  THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
893 
894  /*
895  Flushes the relay log info regardles of the sync_relay_log_info option.
896  */
897  if (mi->rli->flush_info(TRUE))
898  {
899  mysql_mutex_unlock(log_lock);
900  DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
901  }
902 
903  mysql_mutex_unlock(log_lock);
904  }
905  if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
906  {
907  DBUG_PRINT("info",("Terminating IO thread"));
908  mi->abort_slave=1;
909  if ((error=terminate_slave_thread(mi->info_thd,io_lock,
910  &mi->stop_cond,
911  &mi->slave_running,
912  need_lock_term)) &&
913  !force_all)
914  {
915  if (error == 1)
916  {
917  DBUG_RETURN(ER_STOP_SLAVE_IO_THREAD_TIMEOUT);
918  }
919  DBUG_RETURN(error);
920  }
921  mysql_mutex_lock(log_lock);
922 
923  DBUG_PRINT("info",("Flushing relay log and master info repository."));
924  if (current_thd)
925  THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
926 
927  /*
928  Flushes the master info regardles of the sync_master_info option.
929  */
930  if (mi->flush_info(TRUE))
931  {
932  mysql_mutex_unlock(log_lock);
933  DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
934  }
935 
936  /*
937  Flushes the relay log regardles of the sync_relay_log option.
938  */
939  if (mi->rli->relay_log.is_open() &&
940  mi->rli->relay_log.flush_and_sync(true))
941  {
942  mysql_mutex_unlock(log_lock);
943  DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
944  }
945 
946  mysql_mutex_unlock(log_lock);
947  }
948  DBUG_RETURN(0);
949 }
950 
951 
986 static int
987 terminate_slave_thread(THD *thd,
988  mysql_mutex_t *term_lock,
989  mysql_cond_t *term_cond,
990  volatile uint *slave_running,
991  bool need_lock_term)
992 {
993  DBUG_ENTER("terminate_slave_thread");
994  if (need_lock_term)
995  {
996  mysql_mutex_lock(term_lock);
997  }
998  else
999  {
1000  mysql_mutex_assert_owner(term_lock);
1001  }
1002  if (!*slave_running)
1003  {
1004  if (need_lock_term)
1005  {
1006  /*
1007  if run_lock (term_lock) is acquired locally then either
1008  slave_running status is fine
1009  */
1010  mysql_mutex_unlock(term_lock);
1011  DBUG_RETURN(0);
1012  }
1013  else
1014  {
1015  DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
1016  }
1017  }
1018  DBUG_ASSERT(thd != 0);
1019  THD_CHECK_SENTRY(thd);
1020 
1021  /*
1022  Is is critical to test if the slave is running. Otherwise, we might
1023  be referening freed memory trying to kick it
1024  */
1025 
1026  while (*slave_running) // Should always be true
1027  {
1028  int error;
1029  DBUG_PRINT("loop", ("killing slave thread"));
1030 
1031  mysql_mutex_lock(&thd->LOCK_thd_data);
1032 #ifndef DONT_USE_THR_ALARM
1033  /*
1034  Error codes from pthread_kill are:
1035  EINVAL: invalid signal number (can't happen)
1036  ESRCH: thread already killed (can happen, should be ignored)
1037  */
1038  int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm);
1039  DBUG_ASSERT(err != EINVAL);
1040 #endif
1041  thd->awake(THD::NOT_KILLED);
1042  mysql_mutex_unlock(&thd->LOCK_thd_data);
1043 
1044  /*
1045  There is a small chance that slave thread might miss the first
1046  alarm. To protect againts it, resend the signal until it reacts
1047  */
1048  struct timespec abstime;
1049  set_timespec(abstime,2);
1050  error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
1051  if (stop_wait_timeout >= 2)
1052  stop_wait_timeout= stop_wait_timeout - 2;
1053  else if (*slave_running)
1054  {
1055  if (need_lock_term)
1056  mysql_mutex_unlock(term_lock);
1057  DBUG_RETURN (1);
1058  }
1059  DBUG_ASSERT(error == ETIMEDOUT || error == 0);
1060  }
1061 
1062  DBUG_ASSERT(*slave_running == 0);
1063 
1064  if (need_lock_term)
1065  mysql_mutex_unlock(term_lock);
1066  DBUG_RETURN(0);
1067 }
1068 
1069 
1070 int start_slave_thread(
1071 #ifdef HAVE_PSI_INTERFACE
1072  PSI_thread_key thread_key,
1073 #endif
1074  pthread_handler h_func, mysql_mutex_t *start_lock,
1075  mysql_mutex_t *cond_lock,
1076  mysql_cond_t *start_cond,
1077  volatile uint *slave_running,
1078  volatile ulong *slave_run_id,
1079  Master_info* mi)
1080 {
1081  pthread_t th;
1082  ulong start_id;
1083  int error;
1084  DBUG_ENTER("start_slave_thread");
1085 
1086  if (start_lock)
1087  mysql_mutex_lock(start_lock);
1088  if (!server_id)
1089  {
1090  if (start_cond)
1091  mysql_cond_broadcast(start_cond);
1092  if (start_lock)
1093  mysql_mutex_unlock(start_lock);
1094  sql_print_error("Server id not set, will not start slave");
1095  DBUG_RETURN(ER_BAD_SLAVE);
1096  }
1097 
1098  if (*slave_running)
1099  {
1100  if (start_cond)
1101  mysql_cond_broadcast(start_cond);
1102  if (start_lock)
1103  mysql_mutex_unlock(start_lock);
1104  DBUG_RETURN(ER_SLAVE_MUST_STOP);
1105  }
1106  start_id= *slave_run_id;
1107  DBUG_PRINT("info",("Creating new slave thread"));
1108  if ((error= mysql_thread_create(thread_key,
1109  &th, &connection_attrib, h_func, (void*)mi)))
1110  {
1111  sql_print_error("Can't create slave thread (errno= %d).", error);
1112  if (start_lock)
1113  mysql_mutex_unlock(start_lock);
1114  DBUG_RETURN(ER_SLAVE_THREAD);
1115  }
1116  if (start_cond && cond_lock) // caller has cond_lock
1117  {
1118  THD* thd = current_thd;
1119  while (start_id == *slave_run_id && thd != NULL)
1120  {
1121  DBUG_PRINT("sleep",("Waiting for slave thread to start"));
1122  PSI_stage_info saved_stage= {0, "", 0};
1123  thd->ENTER_COND(start_cond, cond_lock,
1124  & stage_waiting_for_slave_thread_to_start,
1125  & saved_stage);
1126  /*
1127  It is not sufficient to test this at loop bottom. We must test
1128  it after registering the mutex in enter_cond(). If the kill
1129  happens after testing of thd->killed and before the mutex is
1130  registered, we could otherwise go waiting though thd->killed is
1131  set.
1132  */
1133  if (!thd->killed)
1134  mysql_cond_wait(start_cond, cond_lock);
1135  thd->EXIT_COND(& saved_stage);
1136  mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
1137  if (thd->killed)
1138  {
1139  if (start_lock)
1140  mysql_mutex_unlock(start_lock);
1141  DBUG_RETURN(thd->killed_errno());
1142  }
1143  }
1144  }
1145  if (start_lock)
1146  mysql_mutex_unlock(start_lock);
1147  DBUG_RETURN(0);
1148 }
1149 
1150 
1151 /*
1152  start_slave_threads()
1153 
1154  NOTES
1155  SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
1156  sense to do that for starting a slave--we always care if it actually
1157  started the threads that were not previously running
1158 */
1159 
1160 int start_slave_threads(bool need_lock_slave, bool wait_for_start,
1161  Master_info* mi, int thread_mask)
1162 {
1163  mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
1164  mysql_cond_t* cond_io=0, *cond_sql=0;
1165  int error=0;
1166  DBUG_ENTER("start_slave_threads");
1167  DBUG_EXECUTE_IF("uninitialized_master-info_structure",
1168  mi->inited= FALSE;);
1169 
1170  if (!mi->inited || !mi->rli->inited)
1171  {
1172  error= !mi->inited ? ER_SLAVE_MI_INIT_REPOSITORY :
1173  ER_SLAVE_RLI_INIT_REPOSITORY;
1174  Rpl_info *info= (!mi->inited ? mi : static_cast<Rpl_info *>(mi->rli));
1175  const char* prefix= current_thd ? ER(error) : ER_DEFAULT(error);
1176  info->report(ERROR_LEVEL, error, prefix, NULL);
1177 
1178  DBUG_RETURN(error);
1179  }
1180 
1181  if (need_lock_slave)
1182  {
1183  lock_io = &mi->run_lock;
1184  lock_sql = &mi->rli->run_lock;
1185  }
1186  if (wait_for_start)
1187  {
1188  cond_io = &mi->start_cond;
1189  cond_sql = &mi->rli->start_cond;
1190  lock_cond_io = &mi->run_lock;
1191  lock_cond_sql = &mi->rli->run_lock;
1192  }
1193 
1194  if (thread_mask & SLAVE_IO)
1195  error= start_slave_thread(
1196 #ifdef HAVE_PSI_INTERFACE
1197  key_thread_slave_io,
1198 #endif
1199  handle_slave_io, lock_io, lock_cond_io,
1200  cond_io,
1201  &mi->slave_running, &mi->slave_run_id,
1202  mi);
1203  if (!error && (thread_mask & SLAVE_SQL))
1204  {
1205  /*
1206  MTS-recovery gaps gathering is placed onto common execution path
1207  for either START-SLAVE and --skip-start-slave= 0
1208  */
1209  if (mi->rli->recovery_parallel_workers != 0)
1210  error= mts_recovery_groups(mi->rli);
1211  if (!error)
1212  error= start_slave_thread(
1213 #ifdef HAVE_PSI_INTERFACE
1214  key_thread_slave_sql,
1215 #endif
1216  handle_slave_sql, lock_sql, lock_cond_sql,
1217  cond_sql,
1218  &mi->rli->slave_running, &mi->rli->slave_run_id,
1219  mi);
1220  if (error)
1221  terminate_slave_threads(mi, thread_mask & SLAVE_IO, need_lock_slave);
1222  }
1223  DBUG_RETURN(error);
1224 }
1225 
1226 /*
1227  Release slave threads at time of executing shutdown.
1228 
1229  SYNOPSIS
1230  end_slave()
1231 */
1232 
1233 void end_slave()
1234 {
1235  DBUG_ENTER("end_slave");
1236 
1237  /*
1238  This is called when the server terminates, in close_connections().
1239  It terminates slave threads. However, some CHANGE MASTER etc may still be
1240  running presently. If a START SLAVE was in progress, the mutex lock below
1241  will make us wait until slave threads have started, and START SLAVE
1242  returns, then we terminate them here.
1243  */
1244  mysql_mutex_lock(&LOCK_active_mi);
1245  if (active_mi)
1246  {
1247  /*
1248  TODO: replace the line below with
1249  list_walk(&master_list, (list_walk_action)end_slave_on_walk,0);
1250  once multi-master code is ready.
1251  */
1252  terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
1253  }
1254  mysql_mutex_unlock(&LOCK_active_mi);
1255  DBUG_VOID_RETURN;
1256 }
1257 
1267 void close_active_mi()
1268 {
1269  mysql_mutex_lock(&LOCK_active_mi);
1270  if (active_mi)
1271  {
1272  end_info(active_mi);
1273  if (active_mi->rli)
1274  delete active_mi->rli;
1275  delete active_mi;
1276  active_mi= 0;
1277  }
1278  mysql_mutex_unlock(&LOCK_active_mi);
1279 }
1280 
1281 static bool io_slave_killed(THD* thd, Master_info* mi)
1282 {
1283  DBUG_ENTER("io_slave_killed");
1284 
1285  DBUG_ASSERT(mi->info_thd == thd);
1286  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
1287  DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
1288 }
1289 
1309 static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
1310 {
1311  bool ret= FALSE;
1312  bool is_parallel_warn= FALSE;
1313 
1314  DBUG_ENTER("sql_slave_killed");
1315 
1316  DBUG_ASSERT(rli->info_thd == thd);
1317  DBUG_ASSERT(rli->slave_running == 1);
1318  if (abort_loop || thd->killed || rli->abort_slave)
1319  {
1320  is_parallel_warn= (rli->is_parallel_exec() &&
1321  (rli->is_mts_in_group() || thd->killed));
1322  /*
1323  Slave can execute stop being in one of two MTS or Single-Threaded mode.
1324  The modes define different criteria to accept the stop.
1325  In particular that relates to the concept of groupping.
1326  Killed Coordinator thread expects the worst so it warns on
1327  possible consistency issue.
1328  */
1329  if (is_parallel_warn ||
1330  (!rli->is_parallel_exec() &&
1331  thd->transaction.all.cannot_safely_rollback() && rli->is_in_group()))
1332  {
1333  char msg_stopped[]=
1334  "... Slave SQL Thread stopped with incomplete event group "
1335  "having non-transactional changes. "
1336  "If the group consists solely of row-based events, you can try "
1337  "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
1338  "ignores duplicate key, key not found, and similar errors (see "
1339  "documentation for details).";
1340  char msg_stopped_mts[]=
1341  "... The slave coordinator and worker threads are stopped, possibly "
1342  "leaving data in inconsistent state. A restart should "
1343  "restore consistency automatically, although using non-transactional "
1344  "storage for data or info tables or DDL queries could lead to problems. "
1345  "In such cases you have to examine your data (see documentation for "
1346  "details).";
1347 
1348  ret= TRUE;
1349  if (rli->abort_slave)
1350  {
1351  DBUG_PRINT("info", ("Request to stop slave SQL Thread received while "
1352  "applying an MTS group or a group that "
1353  "has non-transactional "
1354  "changes; waiting for completion of the group ... "));
1355 
1356  /*
1357  Slave sql thread shutdown in face of unfinished group modified
1358  Non-trans table is handled via a timer. The slave may eventually
1359  give out to complete the current group and in that case there
1360  might be issues at consequent slave restart, see the error message.
1361  WL#2975 offers a robust solution requiring to store the last exectuted
1362  event's coordinates along with the group's coordianates
1363  instead of waiting with @c last_event_start_time the timer.
1364  */
1365 
1366  if (rli->last_event_start_time == 0)
1367  rli->last_event_start_time= my_time(0);
1368  ret= difftime(my_time(0), rli->last_event_start_time) <=
1369  SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;
1370 
1371  DBUG_EXECUTE_IF("stop_slave_middle_group",
1372  DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
1373  ret= TRUE;);); // time is over
1374 
1375  if (!ret && !rli->reported_unsafe_warning)
1376  {
1377  rli->report(WARNING_LEVEL, 0,
1378  !is_parallel_warn ?
1379  "Request to stop slave SQL Thread received while "
1380  "applying a group that has non-transactional "
1381  "changes; waiting for completion of the group ... "
1382  :
1383  "Coordinator thread of multi-threaded slave is being "
1384  "stopped in the middle of assigning a group of events; "
1385  "deferring to exit until the group completion ... ");
1386  rli->reported_unsafe_warning= true;
1387  }
1388  }
1389  if (ret)
1390  {
1391  if (is_parallel_warn)
1392  rli->report(!rli->is_error() ? ERROR_LEVEL :
1393  WARNING_LEVEL, // an error was reported by Worker
1394  ER_MTS_INCONSISTENT_DATA,
1395  ER(ER_MTS_INCONSISTENT_DATA),
1396  msg_stopped_mts);
1397  else
1398  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
1399  ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
1400  }
1401  }
1402  else
1403  {
1404  ret= TRUE;
1405  }
1406  }
1407  if (ret)
1408  {
1409  rli->last_event_start_time= 0;
1410  if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
1411  {
1412  rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
1413  }
1414  }
1415 
1416  DBUG_RETURN(ret);
1417 }
1418 
1419 
1420 /*
1421  skip_load_data_infile()
1422 
1423  NOTES
1424  This is used to tell a 3.23 master to break send_file()
1425 */
1426 
1427 void skip_load_data_infile(NET *net)
1428 {
1429  DBUG_ENTER("skip_load_data_infile");
1430 
1431  (void)net_request_file(net, "/dev/null");
1432  (void)my_net_read(net); // discard response
1433  (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
1434  DBUG_VOID_RETURN;
1435 }
1436 
1437 
1438 bool net_request_file(NET* net, const char* fname)
1439 {
1440  DBUG_ENTER("net_request_file");
1441  DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
1442  (uchar*) "", 0));
1443 }
1444 
1445 /*
1446  From other comments and tests in code, it looks like
1447  sometimes Query_log_event and Load_log_event can have db == 0
1448  (see rewrite_db() above for example)
1449  (cases where this happens are unclear; it may be when the master is 3.23).
1450 */
1451 
1452 const char *print_slave_db_safe(const char* db)
1453 {
1454  DBUG_ENTER("*print_slave_db_safe");
1455 
1456  DBUG_RETURN((db ? db : ""));
1457 }
1458 
1459 /*
1460  Check if the error is caused by network.
1461  @param[in] errorno Number of the error.
1462  RETURNS:
1463  TRUE network error
1464  FALSE not network error
1465 */
1466 
1467 bool is_network_error(uint errorno)
1468 {
1469  if (errorno == CR_CONNECTION_ERROR ||
1470  errorno == CR_CONN_HOST_ERROR ||
1471  errorno == CR_SERVER_GONE_ERROR ||
1472  errorno == CR_SERVER_LOST ||
1473  errorno == ER_CON_COUNT_ERROR ||
1474  errorno == ER_SERVER_SHUTDOWN)
1475  return TRUE;
1476 
1477  return FALSE;
1478 }
1479 
1480 
1503 enum enum_command_status
1504 { COMMAND_STATUS_OK, COMMAND_STATUS_ERROR, COMMAND_STATUS_ALLOWED_ERROR };
1505 static enum_command_status
1506 io_thread_init_command(Master_info *mi, const char *query, int allowed_error,
1507  MYSQL_RES **master_res= NULL,
1508  MYSQL_ROW *master_row= NULL)
1509 {
1510  DBUG_ENTER("io_thread_init_command");
1511  DBUG_PRINT("info", ("IO thread initialization command: '%s'", query));
1512  MYSQL *mysql= mi->mysql;
1513  int ret= mysql_real_query(mysql, query, strlen(query));
1514  if (io_slave_killed(mi->info_thd, mi))
1515  {
1516  sql_print_information("The slave IO thread was killed while executing "
1517  "initialization query '%s'", query);
1518  mysql_free_result(mysql_store_result(mysql));
1519  DBUG_RETURN(COMMAND_STATUS_ERROR);
1520  }
1521  if (ret != 0)
1522  {
1523  int err= mysql_errno(mysql);
1524  mysql_free_result(mysql_store_result(mysql));
1525  if (!err || err != allowed_error)
1526  {
1527  mi->report(is_network_error(err) ? WARNING_LEVEL : ERROR_LEVEL, err,
1528  "The slave IO thread stops because the initialization query "
1529  "'%s' failed with error '%s'.",
1530  query, mysql_error(mysql));
1531  DBUG_RETURN(COMMAND_STATUS_ERROR);
1532  }
1533  DBUG_RETURN(COMMAND_STATUS_ALLOWED_ERROR);
1534  }
1535  if (master_res != NULL)
1536  {
1537  if ((*master_res= mysql_store_result(mysql)) == NULL)
1538  {
1539  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1540  "The slave IO thread stops because the initialization query "
1541  "'%s' did not return any result.",
1542  query);
1543  DBUG_RETURN(COMMAND_STATUS_ERROR);
1544  }
1545  if (master_row != NULL)
1546  {
1547  if ((*master_row= mysql_fetch_row(*master_res)) == NULL)
1548  {
1549  mysql_free_result(*master_res);
1550  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1551  "The slave IO thread stops because the initialization query "
1552  "'%s' did not return any row.",
1553  query);
1554  DBUG_RETURN(COMMAND_STATUS_ERROR);
1555  }
1556  }
1557  }
1558  else
1559  DBUG_ASSERT(master_row == NULL);
1560  DBUG_RETURN(COMMAND_STATUS_OK);
1561 }
1562 
1563 
1572 int io_thread_init_commands(MYSQL *mysql, Master_info *mi)
1573 {
1574  char query[256];
1575  int ret= 0;
1576 
1577  sprintf(query, "SET @slave_uuid= '%s'", server_uuid);
1578  if (mysql_real_query(mysql, query, strlen(query))
1579  && !check_io_slave_killed(mi->info_thd, mi, NULL))
1580  goto err;
1581 
1582  mysql_free_result(mysql_store_result(mysql));
1583  return ret;
1584 
1585 err:
1586  if (mysql_errno(mysql) && is_network_error(mysql_errno(mysql)))
1587  {
1588  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1589  "The initialization command '%s' failed with the following"
1590  " error: '%s'.", query, mysql_error(mysql));
1591  ret= 2;
1592  }
1593  else
1594  {
1595  char errmsg[512];
1596  const char *errmsg_fmt=
1597  "The slave I/O thread stops because a fatal error is encountered "
1598  "when it tries to send query to master(query: %s).";
1599 
1600  sprintf(errmsg, errmsg_fmt, query);
1601  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
1602  errmsg);
1603  ret= 1;
1604  }
1605  mysql_free_result(mysql_store_result(mysql));
1606  return ret;
1607 }
1608 
1617 static int get_master_uuid(MYSQL *mysql, Master_info *mi)
1618 {
1619  const char *errmsg;
1620  MYSQL_RES *master_res= NULL;
1621  MYSQL_ROW master_row= NULL;
1622  int ret= 0;
1623 
1624  DBUG_EXECUTE_IF("dbug.before_get_MASTER_UUID",
1625  {
1626  const char act[]= "now wait_for signal.get_master_uuid";
1627  DBUG_ASSERT(opt_debug_sync_timeout > 0);
1628  DBUG_ASSERT(!debug_sync_set_action(current_thd,
1629  STRING_WITH_LEN(act)));
1630  };);
1631 
1632  DBUG_EXECUTE_IF("dbug.simulate_busy_io",
1633  {
1634  const char act[]= "now signal Reached wait_for signal.got_stop_slave";
1635  DBUG_ASSERT(opt_debug_sync_timeout > 0);
1636  DBUG_ASSERT(!debug_sync_set_action(current_thd,
1637  STRING_WITH_LEN(act)));
1638  };);
1639  if (!mysql_real_query(mysql,
1640  STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_UUID'")) &&
1641  (master_res= mysql_store_result(mysql)) &&
1642  (master_row= mysql_fetch_row(master_res)))
1643  {
1644  if (!strcmp(::server_uuid, master_row[1]) &&
1645  !mi->rli->replicate_same_server_id)
1646  {
1647  errmsg= "The slave I/O thread stops because master and slave have equal "
1648  "MySQL server UUIDs; these UUIDs must be different for "
1649  "replication to work.";
1650  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
1651  errmsg);
1652  // Fatal error
1653  ret= 1;
1654  }
1655  else
1656  {
1657  if (mi->master_uuid[0] != 0 && strcmp(mi->master_uuid, master_row[1]))
1658  sql_print_warning("The master's UUID has changed, although this should"
1659  " not happen unless you have changed it manually."
1660  " The old UUID was %s.",
1661  mi->master_uuid);
1662  strncpy(mi->master_uuid, master_row[1], UUID_LENGTH);
1663  mi->master_uuid[UUID_LENGTH]= 0;
1664  }
1665  }
1666  else if (mysql_errno(mysql))
1667  {
1668  if (is_network_error(mysql_errno(mysql)))
1669  {
1670  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1671  "Get master SERVER_UUID failed with error: %s",
1672  mysql_error(mysql));
1673  ret= 2;
1674  }
1675  else
1676  {
1677  /* Fatal error */
1678  errmsg= "The slave I/O thread stops because a fatal error is encountered "
1679  "when it tries to get the value of SERVER_UUID variable from master.";
1680  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
1681  errmsg);
1682  ret= 1;
1683  }
1684  }
1685  else if (!master_row && master_res)
1686  {
1687  mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
1688  "Unknown system variable 'SERVER_UUID' on master. "
1689  "A probable cause is that the variable is not supported on the "
1690  "master (version: %s), even though it is on the slave (version: %s)",
1691  mysql->server_version, server_version);
1692  }
1693 
1694  if (master_res)
1695  mysql_free_result(master_res);
1696  return ret;
1697 }
1698 
1699 /*
1700  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
1701  relying on the binlog's version. This is not perfect: imagine an upgrade
1702  of the master without waiting that all slaves are in sync with the master;
1703  then a slave could be fooled about the binlog's format. This is what happens
1704  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
1705  slaves are fooled. So we do this only to distinguish between 3.23 and more
1706  recent masters (it's too late to change things for 3.23).
1707 
1708  RETURNS
1709  0 ok
1710  1 error
1711  2 transient network problem, the caller should try to reconnect
1712 */
1713 
1714 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
1715 {
1716  char err_buff[MAX_SLAVE_ERRMSG];
1717  const char* errmsg= 0;
1718  int err_code= 0;
1719  int version_number=0;
1720  version_number= atoi(mysql->server_version);
1721 
1722  MYSQL_RES *master_res= 0;
1723  MYSQL_ROW master_row;
1724  DBUG_ENTER("get_master_version_and_clock");
1725 
1726  /*
1727  Free old mi_description_event (that is needed if we are in
1728  a reconnection).
1729  */
1730  DBUG_EXECUTE_IF("unrecognized_master_version",
1731  {
1732  version_number= 1;
1733  };);
1734  mysql_mutex_lock(&mi->data_lock);
1735  mi->set_mi_description_event(NULL);
1736 
1737  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
1738  {
1739  errmsg = "Master reported unrecognized MySQL version";
1740  err_code= ER_SLAVE_FATAL_ERROR;
1741  sprintf(err_buff, ER(err_code), errmsg);
1742  }
1743  else
1744  {
1745  /*
1746  Note the following switch will bug when we have MySQL branch 30 ;)
1747  */
1748  switch (version_number)
1749  {
1750  case 0:
1751  case 1:
1752  case 2:
1753  errmsg = "Master reported unrecognized MySQL version";
1754  err_code= ER_SLAVE_FATAL_ERROR;
1755  sprintf(err_buff, ER(err_code), errmsg);
1756  break;
1757  case 3:
1758  mi->set_mi_description_event(new
1759  Format_description_log_event(1, mysql->server_version));
1760  break;
1761  case 4:
1762  mi->set_mi_description_event(new
1763  Format_description_log_event(3, mysql->server_version));
1764  break;
1765  default:
1766  /*
1767  Master is MySQL >=5.0. Give a default Format_desc event, so that we can
1768  take the early steps (like tests for "is this a 3.23 master") which we
1769  have to take before we receive the real master's Format_desc which will
1770  override this one. Note that the Format_desc we create below is garbage
1771  (it has the format of the *slave*); it's only good to help know if the
1772  master is 3.23, 4.0, etc.
1773  */
1774  mi->set_mi_description_event(new
1775  Format_description_log_event(4, mysql->server_version));
1776  break;
1777  }
1778  }
1779 
1780  /*
1781  This does not mean that a 5.0 slave will be able to read a 5.5 master; but
1782  as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
1783  can't read a 5.5 master, this will show up when the slave can't read some
1784  events sent by the master, and there will be error messages.
1785  */
1786 
1787  if (errmsg)
1788  {
1789  /* unlock the mutex on master info structure */
1790  mysql_mutex_unlock(&mi->data_lock);
1791  goto err;
1792  }
1793 
1794  /* as we are here, we tried to allocate the event */
1795  if (mi->get_mi_description_event() == NULL)
1796  {
1797  mysql_mutex_unlock(&mi->data_lock);
1798  errmsg= "default Format_description_log_event";
1799  err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
1800  sprintf(err_buff, ER(err_code), errmsg);
1801  goto err;
1802  }
1803 
1804  /*
1805  FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
1806  It's necessary to adjust FD_q.(A) at this point because in the following
1807  course FD_q is going to be dumped to RL.
1808  Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m)
1809  in queue_event and the master's (A) is installed.
1810  At one step with the assignment the Relay-Log's checksum alg is set to
1811  a new value: RL.(A) := FD_q.(A). If the slave service is stopped
1812  the last time assigned RL.(A) will be passed over to the restarting
1813  service (to the current execution point).
1814  RL.A is a "codec" to verify checksum in queue_event() almost all the time
1815  the first fake Rotate event.
1816  Starting from this point the IO thread will execute the following checksum
1817  warmup sequence of actions:
1818 
1819  FD_q.A := RL.A,
1820  A_m^0 := master.@@global.binlog_checksum,
1821  {queue_event(R_f): verifies(R_f, A_m^0)},
1822  {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
1823  FD_q := FD_m, RL.A := FD_q.A)}
1824 
1825  See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
1826  docs lines (binlog.h).
1827  In above A_m^0 - the value of master's
1828  @@binlog_checksum determined in the upcoming handshake (stored in
1829  mi->checksum_alg_before_fd).
1830 
1831 
1832  After the warm-up sequence IO gets to "normal" checksum verification mode
1833  to use RL.A in
1834 
1835  {queue_event(E_m): verifies(E_m, RL.A)}
1836 
1837  until it has received a new FD_m.
1838  */
1839  mi->get_mi_description_event()->checksum_alg=
1840  mi->rli->relay_log.relay_log_checksum_alg;
1841 
1842  DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg !=
1843  BINLOG_CHECKSUM_ALG_UNDEF);
1844  DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
1845  BINLOG_CHECKSUM_ALG_UNDEF);
1846 
1847  mysql_mutex_unlock(&mi->data_lock);
1848 
1849  /*
1850  Compare the master and slave's clock. Do not die if master's clock is
1851  unavailable (very old master not supporting UNIX_TIMESTAMP()?).
1852  */
1853 
1854  DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
1855  {
1856  const char act[]=
1857  "now "
1858  "wait_for signal.get_unix_timestamp";
1859  DBUG_ASSERT(opt_debug_sync_timeout > 0);
1860  DBUG_ASSERT(!debug_sync_set_action(current_thd,
1861  STRING_WITH_LEN(act)));
1862  };);
1863 
1864  master_res= NULL;
1865  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
1866  (master_res= mysql_store_result(mysql)) &&
1867  (master_row= mysql_fetch_row(master_res)))
1868  {
1869  mysql_mutex_lock(&mi->data_lock);
1870  mi->clock_diff_with_master=
1871  (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
1872  mysql_mutex_unlock(&mi->data_lock);
1873  }
1874  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
1875  goto slave_killed_err;
1876  else if (is_network_error(mysql_errno(mysql)))
1877  {
1878  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1879  "Get master clock failed with error: %s", mysql_error(mysql));
1880  goto network_err;
1881  }
1882  else
1883  {
1884  mysql_mutex_lock(&mi->data_lock);
1885  mi->clock_diff_with_master= 0; /* The "most sensible" value */
1886  mysql_mutex_unlock(&mi->data_lock);
1887  sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
1888  "do not trust column Seconds_Behind_Master of SHOW "
1889  "SLAVE STATUS. Error: %s (%d)",
1890  mysql_error(mysql), mysql_errno(mysql));
1891  }
1892  if (master_res)
1893  {
1894  mysql_free_result(master_res);
1895  master_res= NULL;
1896  }
1897 
1898  /*
1899  Check that the master's server id and ours are different. Because if they
1900  are equal (which can result from a simple copy of master's datadir to slave,
1901  thus copying some my.cnf), replication will work but all events will be
1902  skipped.
1903  Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
1904  master?).
1905  Note: we could have put a @@SERVER_ID in the previous SELECT
1906  UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
1907  */
1908  DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
1909  {
1910  const char act[]=
1911  "now "
1912  "wait_for signal.get_server_id";
1913  DBUG_ASSERT(opt_debug_sync_timeout > 0);
1914  DBUG_ASSERT(!debug_sync_set_action(current_thd,
1915  STRING_WITH_LEN(act)));
1916  };);
1917  master_res= NULL;
1918  master_row= NULL;
1919  if (!mysql_real_query(mysql,
1920  STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
1921  (master_res= mysql_store_result(mysql)) &&
1922  (master_row= mysql_fetch_row(master_res)))
1923  {
1924  if ((::server_id == (mi->master_id= strtoul(master_row[1], 0, 10))) &&
1925  !mi->rli->replicate_same_server_id)
1926  {
1927  errmsg= "The slave I/O thread stops because master and slave have equal \
1928 MySQL server ids; these ids must be different for replication to work (or \
1929 the --replicate-same-server-id option must be used on slave but this does \
1930 not always make sense; please check the manual before using it).";
1931  err_code= ER_SLAVE_FATAL_ERROR;
1932  sprintf(err_buff, ER(err_code), errmsg);
1933  goto err;
1934  }
1935  }
1936  else if (mysql_errno(mysql))
1937  {
1938  if (check_io_slave_killed(mi->info_thd, mi, NULL))
1939  goto slave_killed_err;
1940  else if (is_network_error(mysql_errno(mysql)))
1941  {
1942  mi->report(WARNING_LEVEL, mysql_errno(mysql),
1943  "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
1944  goto network_err;
1945  }
1946  /* Fatal error */
1947  errmsg= "The slave I/O thread stops because a fatal error is encountered \
1948 when it try to get the value of SERVER_ID variable from master.";
1949  err_code= mysql_errno(mysql);
1950  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
1951  goto err;
1952  }
1953  else if (!master_row && master_res)
1954  {
1955  mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
1956  "Unknown system variable 'SERVER_ID' on master, \
1957 maybe it is a *VERY OLD MASTER*.");
1958  }
1959  if (master_res)
1960  {
1961  mysql_free_result(master_res);
1962  master_res= NULL;
1963  }
1964  if (mi->master_id == 0 && mi->ignore_server_ids->dynamic_ids.elements > 0)
1965  {
1966  errmsg= "Slave configured with server id filtering could not detect the master server id.";
1967  err_code= ER_SLAVE_FATAL_ERROR;
1968  sprintf(err_buff, ER(err_code), errmsg);
1969  goto err;
1970  }
1971 
1972  /*
1973  Check that the master's global character_set_server and ours are the same.
1974  Not fatal if query fails (old master?).
1975  Note that we don't check for equality of global character_set_client and
1976  collation_connection (neither do we prevent their setting in
1977  set_var.cc). That's because from what I (Guilhem) have tested, the global
1978  values of these 2 are never used (new connections don't use them).
1979  We don't test equality of global collation_database either as it is
1980  going to be deprecated (made read-only) in 4.1 very soon.
1981  The test is only relevant if master < 5.0.3 (we'll test only if it's older
1982  than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
1983  charset info in each binlog event.
1984  We don't do it for 3.23 because masters <3.23.50 hang on
1985  SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
1986  test only if master is 4.x.
1987  */
1988 
1989  /* redundant with rest of code but safer against later additions */
1990  if (*mysql->server_version == '3')
1991  goto err;
1992 
1993  if (*mysql->server_version == '4')
1994  {
1995  master_res= NULL;
1996  if (!mysql_real_query(mysql,
1997  STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
1998  (master_res= mysql_store_result(mysql)) &&
1999  (master_row= mysql_fetch_row(master_res)))
2000  {
2001  if (strcmp(master_row[0], global_system_variables.collation_server->name))
2002  {
2003  errmsg= "The slave I/O thread stops because master and slave have \
2004 different values for the COLLATION_SERVER global variable. The values must \
2005 be equal for the Statement-format replication to work";
2006  err_code= ER_SLAVE_FATAL_ERROR;
2007  sprintf(err_buff, ER(err_code), errmsg);
2008  goto err;
2009  }
2010  }
2011  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
2012  goto slave_killed_err;
2013  else if (is_network_error(mysql_errno(mysql)))
2014  {
2015  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2016  "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
2017  goto network_err;
2018  }
2019  else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
2020  {
2021  /* Fatal error */
2022  errmsg= "The slave I/O thread stops because a fatal error is encountered \
2023 when it try to get the value of COLLATION_SERVER global variable from master.";
2024  err_code= mysql_errno(mysql);
2025  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2026  goto err;
2027  }
2028  else
2029  mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
2030  "Unknown system variable 'COLLATION_SERVER' on master, \
2031 maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
2032 inconsistency if replicated data deals with collation.");
2033 
2034  if (master_res)
2035  {
2036  mysql_free_result(master_res);
2037  master_res= NULL;
2038  }
2039  }
2040 
2041  /*
2042  Perform analogous check for time zone. Theoretically we also should
2043  perform check here to verify that SYSTEM time zones are the same on
2044  slave and master, but we can't rely on value of @@system_time_zone
2045  variable (it is time zone abbreviation) since it determined at start
2046  time and so could differ for slave and master even if they are really
2047  in the same system time zone. So we are omitting this check and just
2048  relying on documentation. Also according to Monty there are many users
2049  who are using replication between servers in various time zones. Hence
2050  such check will broke everything for them. (And now everything will
2051  work for them because by default both their master and slave will have
2052  'SYSTEM' time zone).
2053  This check is only necessary for 4.x masters (and < 5.0.4 masters but
2054  those were alpha).
2055  */
2056  if (*mysql->server_version == '4')
2057  {
2058  master_res= NULL;
2059  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
2060  (master_res= mysql_store_result(mysql)) &&
2061  (master_row= mysql_fetch_row(master_res)))
2062  {
2063  if (strcmp(master_row[0],
2064  global_system_variables.time_zone->get_name()->ptr()))
2065  {
2066  errmsg= "The slave I/O thread stops because master and slave have \
2067 different values for the TIME_ZONE global variable. The values must \
2068 be equal for the Statement-format replication to work";
2069  err_code= ER_SLAVE_FATAL_ERROR;
2070  sprintf(err_buff, ER(err_code), errmsg);
2071  goto err;
2072  }
2073  }
2074  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
2075  goto slave_killed_err;
2076  else if (is_network_error(mysql_errno(mysql)))
2077  {
2078  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2079  "Get master TIME_ZONE failed with error: %s", mysql_error(mysql));
2080  goto network_err;
2081  }
2082  else
2083  {
2084  /* Fatal error */
2085  errmsg= "The slave I/O thread stops because a fatal error is encountered \
2086 when it try to get the value of TIME_ZONE global variable from master.";
2087  err_code= mysql_errno(mysql);
2088  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2089  goto err;
2090  }
2091  if (master_res)
2092  {
2093  mysql_free_result(master_res);
2094  master_res= NULL;
2095  }
2096  }
2097 
2098  if (mi->heartbeat_period != 0.0)
2099  {
2100  char llbuf[22];
2101  const char query_format[]= "SET @master_heartbeat_period= %s";
2102  char query[sizeof(query_format) - 2 + sizeof(llbuf)];
2103  /*
2104  the period is an ulonglong of nano-secs.
2105  */
2106  llstr((ulonglong) (mi->heartbeat_period*1000000000UL), llbuf);
2107  sprintf(query, query_format, llbuf);
2108 
2109  if (mysql_real_query(mysql, query, strlen(query)))
2110  {
2111  if (check_io_slave_killed(mi->info_thd, mi, NULL))
2112  goto slave_killed_err;
2113 
2114  if (is_network_error(mysql_errno(mysql)))
2115  {
2116  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2117  "SET @master_heartbeat_period to master failed with error: %s",
2118  mysql_error(mysql));
2119  mysql_free_result(mysql_store_result(mysql));
2120  goto network_err;
2121  }
2122  else
2123  {
2124  /* Fatal error */
2125  errmsg= "The slave I/O thread stops because a fatal error is encountered "
2126  " when it tries to SET @master_heartbeat_period on master.";
2127  err_code= ER_SLAVE_FATAL_ERROR;
2128  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2129  mysql_free_result(mysql_store_result(mysql));
2130  goto err;
2131  }
2132  }
2133  mysql_free_result(mysql_store_result(mysql));
2134  }
2135 
2136  /*
2137  Querying if master is capable to checksum and notifying it about own
2138  CRC-awareness. The master's side instant value of @@global.binlog_checksum
2139  is stored in the dump thread's uservar area as well as cached locally
2140  to become known in consensus by master and slave.
2141  */
2142  if (DBUG_EVALUATE_IF("simulate_slave_unaware_checksum", 0, 1))
2143  {
2144  int rc;
2145  const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
2146  master_res= NULL;
2147  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
2148  /*
2149  @c checksum_alg_before_fd is queried from master in this block.
2150  If master is old checksum-unaware the value stays undefined.
2151  Once the first FD will be received its alg descriptor will replace
2152  the being queried one.
2153  */
2154  rc= mysql_real_query(mysql, query, strlen(query));
2155  if (rc != 0)
2156  {
2157  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
2158  if (check_io_slave_killed(mi->info_thd, mi, NULL))
2159  goto slave_killed_err;
2160 
2161  if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
2162  {
2163  // this is tolerable as OM -> NS is supported
2164  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2165  "Notifying master by %s failed with "
2166  "error: %s", query, mysql_error(mysql));
2167  }
2168  else
2169  {
2170  if (is_network_error(mysql_errno(mysql)))
2171  {
2172  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2173  "Notifying master by %s failed with "
2174  "error: %s", query, mysql_error(mysql));
2175  mysql_free_result(mysql_store_result(mysql));
2176  goto network_err;
2177  }
2178  else
2179  {
2180  errmsg= "The slave I/O thread stops because a fatal error is encountered "
2181  "when it tried to SET @master_binlog_checksum on master.";
2182  err_code= ER_SLAVE_FATAL_ERROR;
2183  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2184  mysql_free_result(mysql_store_result(mysql));
2185  goto err;
2186  }
2187  }
2188  }
2189  else
2190  {
2191  mysql_free_result(mysql_store_result(mysql));
2192  if (!mysql_real_query(mysql,
2193  STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
2194  (master_res= mysql_store_result(mysql)) &&
2195  (master_row= mysql_fetch_row(master_res)) &&
2196  (master_row[0] != NULL))
2197  {
2198  mi->checksum_alg_before_fd= (uint8)
2199  find_type(master_row[0], &binlog_checksum_typelib, 1) - 1;
2200 
2201  DBUG_EXECUTE_IF("undefined_algorithm_on_slave",
2202  mi->checksum_alg_before_fd = BINLOG_CHECKSUM_ALG_UNDEF;);
2203  if(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_UNDEF)
2204  {
2205  errmsg= "The slave I/O thread was stopped because a fatal error is encountered "
2206  "The checksum algorithm used by master is unknown to slave.";
2207  err_code= ER_SLAVE_FATAL_ERROR;
2208  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2209  mysql_free_result(mysql_store_result(mysql));
2210  goto err;
2211  }
2212 
2213  // valid outcome is either of
2214  DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
2215  mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
2216  }
2217  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
2218  goto slave_killed_err;
2219  else if (is_network_error(mysql_errno(mysql)))
2220  {
2221  mi->report(WARNING_LEVEL, mysql_errno(mysql),
2222  "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
2223  goto network_err;
2224  }
2225  else
2226  {
2227  errmsg= "The slave I/O thread stops because a fatal error is encountered "
2228  "when it tried to SELECT @master_binlog_checksum.";
2229  err_code= ER_SLAVE_FATAL_ERROR;
2230  sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2231  mysql_free_result(mysql_store_result(mysql));
2232  goto err;
2233  }
2234  }
2235  if (master_res)
2236  {
2237  mysql_free_result(master_res);
2238  master_res= NULL;
2239  }
2240  }
2241  else
2242  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
2243 
2244  if (DBUG_EVALUATE_IF("simulate_slave_unaware_gtid", 0, 1))
2245  {
2246  switch (io_thread_init_command(mi, "SELECT @@GLOBAL.GTID_MODE",
2247  ER_UNKNOWN_SYSTEM_VARIABLE,
2248  &master_res, &master_row))
2249  {
2250  case COMMAND_STATUS_ERROR:
2251  DBUG_RETURN(2);
2252  case COMMAND_STATUS_ALLOWED_ERROR:
2253  // master is old and does not have @@GLOBAL.GTID_MODE
2254  mi->master_gtid_mode= 0;
2255  break;
2256  case COMMAND_STATUS_OK:
2257  int typelib_index= find_type(master_row[0], &gtid_mode_typelib, 1);
2258  mysql_free_result(master_res);
2259  if (typelib_index == 0)
2260  {
2261  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
2262  "The slave IO thread stops because the master has "
2263  "an unknown @@GLOBAL.GTID_MODE.");
2264  DBUG_RETURN(1);
2265  }
2266  mi->master_gtid_mode= typelib_index - 1;
2267  break;
2268  }
2269  if (mi->master_gtid_mode > gtid_mode + 1 ||
2270  gtid_mode > mi->master_gtid_mode + 1)
2271  {
2272  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
2273  "The slave IO thread stops because the master has "
2274  "@@GLOBAL.GTID_MODE %s and this server has "
2275  "@@GLOBAL.GTID_MODE %s",
2276  gtid_mode_names[mi->master_gtid_mode],
2277  gtid_mode_names[gtid_mode]);
2278  DBUG_RETURN(1);
2279  }
2280  if (mi->is_auto_position() && mi->master_gtid_mode != 3)
2281  {
2282  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
2283  "The slave IO thread stops because the master has "
2284  "@@GLOBAL.GTID_MODE %s and we are trying to connect "
2285  "using MASTER_AUTO_POSITION.",
2286  gtid_mode_names[mi->master_gtid_mode]);
2287  DBUG_RETURN(1);
2288  }
2289  }
2290 
2291 err:
2292  if (errmsg)
2293  {
2294  if (master_res)
2295  mysql_free_result(master_res);
2296  DBUG_ASSERT(err_code != 0);
2297  mi->report(ERROR_LEVEL, err_code, "%s", err_buff);
2298  DBUG_RETURN(1);
2299  }
2300 
2301  DBUG_RETURN(0);
2302 
2303 network_err:
2304  if (master_res)
2305  mysql_free_result(master_res);
2306  DBUG_RETURN(2);
2307 
2308 slave_killed_err:
2309  if (master_res)
2310  mysql_free_result(master_res);
2311  DBUG_RETURN(2);
2312 }
2313 
2314 static bool wait_for_relay_log_space(Relay_log_info* rli)
2315 {
2316  bool slave_killed=0;
2317  Master_info* mi = rli->mi;
2318  PSI_stage_info old_stage;
2319  THD* thd = mi->info_thd;
2320  DBUG_ENTER("wait_for_relay_log_space");
2321 
2322  mysql_mutex_lock(&rli->log_space_lock);
2323  thd->ENTER_COND(&rli->log_space_cond,
2324  &rli->log_space_lock,
2325  &stage_waiting_for_relay_log_space,
2326  &old_stage);
2327  while (rli->log_space_limit < rli->log_space_total &&
2328  !(slave_killed=io_slave_killed(thd,mi)) &&
2329  !rli->ignore_log_space_limit)
2330  mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
2331 
2332  /*
2333  Makes the IO thread read only one event at a time
2334  until the SQL thread is able to purge the relay
2335  logs, freeing some space.
2336 
2337  Therefore, once the SQL thread processes this next
2338  event, it goes to sleep (no more events in the queue),
2339  sets ignore_log_space_limit=true and wakes the IO thread.
2340  However, this event may have been enough already for
2341  the SQL thread to purge some log files, freeing
2342  rli->log_space_total .
2343 
2344  This guarantees that the SQL and IO thread move
2345  forward only one event at a time (to avoid deadlocks),
2346  when the relay space limit is reached. It also
2347  guarantees that when the SQL thread is prepared to
2348  rotate (to be able to purge some logs), the IO thread
2349  will know about it and will rotate.
2350 
2351  NOTE: The ignore_log_space_limit is only set when the SQL
2352  thread sleeps waiting for events.
2353 
2354  */
2355  if (rli->ignore_log_space_limit)
2356  {
2357 #ifndef DBUG_OFF
2358  {
2359  char llbuf1[22], llbuf2[22];
2360  DBUG_PRINT("info", ("log_space_limit=%s "
2361  "log_space_total=%s "
2362  "ignore_log_space_limit=%d "
2363  "sql_force_rotate_relay=%d",
2364  llstr(rli->log_space_limit,llbuf1),
2365  llstr(rli->log_space_total,llbuf2),
2366  (int) rli->ignore_log_space_limit,
2367  (int) rli->sql_force_rotate_relay));
2368  }
2369 #endif
2370  if (rli->sql_force_rotate_relay)
2371  {
2372  mysql_mutex_lock(&mi->data_lock);
2373  rotate_relay_log(mi);
2374  mysql_mutex_unlock(&mi->data_lock);
2375  rli->sql_force_rotate_relay= false;
2376  }
2377 
2378  rli->ignore_log_space_limit= false;
2379  }
2380 
2381  thd->EXIT_COND(&old_stage);
2382  DBUG_RETURN(slave_killed);
2383 }
2384 
2385 
2386 /*
2387  Builds a Rotate from the ignored events' info and writes it to relay log.
2388 
2389  The caller must hold mi->data_lock before invoking this function.
2390 
2391  @param thd pointer to I/O Thread's Thd.
2392  @param mi point to I/O Thread metadata class.
2393 
2394  @return 0 if everything went fine, 1 otherwise.
2395 */
2396 static int write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
2397 {
2398  Relay_log_info *rli= mi->rli;
2399  mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
2400  int error= 0;
2401  DBUG_ENTER("write_ignored_events_info_to_relay_log");
2402 
2403  DBUG_ASSERT(thd == mi->info_thd);
2404  mysql_mutex_assert_owner(&mi->data_lock);
2405  mysql_mutex_lock(log_lock);
2406  if (rli->ign_master_log_name_end[0])
2407  {
2408  DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
2409  Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end,
2410  0, rli->ign_master_log_pos_end,
2411  Rotate_log_event::DUP_NAME);
2412  if (mi->get_mi_description_event() != NULL)
2413  ev->checksum_alg= mi->get_mi_description_event()->checksum_alg;
2414 
2415  rli->ign_master_log_name_end[0]= 0;
2416  /* can unlock before writing as slave SQL thd will soon see our Rotate */
2417  mysql_mutex_unlock(log_lock);
2418  if (likely((bool)ev))
2419  {
2420  ev->server_id= 0; // don't be ignored by slave SQL thread
2421  if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
2422  mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
2423  ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2424  "failed to write a Rotate event"
2425  " to the relay log, SHOW SLAVE STATUS may be"
2426  " inaccurate");
2427  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
2428  if (flush_master_info(mi, TRUE))
2429  {
2430  error= 1;
2431  sql_print_error("Failed to flush master info file.");
2432  }
2433  delete ev;
2434  }
2435  else
2436  {
2437  error= 1;
2438  mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
2439  ER(ER_SLAVE_CREATE_EVENT_FAILURE),
2440  "Rotate_event (out of memory?),"
2441  " SHOW SLAVE STATUS may be inaccurate");
2442  }
2443  }
2444  else
2445  mysql_mutex_unlock(log_lock);
2446 
2447  DBUG_RETURN(error);
2448 }
2449 
2450 
2451 int register_slave_on_master(MYSQL* mysql, Master_info *mi,
2452  bool *suppress_warnings)
2453 {
2454  uchar buf[1024], *pos= buf;
2455  uint report_host_len=0, report_user_len=0, report_password_len=0;
2456  DBUG_ENTER("register_slave_on_master");
2457 
2458  *suppress_warnings= FALSE;
2459  if (report_host)
2460  report_host_len= strlen(report_host);
2461  if (report_host_len > HOSTNAME_LENGTH)
2462  {
2463  sql_print_warning("The length of report_host is %d. "
2464  "It is larger than the max length(%d), so this "
2465  "slave cannot be registered to the master.",
2466  report_host_len, HOSTNAME_LENGTH);
2467  DBUG_RETURN(0);
2468  }
2469 
2470  if (report_user)
2471  report_user_len= strlen(report_user);
2472  if (report_user_len > USERNAME_LENGTH)
2473  {
2474  sql_print_warning("The length of report_user is %d. "
2475  "It is larger than the max length(%d), so this "
2476  "slave cannot be registered to the master.",
2477  report_user_len, USERNAME_LENGTH);
2478  DBUG_RETURN(0);
2479  }
2480 
2481  if (report_password)
2482  report_password_len= strlen(report_password);
2483  if (report_password_len > MAX_PASSWORD_LENGTH)
2484  {
2485  sql_print_warning("The length of report_password is %d. "
2486  "It is larger than the max length(%d), so this "
2487  "slave cannot be registered to the master.",
2488  report_password_len, MAX_PASSWORD_LENGTH);
2489  DBUG_RETURN(0);
2490  }
2491 
2492  int4store(pos, server_id); pos+= 4;
2493  pos= net_store_data(pos, (uchar*) report_host, report_host_len);
2494  pos= net_store_data(pos, (uchar*) report_user, report_user_len);
2495  pos= net_store_data(pos, (uchar*) report_password, report_password_len);
2496  int2store(pos, (uint16) report_port); pos+= 2;
2497  /*
2498  Fake rpl_recovery_rank, which was removed in BUG#13963,
2499  so that this server can register itself on old servers,
2500  see BUG#49259.
2501  */
2502  int4store(pos, /* rpl_recovery_rank */ 0); pos+= 4;
2503  /* The master will fill in master_id */
2504  int4store(pos, 0); pos+= 4;
2505 
2506  if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0))
2507  {
2508  if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
2509  {
2510  *suppress_warnings= TRUE; // Suppress reconnect warning
2511  }
2512  else if (!check_io_slave_killed(mi->info_thd, mi, NULL))
2513  {
2514  char buf[256];
2515  my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql),
2516  mysql_errno(mysql));
2517  mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
2518  ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
2519  }
2520  DBUG_RETURN(1);
2521  }
2522  DBUG_RETURN(0);
2523 }
2524 
2525 
2537 bool show_slave_status(THD* thd, Master_info* mi)
2538 {
2539  // TODO: fix this for multi-master
2540  List<Item> field_list;
2541  Protocol *protocol= thd->protocol;
2542  char *slave_sql_running_state= NULL;
2543  char *sql_gtid_set_buffer= NULL, *io_gtid_set_buffer= NULL;
2544  int sql_gtid_set_size= 0, io_gtid_set_size= 0;
2545  DBUG_ENTER("show_slave_status");
2546 
2547  if (mi != NULL)
2548  {
2549  global_sid_lock->wrlock();
2550  const Gtid_set* sql_gtid_set= gtid_state->get_logged_gtids();
2551  const Gtid_set* io_gtid_set= mi->rli->get_gtid_set();
2552  if ((sql_gtid_set_size= sql_gtid_set->to_string(&sql_gtid_set_buffer)) < 0 ||
2553  (io_gtid_set_size= io_gtid_set->to_string(&io_gtid_set_buffer)) < 0)
2554  {
2555  my_eof(thd);
2556  my_free(sql_gtid_set_buffer);
2557  my_free(io_gtid_set_buffer);
2558  global_sid_lock->unlock();
2559  DBUG_RETURN(true);
2560  }
2561  global_sid_lock->unlock();
2562  }
2563 
2564  field_list.push_back(new Item_empty_string("Slave_IO_State",
2565  14));
2566  field_list.push_back(new Item_empty_string("Master_Host", mi != NULL ?
2567  sizeof(mi->host) : 0));
2568  field_list.push_back(new Item_empty_string("Master_User", mi != NULL ?
2569  mi->get_user_size() : 0));
2570  field_list.push_back(new Item_return_int("Master_Port", 7,
2571  MYSQL_TYPE_LONG));
2572  field_list.push_back(new Item_return_int("Connect_Retry", 10,
2573  MYSQL_TYPE_LONG));
2574  field_list.push_back(new Item_empty_string("Master_Log_File",
2575  FN_REFLEN));
2576  field_list.push_back(new Item_return_int("Read_Master_Log_Pos", 10,
2577  MYSQL_TYPE_LONGLONG));
2578  field_list.push_back(new Item_empty_string("Relay_Log_File",
2579  FN_REFLEN));
2580  field_list.push_back(new Item_return_int("Relay_Log_Pos", 10,
2581  MYSQL_TYPE_LONGLONG));
2582  field_list.push_back(new Item_empty_string("Relay_Master_Log_File",
2583  FN_REFLEN));
2584  field_list.push_back(new Item_empty_string("Slave_IO_Running", 3));
2585  field_list.push_back(new Item_empty_string("Slave_SQL_Running", 3));
2586  field_list.push_back(new Item_empty_string("Replicate_Do_DB", 20));
2587  field_list.push_back(new Item_empty_string("Replicate_Ignore_DB", 20));
2588  field_list.push_back(new Item_empty_string("Replicate_Do_Table", 20));
2589  field_list.push_back(new Item_empty_string("Replicate_Ignore_Table", 23));
2590  field_list.push_back(new Item_empty_string("Replicate_Wild_Do_Table", 24));
2591  field_list.push_back(new Item_empty_string("Replicate_Wild_Ignore_Table",
2592  28));
2593  field_list.push_back(new Item_return_int("Last_Errno", 4, MYSQL_TYPE_LONG));
2594  field_list.push_back(new Item_empty_string("Last_Error", 20));
2595  field_list.push_back(new Item_return_int("Skip_Counter", 10,
2596  MYSQL_TYPE_LONG));
2597  field_list.push_back(new Item_return_int("Exec_Master_Log_Pos", 10,
2598  MYSQL_TYPE_LONGLONG));
2599  field_list.push_back(new Item_return_int("Relay_Log_Space", 10,
2600  MYSQL_TYPE_LONGLONG));
2601  field_list.push_back(new Item_empty_string("Until_Condition", 6));
2602  field_list.push_back(new Item_empty_string("Until_Log_File", FN_REFLEN));
2603  field_list.push_back(new Item_return_int("Until_Log_Pos", 10,
2604  MYSQL_TYPE_LONGLONG));
2605  field_list.push_back(new Item_empty_string("Master_SSL_Allowed", 7));
2606  field_list.push_back(new Item_empty_string("Master_SSL_CA_File", mi != NULL ?
2607  sizeof(mi->ssl_ca) : 0));
2608  field_list.push_back(new Item_empty_string("Master_SSL_CA_Path", mi != NULL ?
2609  sizeof(mi->ssl_capath) : 0));
2610  field_list.push_back(new Item_empty_string("Master_SSL_Cert", mi != NULL ?
2611  sizeof(mi->ssl_cert) : 0));
2612  field_list.push_back(new Item_empty_string("Master_SSL_Cipher", mi != NULL ?
2613  sizeof(mi->ssl_cipher) : 0));
2614  field_list.push_back(new Item_empty_string("Master_SSL_Key", mi != NULL ?
2615  sizeof(mi->ssl_key) : 0));
2616  field_list.push_back(new Item_return_int("Seconds_Behind_Master", 10,
2617  MYSQL_TYPE_LONGLONG));
2618  field_list.push_back(new Item_empty_string("Master_SSL_Verify_Server_Cert",
2619  3));
2620  field_list.push_back(new Item_return_int("Last_IO_Errno", 4, MYSQL_TYPE_LONG));
2621  field_list.push_back(new Item_empty_string("Last_IO_Error", 20));
2622  field_list.push_back(new Item_return_int("Last_SQL_Errno", 4, MYSQL_TYPE_LONG));
2623  field_list.push_back(new Item_empty_string("Last_SQL_Error", 20));
2624  field_list.push_back(new Item_empty_string("Replicate_Ignore_Server_Ids",
2625  FN_REFLEN));
2626  field_list.push_back(new Item_return_int("Master_Server_Id", sizeof(ulong),
2627  MYSQL_TYPE_LONG));
2628  field_list.push_back(new Item_empty_string("Master_UUID", UUID_LENGTH));
2629  field_list.push_back(new Item_empty_string("Master_Info_File",
2630  2 * FN_REFLEN));
2631  field_list.push_back(new Item_return_int("SQL_Delay", 10, MYSQL_TYPE_LONG));
2632  field_list.push_back(new Item_return_int("SQL_Remaining_Delay", 8, MYSQL_TYPE_LONG));
2633  field_list.push_back(new Item_empty_string("Slave_SQL_Running_State", 20));
2634  field_list.push_back(new Item_return_int("Master_Retry_Count", 10,
2635  MYSQL_TYPE_LONGLONG));
2636  field_list.push_back(new Item_empty_string("Master_Bind", mi != NULL ?
2637  sizeof(mi->bind_addr) : 0));
2638  field_list.push_back(new Item_empty_string("Last_IO_Error_Timestamp", 20));
2639  field_list.push_back(new Item_empty_string("Last_SQL_Error_Timestamp", 20));
2640  field_list.push_back(new Item_empty_string("Master_SSL_Crl", mi != NULL ?
2641  sizeof(mi->ssl_crl) : 0));
2642  field_list.push_back(new Item_empty_string("Master_SSL_Crlpath", mi != NULL ?
2643  sizeof(mi->ssl_crlpath) : 0));
2644  field_list.push_back(new Item_empty_string("Retrieved_Gtid_Set",
2645  io_gtid_set_size));
2646  field_list.push_back(new Item_empty_string("Executed_Gtid_Set",
2647  sql_gtid_set_size));
2648  field_list.push_back(new Item_return_int("Auto_Position", sizeof(ulong),
2649  MYSQL_TYPE_LONG));
2650 
2651  if (protocol->send_result_set_metadata(&field_list,
2652  Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2653  {
2654  my_free(sql_gtid_set_buffer);
2655  my_free(io_gtid_set_buffer);
2656  DBUG_RETURN(true);
2657  }
2658 
2659  if (mi != NULL && mi->host[0])
2660  {
2661  DBUG_PRINT("info",("host is set: '%s'", mi->host));
2662  String *packet= &thd->packet;
2663  protocol->prepare_for_resend();
2664 
2665  /*
2666  slave_running can be accessed without run_lock but not other
2667  non-volotile members like mi->info_thd, which is guarded by the mutex.
2668  */
2669  mysql_mutex_lock(&mi->run_lock);
2670  protocol->store(mi->info_thd ? mi->info_thd->get_proc_info() : "", &my_charset_bin);
2671  mysql_mutex_unlock(&mi->run_lock);
2672 
2673  mysql_mutex_lock(&mi->rli->run_lock);
2674  slave_sql_running_state= const_cast<char *>(mi->rli->info_thd ? mi->rli->info_thd->get_proc_info() : "");
2675  mysql_mutex_unlock(&mi->rli->run_lock);
2676 
2677  mysql_mutex_lock(&mi->data_lock);
2678  mysql_mutex_lock(&mi->rli->data_lock);
2679  mysql_mutex_lock(&mi->err_lock);
2680  mysql_mutex_lock(&mi->rli->err_lock);
2681 
2682  DEBUG_SYNC(thd, "wait_after_lock_active_mi_and_rli_data_lock_is_acquired");
2683  protocol->store(mi->host, &my_charset_bin);
2684  protocol->store(mi->get_user(), &my_charset_bin);
2685  protocol->store((uint32) mi->port);
2686  protocol->store((uint32) mi->connect_retry);
2687  protocol->store(mi->get_master_log_name(), &my_charset_bin);
2688  protocol->store((ulonglong) mi->get_master_log_pos());
2689  protocol->store(mi->rli->get_group_relay_log_name() +
2690  dirname_length(mi->rli->get_group_relay_log_name()),
2691  &my_charset_bin);
2692  protocol->store((ulonglong) mi->rli->get_group_relay_log_pos());
2693  protocol->store(mi->rli->get_group_master_log_name(), &my_charset_bin);
2694  protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ?
2695  "Yes" : (mi->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT ?
2696  "Connecting" : "No"), &my_charset_bin);
2697  protocol->store(mi->rli->slave_running ? "Yes":"No", &my_charset_bin);
2698  protocol->store(rpl_filter->get_do_db());
2699  protocol->store(rpl_filter->get_ignore_db());
2700 
2701  char buf[256];
2702  String tmp(buf, sizeof(buf), &my_charset_bin);
2703  rpl_filter->get_do_table(&tmp);
2704  protocol->store(&tmp);
2705  rpl_filter->get_ignore_table(&tmp);
2706  protocol->store(&tmp);
2707  rpl_filter->get_wild_do_table(&tmp);
2708  protocol->store(&tmp);
2709  rpl_filter->get_wild_ignore_table(&tmp);
2710  protocol->store(&tmp);
2711 
2712  protocol->store(mi->rli->last_error().number);
2713  protocol->store(mi->rli->last_error().message, &my_charset_bin);
2714  protocol->store((uint32) mi->rli->slave_skip_counter);
2715  protocol->store((ulonglong) mi->rli->get_group_master_log_pos());
2716  protocol->store((ulonglong) mi->rli->log_space_total);
2717 
2718  const char *until_type= "";
2719 
2720  switch (mi->rli->until_condition)
2721  {
2722  case Relay_log_info::UNTIL_NONE:
2723  until_type= "None";
2724  break;
2725  case Relay_log_info::UNTIL_MASTER_POS:
2726  until_type= "Master";
2727  break;
2728  case Relay_log_info::UNTIL_RELAY_POS:
2729  until_type= "Relay";
2730  break;
2731  case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS:
2732  until_type= "SQL_BEFORE_GTIDS";
2733  break;
2734  case Relay_log_info::UNTIL_SQL_AFTER_GTIDS:
2735  until_type= "SQL_AFTER_GTIDS";
2736  break;
2737  case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS:
2738  until_type= "SQL_AFTER_MTS_GAPS";
2739 #ifndef DBUG_OFF
2740  case Relay_log_info::UNTIL_DONE:
2741  until_type= "DONE";
2742  break;
2743 #endif
2744  default:
2745  DBUG_ASSERT(0);
2746  }
2747  protocol->store(until_type, &my_charset_bin);
2748  protocol->store(mi->rli->until_log_name, &my_charset_bin);
2749  protocol->store((ulonglong) mi->rli->until_log_pos);
2750 
2751 #ifdef HAVE_OPENSSL
2752  protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
2753 #else
2754  protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
2755 #endif
2756  protocol->store(mi->ssl_ca, &my_charset_bin);
2757  protocol->store(mi->ssl_capath, &my_charset_bin);
2758  protocol->store(mi->ssl_cert, &my_charset_bin);
2759  protocol->store(mi->ssl_cipher, &my_charset_bin);
2760  protocol->store(mi->ssl_key, &my_charset_bin);
2761 
2762  /*
2763  The pseudo code to compute Seconds_Behind_Master:
2764  if (SQL thread is running)
2765  {
2766  if (SQL thread processed all the available relay log)
2767  {
2768  if (IO thread is running)
2769  print 0;
2770  else
2771  print NULL;
2772  }
2773  else
2774  compute Seconds_Behind_Master;
2775  }
2776  else
2777  print NULL;
2778  */
2779  if (mi->rli->slave_running)
2780  {
2781  /* Check if SQL thread is at the end of relay log
2782  Checking should be done using two conditions
2783  condition1: compare the log positions and
2784  condition2: compare the file names (to handle rotation case)
2785  */
2786  if ((mi->get_master_log_pos() == mi->rli->get_group_master_log_pos()) &&
2787  (!strcmp(mi->get_master_log_name(), mi->rli->get_group_master_log_name())))
2788  {
2789  if (mi->slave_running == MYSQL_SLAVE_RUN_CONNECT)
2790  protocol->store(0LL);
2791  else
2792  protocol->store_null();
2793  }
2794  else
2795  {
2796  long time_diff= ((long)(time(0) - mi->rli->last_master_timestamp)
2797  - mi->clock_diff_with_master);
2798  /*
2799  Apparently on some systems time_diff can be <0. Here are possible
2800  reasons related to MySQL:
2801  - the master is itself a slave of another master whose time is ahead.
2802  - somebody used an explicit SET TIMESTAMP on the master.
2803  Possible reason related to granularity-to-second of time functions
2804  (nothing to do with MySQL), which can explain a value of -1:
2805  assume the master's and slave's time are perfectly synchronized, and
2806  that at slave's connection time, when the master's timestamp is read,
2807  it is at the very end of second 1, and (a very short time later) when
2808  the slave's timestamp is read it is at the very beginning of second
2809  2. Then the recorded value for master is 1 and the recorded value for
2810  slave is 2. At SHOW SLAVE STATUS time, assume that the difference
2811  between timestamp of slave and rli->last_master_timestamp is 0
2812  (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
2813  This confuses users, so we don't go below 0: hence the max().
2814 
2815  last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
2816  special marker to say "consider we have caught up".
2817  */
2818  protocol->store((longlong)(mi->rli->last_master_timestamp ?
2819  max(0L, time_diff) : 0));
2820  }
2821  }
2822  else
2823  {
2824  protocol->store_null();
2825  }
2826  protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);
2827 
2828  // Last_IO_Errno
2829  protocol->store(mi->last_error().number);
2830  // Last_IO_Error
2831  protocol->store(mi->last_error().message, &my_charset_bin);
2832  // Last_SQL_Errno
2833  protocol->store(mi->rli->last_error().number);
2834  // Last_SQL_Error
2835  protocol->store(mi->rli->last_error().message, &my_charset_bin);
2836  // Replicate_Ignore_Server_Ids
2837  {
2838  char buff[FN_REFLEN];
2839  ulong i, cur_len;
2840  for (i= 0, buff[0]= 0, cur_len= 0;
2841  i < mi->ignore_server_ids->dynamic_ids.elements; i++)
2842  {
2843  ulong s_id, slen;
2844  char sbuff[FN_REFLEN];
2845  get_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id, i);
2846  slen= sprintf(sbuff, (i == 0 ? "%lu" : ", %lu"), s_id);
2847  if (cur_len + slen + 4 > FN_REFLEN)
2848  {
2849  /*
2850  break the loop whenever remained space could not fit
2851  ellipses on the next cycle
2852  */
2853  sprintf(buff + cur_len, "...");
2854  break;
2855  }
2856  cur_len += sprintf(buff + cur_len, "%s", sbuff);
2857  }
2858  protocol->store(buff, &my_charset_bin);
2859  }
2860  // Master_Server_id
2861  protocol->store((uint32) mi->master_id);
2862  protocol->store(mi->master_uuid, &my_charset_bin);
2863  // Master_Info_File
2864  protocol->store(mi->get_description_info(), &my_charset_bin);
2865  // SQL_Delay
2866  protocol->store((uint32) mi->rli->get_sql_delay());
2867  // SQL_Remaining_Delay
2868  if (slave_sql_running_state == stage_sql_thd_waiting_until_delay.m_name)
2869  {
2870  time_t t= my_time(0), sql_delay_end= mi->rli->get_sql_delay_end();
2871  protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
2872  }
2873  else
2874  protocol->store_null();
2875  // Slave_SQL_Running_State
2876  protocol->store(slave_sql_running_state, &my_charset_bin);
2877  // Master_Retry_Count
2878  protocol->store((ulonglong) mi->retry_count);
2879  // Master_Bind
2880  protocol->store(mi->bind_addr, &my_charset_bin);
2881  // Last_IO_Error_Timestamp
2882  protocol->store(mi->last_error().timestamp, &my_charset_bin);
2883  // Last_SQL_Error_Timestamp
2884  protocol->store(mi->rli->last_error().timestamp, &my_charset_bin);
2885  // Master_Ssl_Crl
2886  protocol->store(mi->ssl_ca, &my_charset_bin);
2887  // Master_Ssl_Crlpath
2888  protocol->store(mi->ssl_capath, &my_charset_bin);
2889  // Retrieved_Gtid_Set
2890  protocol->store(io_gtid_set_buffer, &my_charset_bin);
2891  // Executed_Gtid_Set
2892  protocol->store(sql_gtid_set_buffer, &my_charset_bin);
2893  // Auto_Position
2894  protocol->store(mi->is_auto_position() ? 1 : 0);
2895 
2896  mysql_mutex_unlock(&mi->rli->err_lock);
2897  mysql_mutex_unlock(&mi->err_lock);
2898  mysql_mutex_unlock(&mi->rli->data_lock);
2899  mysql_mutex_unlock(&mi->data_lock);
2900 
2901  if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
2902  {
2903  my_free(sql_gtid_set_buffer);
2904  my_free(io_gtid_set_buffer);
2905  DBUG_RETURN(true);
2906  }
2907  }
2908  my_eof(thd);
2909  my_free(sql_gtid_set_buffer);
2910  my_free(io_gtid_set_buffer);
2911  DBUG_RETURN(false);
2912 }
2913 
2914 
2915 void set_slave_thread_options(THD* thd)
2916 {
2917  DBUG_ENTER("set_slave_thread_options");
2918  /*
2919  It's nonsense to constrain the slave threads with max_join_size; if a
2920  query succeeded on master, we HAVE to execute it. So set
2921  OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
2922  (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
2923  SELECT examining more than 4 billion rows would still fail (yes, because
2924  when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
2925  only for client threads.
2926  */
2927  ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS;
2928  if (opt_log_slave_updates)
2929  options|= OPTION_BIN_LOG;
2930  else
2931  options&= ~OPTION_BIN_LOG;
2932  thd->variables.option_bits= options;
2933  thd->variables.completion_type= 0;
2934 
2935  /*
2936  Set autocommit= 1 when info tables are used and autocommit == 0 to
2937  avoid trigger asserts on mysql_execute_command(THD *thd) caused by
2938  info tables updates which do not commit, like Rotate, Stop and
2939  skipped events handling.
2940  */
2941  if ((thd->variables.option_bits & OPTION_NOT_AUTOCOMMIT) &&
2942  (opt_mi_repository_id == INFO_REPOSITORY_TABLE ||
2943  opt_rli_repository_id == INFO_REPOSITORY_TABLE))
2944  {
2945  thd->variables.option_bits|= OPTION_AUTOCOMMIT;
2946  thd->variables.option_bits&= ~OPTION_NOT_AUTOCOMMIT;
2947  thd->server_status|= SERVER_STATUS_AUTOCOMMIT;
2948  }
2949 
2950  DBUG_VOID_RETURN;
2951 }
2952 
2953 void set_slave_thread_default_charset(THD* thd, Relay_log_info const *rli)
2954 {
2955  DBUG_ENTER("set_slave_thread_default_charset");
2956 
2957  thd->variables.character_set_client=
2958  global_system_variables.character_set_client;
2959  thd->variables.collation_connection=
2960  global_system_variables.collation_connection;
2961  thd->variables.collation_server=
2962  global_system_variables.collation_server;
2963  thd->update_charset();
2964 
2965  /*
2966  We use a const cast here since the conceptual (and externally
2967  visible) behavior of the function is to set the default charset of
2968  the thread. That the cache has to be invalidated is a secondary
2969  effect.
2970  */
2971  const_cast<Relay_log_info*>(rli)->cached_charset_invalidate();
2972  DBUG_VOID_RETURN;
2973 }
2974 
2975 /*
2976  init_slave_thread()
2977 */
2978 
2979 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
2980 {
2981  DBUG_ENTER("init_slave_thread");
2982 #if !defined(DBUG_OFF)
2983  int simulate_error= 0;
2984 #endif
2985  thd->system_thread= (thd_type == SLAVE_THD_WORKER) ?
2986  SYSTEM_THREAD_SLAVE_WORKER : (thd_type == SLAVE_THD_SQL) ?
2987  SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
2988  thd->security_ctx->skip_grants();
2989  my_net_init(&thd->net, 0);
2990  thd->slave_thread = 1;
2991  thd->enable_slow_log= opt_log_slow_slave_statements;
2992  set_slave_thread_options(thd);
2993  thd->client_capabilities = CLIENT_LOCAL_FILES;
2994  mysql_mutex_lock(&LOCK_thread_count);
2995  thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
2996  mysql_mutex_unlock(&LOCK_thread_count);
2997 
2998  DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
2999  simulate_error|= (1 << SLAVE_THD_IO););
3000  DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
3001  simulate_error|= (1 << SLAVE_THD_SQL););
3002 #if !defined(DBUG_OFF)
3003  if (init_thr_lock() || thd->store_globals() || simulate_error & (1<< thd_type))
3004 #else
3005  if (init_thr_lock() || thd->store_globals())
3006 #endif
3007  {
3008  DBUG_RETURN(-1);
3009  }
3010 
3011  if (thd_type == SLAVE_THD_SQL)
3012  {
3013  THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
3014  }
3015  else
3016  {
3017  THD_STAGE_INFO(thd, stage_waiting_for_master_update);
3018  }
3019  thd->set_time();
3020  /* Do not use user-supplied timeout value for system threads. */
3021  thd->variables.lock_wait_timeout= LONG_TIMEOUT;
3022  DBUG_RETURN(0);
3023 }
3024 
3025 
3036 template <typename killed_func, typename rpl_info>
3037 static inline bool slave_sleep(THD *thd, time_t seconds,
3038  killed_func func, rpl_info info)
3039 {
3040  bool ret;
3041  struct timespec abstime;
3042  mysql_mutex_t *lock= &info->sleep_lock;
3043  mysql_cond_t *cond= &info->sleep_cond;
3044 
3045  /* Absolute system time at which the sleep time expires. */
3046  set_timespec(abstime, seconds);
3047 
3048  mysql_mutex_lock(lock);
3049  thd->ENTER_COND(cond, lock, NULL, NULL);
3050 
3051  while (! (ret= func(thd, info)))
3052  {
3053  int error= mysql_cond_timedwait(cond, lock, &abstime);
3054  if (error == ETIMEDOUT || error == ETIME)
3055  break;
3056  }
3057 
3058  /* Implicitly unlocks the mutex. */
3059  thd->EXIT_COND(NULL);
3060 
3061  return ret;
3062 }
3063 
3064 static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
3065  bool *suppress_warnings)
3066 {
3067  DBUG_ENTER("request_dump");
3068 
3069  const int BINLOG_NAME_INFO_SIZE= strlen(mi->get_master_log_name());
3070  int error= 1;
3071  size_t command_size= 0;
3072  enum_server_command command= mi->is_auto_position() ?
3073  COM_BINLOG_DUMP_GTID : COM_BINLOG_DUMP;
3074  uchar* command_buffer= NULL;
3075  ushort binlog_flags= 0;
3076 
3077  if (RUN_HOOK(binlog_relay_io,
3078  before_request_transmit,
3079  (thd, mi, binlog_flags)))
3080  goto err;
3081 
3082  *suppress_warnings= false;
3083  if (command == COM_BINLOG_DUMP_GTID)
3084  {
3085  // get set of GTIDs
3086  Sid_map sid_map(NULL/*no lock needed*/);
3087  Gtid_set gtid_executed(&sid_map);
3088  global_sid_lock->wrlock();
3089  gtid_state->dbug_print();
3090  if (gtid_executed.add_gtid_set(mi->rli->get_gtid_set()) != RETURN_STATUS_OK ||
3091  gtid_executed.add_gtid_set(gtid_state->get_logged_gtids()) !=
3092  RETURN_STATUS_OK)
3093  {
3094  global_sid_lock->unlock();
3095  goto err;
3096  }
3097  global_sid_lock->unlock();
3098 
3099  // allocate buffer
3100  size_t encoded_data_size= gtid_executed.get_encoded_length();
3101  size_t allocation_size=
3102  ::BINLOG_FLAGS_INFO_SIZE + ::BINLOG_SERVER_ID_INFO_SIZE +
3103  ::BINLOG_NAME_SIZE_INFO_SIZE + BINLOG_NAME_INFO_SIZE +
3104  ::BINLOG_POS_INFO_SIZE + ::BINLOG_DATA_SIZE_INFO_SIZE +
3105  encoded_data_size + 1;
3106  if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3107  goto err;
3108  uchar* ptr_buffer= command_buffer;
3109 
3110  DBUG_PRINT("info", ("Do I know something about the master? (binary log's name %s - auto position %d).",
3111  mi->get_master_log_name(), mi->is_auto_position()));
3112  /*
3113  Note: binlog_flags is always 0. However, in versions up to 5.6
3114  RC, the master would check the lowest bit and do something
3115  unexpected if it was set; in early versions of 5.6 it would also
3116  use the two next bits. Therefore, for backward compatibility,
3117  if we ever start to use the flags, we should leave the three
3118  lowest bits unused.
3119  */
3120  int2store(ptr_buffer, binlog_flags);
3121  ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3122  int4store(ptr_buffer, server_id);
3123  ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3124  int4store(ptr_buffer, BINLOG_NAME_INFO_SIZE);
3125  ptr_buffer+= ::BINLOG_NAME_SIZE_INFO_SIZE;
3126  memset(ptr_buffer, 0, BINLOG_NAME_INFO_SIZE);
3127  ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3128  int8store(ptr_buffer, 4LL);
3129  ptr_buffer+= ::BINLOG_POS_INFO_SIZE;
3130 
3131  int4store(ptr_buffer, encoded_data_size);
3132  ptr_buffer+= ::BINLOG_DATA_SIZE_INFO_SIZE;
3133  gtid_executed.encode(ptr_buffer);
3134  ptr_buffer+= encoded_data_size;
3135 
3136  command_size= ptr_buffer - command_buffer;
3137  DBUG_ASSERT(command_size == (allocation_size - 1));
3138  }
3139  else
3140  {
3141  size_t allocation_size= ::BINLOG_POS_OLD_INFO_SIZE +
3142  BINLOG_NAME_INFO_SIZE + ::BINLOG_FLAGS_INFO_SIZE +
3143  ::BINLOG_SERVER_ID_INFO_SIZE + 1;
3144  if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3145  goto err;
3146  uchar* ptr_buffer= command_buffer;
3147 
3148  int4store(ptr_buffer, mi->get_master_log_pos());
3149  ptr_buffer+= ::BINLOG_POS_OLD_INFO_SIZE;
3150  // See comment regarding binlog_flags above.
3151  int2store(ptr_buffer, binlog_flags);
3152  ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3153  int4store(ptr_buffer, server_id);
3154  ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3155  memcpy(ptr_buffer, mi->get_master_log_name(), BINLOG_NAME_INFO_SIZE);
3156  ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3157 
3158  command_size= ptr_buffer - command_buffer;
3159  DBUG_ASSERT(command_size == (allocation_size - 1));
3160  }
3161 
3162  if (simple_command(mysql, command, command_buffer, command_size, 1))
3163  {
3164  /*
3165  Something went wrong, so we will just reconnect and retry later
3166  in the future, we should do a better error analysis, but for
3167  now we just fill up the error log :-)
3168  */
3169  if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3170  *suppress_warnings= true; // Suppress reconnect warning
3171  else
3172  sql_print_error("Error on %s: %d %s, will retry in %d secs",
3173  command_name[command].str,
3174  mysql_errno(mysql), mysql_error(mysql),
3175  mi->connect_retry);
3176  goto err;
3177  }
3178  error= 0;
3179 
3180 err:
3181  my_free(command_buffer);
3182  DBUG_RETURN(error);
3183 }
3184 
3185 
3186 /*
3187  Read one event from the master
3188 
3189  SYNOPSIS
3190  read_event()
3191  mysql MySQL connection
3192  mi Master connection information
3193  suppress_warnings TRUE when a normal net read timeout has caused us to
3194  try a reconnect. We do not want to print anything to
3195  the error log in this case because this a anormal
3196  event in an idle server.
3197 
3198  RETURN VALUES
3199  'packet_error' Error
3200  number Length of packet
3201 */
3202 
3203 static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
3204 {
3205  ulong len;
3206  DBUG_ENTER("read_event");
3207 
3208  *suppress_warnings= FALSE;
3209  /*
3210  my_real_read() will time us out
3211  We check if we were told to die, and if not, try reading again
3212  */
3213 #ifndef DBUG_OFF
3214  if (disconnect_slave_event_count && !(mi->events_until_exit--))
3215  DBUG_RETURN(packet_error);
3216 #endif
3217 
3218  len = cli_safe_read(mysql);
3219  if (len == packet_error || (long) len < 1)
3220  {
3221  if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3222  {
3223  /*
3224  We are trying a normal reconnect after a read timeout;
3225  we suppress prints to .err file as long as the reconnect
3226  happens without problems
3227  */
3228  *suppress_warnings= TRUE;
3229  }
3230  else
3231  sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
3232  mysql_error(mysql), mysql_errno(mysql));
3233  DBUG_RETURN(packet_error);
3234  }
3235 
3236  /* Check if eof packet */
3237  if (len < 8 && mysql->net.read_pos[0] == 254)
3238  {
3239  sql_print_information("Slave: received end packet from server, apparent "
3240  "master shutdown: %s",
3241  mysql_error(mysql));
3242  DBUG_RETURN(packet_error);
3243  }
3244 
3245  DBUG_PRINT("exit", ("len: %lu net->read_pos[4]: %d",
3246  len, mysql->net.read_pos[4]));
3247  DBUG_RETURN(len - 1);
3248 }
3249 
3250 
3268 static int sql_delay_event(Log_event *ev, THD *thd, Relay_log_info *rli)
3269 {
3270  long sql_delay= rli->get_sql_delay();
3271 
3272  DBUG_ENTER("sql_delay_event");
3273  mysql_mutex_assert_owner(&rli->data_lock);
3274  DBUG_ASSERT(!rli->belongs_to_client());
3275 
3276  int type= ev->get_type_code();
3277  if (sql_delay && type != ROTATE_EVENT &&
3278  type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
3279  {
3280  // The time when we should execute the event.
3281  time_t sql_delay_end=
3282  ev->when.tv_sec + rli->mi->clock_diff_with_master + sql_delay;
3283  // The current time.
3284  time_t now= my_time(0);
3285  // The time we will have to sleep before executing the event.
3286  unsigned long nap_time= 0;
3287  if (sql_delay_end > now)
3288  nap_time= sql_delay_end - now;
3289 
3290  DBUG_PRINT("info", ("sql_delay= %lu "
3291  "ev->when= %lu "
3292  "rli->mi->clock_diff_with_master= %lu "
3293  "now= %ld "
3294  "sql_delay_end= %ld "
3295  "nap_time= %ld",
3296  sql_delay, (long) ev->when.tv_sec,
3297  rli->mi->clock_diff_with_master,
3298  (long)now, (long)sql_delay_end, (long)nap_time));
3299 
3300  if (sql_delay_end > now)
3301  {
3302  DBUG_PRINT("info", ("delaying replication event %lu secs",
3303  nap_time));
3304  rli->start_sql_delay(sql_delay_end);
3305  mysql_mutex_unlock(&rli->data_lock);
3306  DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rli));
3307  }
3308  }
3309 
3310  mysql_mutex_unlock(&rli->data_lock);
3311 
3312  DBUG_RETURN(0);
3313 }
3314 
/**
  Three-way comparison of two unsigned long values given by pointer.

  @param id1  Pointer to the left-hand value.
  @param id2  Pointer to the right-hand value.

  @return -1 if *id1 < *id2, 1 if *id1 > *id2, 0 if they are equal.
*/
int ulong_cmp(ulong *id1, ulong *id2)
{
  if (*id1 < *id2)
    return -1;
  if (*id1 > *id2)
    return 1;
  return 0;
}
3323 
3378 enum enum_slave_apply_event_and_update_pos_retval
3379 apply_event_and_update_pos(Log_event** ptr_ev, THD* thd, Relay_log_info* rli)
3380 {
3381  int exec_res= 0;
3382  bool skip_event= FALSE;
3383  Log_event *ev= *ptr_ev;
3385 
3386  DBUG_ENTER("apply_event_and_update_pos");
3387 
3388  DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
3389  ev->get_type_str(), ev->get_type_code(),
3390  ev->server_id));
3391  DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu",
3392  FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
3393  FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
3394  (ulong) rli->last_event_start_time));
3395 
3396  /*
3397  Execute the event to change the database and update the binary
3398  log coordinates, but first we set some data that is needed for
3399  the thread.
3400 
3401  The event will be executed unless it is supposed to be skipped.
3402 
3403  Queries originating from this server must be skipped. Low-level
3404  events (Format_description_log_event, Rotate_log_event,
3405  Stop_log_event) from this server must also be skipped. But for
3406  those we don't want to modify 'group_master_log_pos', because
3407  these events did not exist on the master.
3408  Format_description_log_event is not completely skipped.
3409 
3410  Skip queries specified by the user in 'slave_skip_counter'. We
3411  can't however skip events that has something to do with the log
3412  files themselves.
3413 
3414  Filtering on own server id is extremely important, to ignore
3415  execution of events created by the creation/rotation of the relay
3416  log (remember that now the relay log starts with its Format_desc,
3417  has a Rotate etc).
3418  */
3419  /*
3420  Set the unmasked and actual server ids from the event
3421  */
3422  thd->server_id = ev->server_id; // use the original server id for logging
3423  thd->unmasked_server_id = ev->unmasked_server_id;
3424  thd->set_time(); // time the query
3425  thd->lex->current_select= 0;
3426  if (!ev->when.tv_sec)
3427  my_micro_time_to_timeval(my_micro_time(), &ev->when);
3428  ev->thd = thd; // because up to this point, ev->thd == 0
3429 
3430  if (!(rli->is_mts_recovery() && bitmap_is_set(&rli->recovery_groups,
3431  rli->mts_recovery_index)))
3432  {
3433  reason= ev->shall_skip(rli);
3434  }
3435 #ifndef DBUG_OFF
3436  if (rli->is_mts_recovery())
3437  {
3438  DBUG_PRINT("mts", ("Mts is recovering %d, number of bits set %d, "
3439  "bitmap is set %d, index %lu.\n",
3440  rli->is_mts_recovery(),
3441  bitmap_bits_set(&rli->recovery_groups),
3442  bitmap_is_set(&rli->recovery_groups,
3443  rli->mts_recovery_index),
3444  rli->mts_recovery_index));
3445  }
3446 #endif
3447  if (reason == Log_event::EVENT_SKIP_COUNT)
3448  {
3449  sql_slave_skip_counter= --rli->slave_skip_counter;
3450  skip_event= TRUE;
3451  }
3452  if (reason == Log_event::EVENT_SKIP_NOT)
3453  {
3454  // Sleeps if needed, and unlocks rli->data_lock.
3455  if (sql_delay_event(ev, thd, rli))
3456  DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);
3457 
3458  exec_res= ev->apply_event(rli);
3459 
3460  if (!exec_res && (ev->worker != rli))
3461  {
3462  if (ev->worker)
3463  {
3464  Slave_job_item item= {ev}, *job_item= &item;
3465  Slave_worker *w= (Slave_worker *) ev->worker;
3466  // specially marked group typically with OVER_MAX_DBS_IN_EVENT_MTS db:s
3467  bool need_sync= ev->is_mts_group_isolated();
3468 
3469  // all events except BEGIN-query must be marked with a non-NULL Worker
3470  DBUG_ASSERT(((Slave_worker*) ev->worker) == rli->last_assigned_worker);
3471 
3472  DBUG_PRINT("Log_event::apply_event:",
3473  ("-> job item data %p to W_%lu", job_item->data, w->id));
3474 
3475  // Reset mts in-group state
3476  if (rli->mts_group_status == Relay_log_info::MTS_END_GROUP)
3477  {
3478  // CGAP cleanup
3479  for (uint i= rli->curr_group_assigned_parts.elements; i > 0; i--)
3480  delete_dynamic_element(&rli->
3481  curr_group_assigned_parts, i - 1);
3482  // reset the B-group and Gtid-group marker
3483  rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
3484  rli->last_assigned_worker= NULL;
3485  }
3486  /*
3487  Stroring GAQ index of the group that the event belongs to
3488  in the event. Deferred events are handled similarly below.
3489  */
3490  ev->mts_group_idx= rli->gaq->assigned_group_index;
3491 
3492  bool append_item_to_jobs_error= false;
3493  if (rli->curr_group_da.elements > 0)
3494  {
3495  /*
3496  the current event sorted out which partion the current group
3497  belongs to. It's time now to processed deferred array events.
3498  */
3499  for (uint i= 0; i < rli->curr_group_da.elements; i++)
3500  {
3501  Slave_job_item da_item;
3502  get_dynamic(&rli->curr_group_da, (uchar*) &da_item.data, i);
3503  DBUG_PRINT("mts", ("Assigning job %llu to worker %lu",
3504  ((Log_event* )da_item.data)->log_pos, w->id));
3505  static_cast<Log_event*>(da_item.data)->mts_group_idx=
3506  rli->gaq->assigned_group_index; // similarly to above
3507  if (!append_item_to_jobs_error)
3508  append_item_to_jobs_error= append_item_to_jobs(&da_item, w, rli);
3509  if (append_item_to_jobs_error)
3510  delete static_cast<Log_event*>(da_item.data);
3511  }
3512  if (rli->curr_group_da.elements > rli->curr_group_da.max_element)
3513  {
3514  // reallocate to less mem
3515  rli->curr_group_da.elements= rli->curr_group_da.max_element;
3516  rli->curr_group_da.max_element= 0;
3517  freeze_size(&rli->curr_group_da); // restores max_element
3518  }
3519  rli->curr_group_da.elements= 0;
3520  }
3521  if (append_item_to_jobs_error)
3522  DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);
3523 
3524  DBUG_PRINT("mts", ("Assigning job %llu to worker %lu\n",
3525  ((Log_event* )job_item->data)->log_pos, w->id));
3526 
3527  /* Notice `ev' instance can be destoyed after `append()' */
3528  if (append_item_to_jobs(job_item, w, rli))
3529  DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);
3530  if (need_sync)
3531  {
3532  /*
3533  combination of over-max db:s and end of the current group
3534  forces to wait for the assigned groups completion by assigned
3535  to the event worker.
3536  Indeed MTS group status could be safely set to MTS_NOT_IN_GROUP
3537  after wait_() returns.
3538  No need to know a possible error out of synchronization call.
3539  */
3540  (void) wait_for_workers_to_finish(rli);
3541  }
3542 
3543  }
3544  *ptr_ev= NULL; // announcing the event is passed to w-worker
3545 
3546  if (log_warnings > 1 &&
3547  rli->is_parallel_exec() && rli->mts_events_assigned % 1024 == 1)
3548  {
3549  time_t my_now= my_time(0);
3550 
3551  if ((my_now - rli->mts_last_online_stat) >=
3552  mts_online_stat_period)
3553  {
3554  sql_print_information("Multi-threaded slave statistics: "
3555  "seconds elapsed = %lu; "
3556  "events assigned = %llu; "
3557  "worker queues filled over overrun level = %lu; "
3558  "waited due a Worker queue full = %lu; "
3559  "waited due the total size = %lu; "
3560  "slept when Workers occupied = %lu ",
3561  static_cast<unsigned long>
3562  (my_now - rli->mts_last_online_stat),
3563  rli->mts_events_assigned,
3564  rli->mts_wq_overrun_cnt,
3565  rli->mts_wq_overfill_cnt,
3566  rli->wq_size_waits_cnt,
3567  rli->mts_wq_no_underrun_cnt);
3568  rli->mts_last_online_stat= my_now;
3569  }
3570  }
3571  }
3572  }
3573  else
3574  mysql_mutex_unlock(&rli->data_lock);
3575 
3576  DBUG_PRINT("info", ("apply_event error = %d", exec_res));
3577  if (exec_res == 0)
3578  {
3579  /*
3580  Positions are not updated here when an XID is processed. To make
3581  a slave crash-safe, positions must be updated while processing a
3582  XID event and as such do not need to be updated here again.
3583 
3584  However, if the event needs to be skipped, this means that it
3585  will not be processed and then positions need to be updated here.
3586 
3587  See sql/rpl_rli.h for further details.
3588  */
3589  int error= 0;
3590  if (*ptr_ev &&
3591  (ev->get_type_code() != XID_EVENT ||
3592  skip_event || (rli->is_mts_recovery() && !is_gtid_event(ev) &&
3593  (ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
3594  bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index))))
3595  {
3596 #ifndef DBUG_OFF
3597  /*
3598  This only prints information to the debug trace.
3599 
3600  TODO: Print an informational message to the error log?
3601  */
3602  static const char *const explain[] = {
3603  // EVENT_SKIP_NOT,
3604  "not skipped",
3605  // EVENT_SKIP_IGNORE,
3606  "skipped because event should be ignored",
3607  // EVENT_SKIP_COUNT
3608  "skipped because event skip counter was non-zero"
3609  };
3610  DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
3611  test(thd->variables.option_bits & OPTION_BEGIN),
3613  DBUG_PRINT("skip_event", ("%s event was %s",
3614  ev->get_type_str(), explain[reason]));
3615 #endif
3616 
3617  error= ev->update_pos(rli);
3618 
3619 #ifndef DBUG_OFF
3620  DBUG_PRINT("info", ("update_pos error = %d", error));
3621  if (!rli->belongs_to_client())
3622  {
3623  char buf[22];
3624  DBUG_PRINT("info", ("group %s %s",
3625  llstr(rli->get_group_relay_log_pos(), buf),
3626  rli->get_group_relay_log_name()));
3627  DBUG_PRINT("info", ("event %s %s",
3628  llstr(rli->get_event_relay_log_pos(), buf),
3629  rli->get_event_relay_log_name()));
3630  }
3631 #endif
3632  }
3633  else
3634  {
3635  DBUG_ASSERT(*ptr_ev == ev || rli->is_parallel_exec() ||
3636  (!ev->worker &&
3637  (ev->get_type_code() == INTVAR_EVENT ||
3638  ev->get_type_code() == RAND_EVENT ||
3639  ev->get_type_code() == USER_VAR_EVENT)));
3640 
3641  rli->inc_event_relay_log_pos();
3642  }
3643 
3644  if (!error && rli->is_mts_recovery() &&
3645  ev->get_type_code() != ROTATE_EVENT &&
3646  ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
3647  ev->get_type_code() != PREVIOUS_GTIDS_LOG_EVENT)
3648  {
3649  if (ev->starts_group())
3650  {
3651  rli->mts_recovery_group_seen_begin= true;
3652  }
3653  else if ((ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
3654  !is_gtid_event(ev))
3655  {
3656  rli->mts_recovery_index++;
3657  if (--rli->mts_recovery_group_cnt == 0)
3658  {
3659  rli->mts_recovery_index= 0;
3660  sql_print_information("Slave: MTS Recovery has completed at "
3661  "relay log %s, position %llu "
3662  "master log %s, position %llu.",
3663  rli->get_group_relay_log_name(),
3664  rli->get_group_relay_log_pos(),
3665  rli->get_group_master_log_name(),
3666  rli->get_group_master_log_pos());
3667 #ifndef DBUG_OFF
3668  /*
3669  Few tests wait for UNTIL_SQL_AFTER_MTS_GAPS completion.
3670  Due to exisiting convention the status won't change
3671  prior to slave restarts.
3672  So making of UNTIL_SQL_AFTER_MTS_GAPS completion isdone here,
3673  and only in the debug build to make the test to catch the change
3674  despite a faulty design of UNTIL checking before execution.
3675  */
3676  if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS)
3677  {
3678  rli->until_condition= Relay_log_info::UNTIL_DONE;
3679  }
3680 #endif
3681  // reset the Worker tables to remove last slave session time info
3682  if ((error= rli->mts_finalize_recovery()))
3683  {
3684  (void) Rpl_info_factory::reset_workers(rli);
3685  }
3686  }
3687  rli->mts_recovery_group_seen_begin= false;
3688  if (!error)
3689  error= rli->flush_info(true);
3690  }
3691  }
3692 
3693  if (error)
3694  {
3695  /*
3696  The update should not fail, so print an error message and
3697  return an error code.
3698 
3699  TODO: Replace this with a decent error message when merged
3700  with BUG#24954 (which adds several new error message).
3701  */
3702  char buf[22];
3703  rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
3704  "It was not possible to update the positions"
3705  " of the relay log information: the slave may"
3706  " be in an inconsistent state."
3707  " Stopped in %s position %s",
3708  rli->get_group_relay_log_name(),
3709  llstr(rli->get_group_relay_log_pos(), buf));
3710  DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR);
3711  }
3712  }
3713 
3714  DBUG_RETURN(exec_res ? SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR
3715  : SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);
3716 }
3717 
3718 
3748 static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
3749 {
3750  DBUG_ENTER("exec_relay_log_event");
3751 
3752  /*
3753  We acquire this mutex since we need it for all operations except
3754  event execution. But we will release it in places where we will
3755  wait for something for example inside of next_event().
3756  */
3757  mysql_mutex_lock(&rli->data_lock);
3758 
3759  /*
3760  UNTIL_SQL_AFTER_GTIDS requires special handling since we have to check
3761  whether the until_condition is satisfied *before* the SQL threads goes on
3762  a wait inside next_event() for the relay log to grow. This is reuired since
3763  if we have already applied the last event in the waiting set but since he
3764  check happens only at the start of the next event we may end up waiting
3765  forever the next event is not available or is delayed.
3766  */
3767  if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
3768  rli->is_until_satisfied(thd, NULL))
3769  {
3770  rli->abort_slave= 1;
3771  mysql_mutex_unlock(&rli->data_lock);
3772  DBUG_RETURN(1);
3773  }
3774 
3775  Log_event *ev = next_event(rli), **ptr_ev;
3776 
3777  DBUG_ASSERT(rli->info_thd==thd);
3778 
3779  if (sql_slave_killed(thd,rli))
3780  {
3781  mysql_mutex_unlock(&rli->data_lock);
3782  delete ev;
3783  DBUG_RETURN(1);
3784  }
3785  if (ev)
3786  {
3787  enum enum_slave_apply_event_and_update_pos_retval exec_res;
3788 
3789  ptr_ev= &ev;
3790  /*
3791  Even if we don't execute this event, we keep the master timestamp,
3792  so that seconds behind master shows correct delta (there are events
3793  that are not replayed, so we keep falling behind).
3794 
3795  If it is an artificial event, or a relay log event (IO thread generated
3796  event) or ev->when is set to 0, or a FD from master, we don't update the
3797  last_master_timestamp.
3798  */
3799  if (!(rli->is_parallel_exec() ||
3800  ev->is_artificial_event() || ev->is_relay_log_event() ||
3801  (ev->when.tv_sec == 0) || ev->get_type_code() == FORMAT_DESCRIPTION_EVENT))
3802  {
3803  rli->last_master_timestamp= ev->when.tv_sec + (time_t) ev->exec_time;
3804  DBUG_ASSERT(rli->last_master_timestamp >= 0);
3805  }
3806 
3807  /*
3808  This tests if the position of the beginning of the current event
3809  hits the UNTIL barrier.
3810  MTS: since the master and the relay-group coordinates change
3811  asynchronously logics of rli->is_until_satisfied() can't apply.
3812  A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here
3813  temporarily (see is_until_satisfied todo).
3814  */
3815  if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
3816  rli->until_condition != Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
3817  rli->is_until_satisfied(thd, ev))
3818  {
3819  /*
3820  Setting abort_slave flag because we do not want additional message about
3821  error in query execution to be printed.
3822  */
3823  rli->abort_slave= 1;
3824  mysql_mutex_unlock(&rli->data_lock);
3825  delete ev;
3826  DBUG_RETURN(1);
3827  }
3828 
3829  {
3835  DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
3836  if ((ev->get_type_code() == XID_EVENT) ||
3837  ((ev->get_type_code() == QUERY_EVENT) &&
3838  strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
3839  {
3840  DBUG_ASSERT(thd->transaction.all.cannot_safely_rollback());
3841  rli->abort_slave= 1;
3842  mysql_mutex_unlock(&rli->data_lock);
3843  delete ev;
3844  rli->inc_event_relay_log_pos();
3845  DBUG_RETURN(0);
3846  };);
3847  }
3848 
3849  /* ptr_ev can change to NULL indicating MTS coorinator passed to a Worker */
3850  exec_res= apply_event_and_update_pos(ptr_ev, thd, rli);
3851  /*
3852  Note: the above call to apply_event_and_update_pos executes
3853  mysql_mutex_unlock(&rli->data_lock);
3854  */
3855 
3856  /* For deferred events, the ptr_ev is set to NULL
3857  in Deferred_log_events::add() function.
3858  Hence deferred events wont be deleted here.
3859  They will be deleted in Deferred_log_events::rewind() funciton.
3860  */
3861  if (*ptr_ev)
3862  {
3863  DBUG_ASSERT(*ptr_ev == ev); // event remains to belong to Coordinator
3864 
3865  /*
3866  Format_description_log_event should not be deleted because it will be
3867  used to read info about the relay log's format; it will be deleted when
3868  the SQL thread does not need it, i.e. when this thread terminates.
3869  ROWS_QUERY_LOG_EVENT is destroyed at the end of the current statement
3870  clean-up routine.
3871  */
3872  if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
3873  ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
3874  {
3875  DBUG_PRINT("info", ("Deleting the event after it has been executed"));
3876  delete ev;
3877  ev= NULL;
3878  }
3879  }
3880 
3881  /*
3882  exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR
3883  update_log_pos failed: this should not happen, so we
3884  don't retry.
3885  exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR
3886  append_item_to_jobs() failed, this happened because
3887  thread was killed while waiting for enqueue on worker.
3888  */
3889  if (exec_res >= SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR)
3890  {
3891  delete ev;
3892  DBUG_RETURN(1);
3893  }
3894 
3895  if (slave_trans_retries)
3896  {
3897  int UNINIT_VAR(temp_err);
3898  bool silent= false;
3899  if (exec_res && !is_mts_worker(thd) /* no reexecution in MTS mode */ &&
3900  (temp_err= rli->has_temporary_error(thd, 0, &silent)) &&
3901  !thd->transaction.all.cannot_safely_rollback())
3902  {
3903  const char *errmsg;
3904  /*
3905  We were in a transaction which has been rolled back because of a
3906  temporary error;
3907  let's seek back to BEGIN log event and retry it all again.
3908  Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
3909  there is no rollback since 5.0.13 (ref: manual).
3910  We have to not only seek but also
3911  a) init_info(), to seek back to hot relay log's start for later
3912  (for when we will come back to this hot log after re-processing the
3913  possibly existing old logs where BEGIN is: check_binlog_magic() will
3914  then need the cache to be at position 0 (see comments at beginning of
3915  init_info()).
3916  b) init_relay_log_pos(), because the BEGIN may be an older relay log.
3917  */
3918  if (rli->trans_retries < slave_trans_retries)
3919  {
3920  /*
3921  We need to figure out if there is a test case that covers
3922  this part. \Alfranio.
3923  */
3924  if (global_init_info(rli->mi, false, SLAVE_SQL))
3925  sql_print_error("Failed to initialize the master info structure");
3926  else if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
3927  rli->get_group_relay_log_pos(),
3928  true/*need_data_lock=true*/,
3929  &errmsg, 1))
3930  sql_print_error("Error initializing relay log position: %s",
3931  errmsg);
3932  else
3933  {
3934  exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK;
3935  rli->cleanup_context(thd, 1);
3936  /* chance for concurrent connection to get more locks */
3937  slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
3938  sql_slave_killed, rli);
3939  mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
3940  if (!silent)
3941  rli->trans_retries++;
3942 
3943  rli->retried_trans++;
3944  mysql_mutex_unlock(&rli->data_lock);
3945  DBUG_PRINT("info", ("Slave retries transaction "
3946  "rli->trans_retries: %lu", rli->trans_retries));
3947  }
3948  }
3949  else
3950  {
3951  thd->is_fatal_error= 1;
3952  rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
3953  "Slave SQL thread retried transaction %lu time(s) "
3954  "in vain, giving up. Consider raising the value of "
3955  "the slave_transaction_retries variable.", rli->trans_retries);
3956  }
3957  }
3958  else if ((exec_res && !temp_err) ||
3959  (opt_using_transactions &&
3960  rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()))
3961  {
3962  /*
3963  Only reset the retry counter if the entire group succeeded
3964  or failed with a non-transient error. On a successful
3965  event, the execution will proceed as usual; in the case of a
3966  non-transient error, the slave will stop with an error.
3967  */
3968  rli->trans_retries= 0; // restart from fresh
3969  DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu",
3970  rli->trans_retries));
3971  }
3972  }
3973  if (exec_res)
3974  delete ev;
3975  DBUG_RETURN(exec_res);
3976  }
3977  mysql_mutex_unlock(&rli->data_lock);
3978  rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE,
3979  ER(ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
3980 Could not parse relay log event entry. The possible reasons are: the master's \
3981 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
3982 binary log), the slave's relay log is corrupted (you can check this by running \
3983 'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
3984 or slave's MySQL code. If you want to check the master's binary log or slave's \
3985 relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
3986 on this slave.\
3987 ");
3988  DBUG_RETURN(1);
3989 }
3990 
3991 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info)
3992 {
3993  if (io_slave_killed(thd, mi))
3994  {
3995  if (info && log_warnings)
3996  sql_print_information("%s", info);
3997  return TRUE;
3998  }
3999  return FALSE;
4000 }
4001 
4028 static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
4029  uint *retry_count, bool suppress_warnings,
4030  const char *messages[SLAVE_RECON_MSG_MAX])
4031 {
4032  mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
4033  thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
4034 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
4035  thd->clear_active_vio();
4036 #endif
4037  end_server(mysql);
4038  if ((*retry_count)++)
4039  {
4040  if (*retry_count > mi->retry_count)
4041  return 1; // Don't retry forever
4042  slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
4043  }
4044  if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
4045  return 1;
4046  thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
4047  if (!suppress_warnings)
4048  {
4049  char buf[256], llbuff[22];
4050  my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
4051  mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(),
4052  llbuff));
4053  /*
4054  Raise a warining during registering on master/requesting dump.
4055  Log a message reading event.
4056  */
4057  if (messages[SLAVE_RECON_MSG_COMMAND][0])
4058  {
4059  mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
4060  ER(ER_SLAVE_MASTER_COM_FAILURE),
4061  messages[SLAVE_RECON_MSG_COMMAND], buf);
4062  }
4063  else
4064  {
4065  sql_print_information("%s", buf);
4066  }
4067  }
4068  if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi))
4069  {
4070  if (log_warnings)
4071  sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
4072  return 1;
4073  }
4074  return 0;
4075 }
4076 
4077 
4086 pthread_handler_t handle_slave_io(void *arg)
4087 {
4088  THD *thd= NULL; // needs to be first for thread_stack
4089  bool thd_added= false;
4090  MYSQL *mysql;
4091  Master_info *mi = (Master_info*)arg;
4092  Relay_log_info *rli= mi->rli;
4093  char llbuff[22];
4094  uint retry_count;
4095  bool suppress_warnings;
4096  int ret;
4097  int binlog_version;
4098 #ifndef DBUG_OFF
4099  uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
4100 #endif
4101  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
4102  my_thread_init();
4103  DBUG_ENTER("handle_slave_io");
4104 
4105  DBUG_ASSERT(mi->inited);
4106  mysql= NULL ;
4107  retry_count= 0;
4108 
4109  mysql_mutex_lock(&mi->run_lock);
4110  /* Inform waiting threads that slave has started */
4111  mi->slave_run_id++;
4112 
4113 #ifndef DBUG_OFF
4114  mi->events_until_exit = disconnect_slave_event_count;
4115 #endif
4116 
4117  thd= new THD; // note that contructor of THD uses DBUG_ !
4118  THD_CHECK_SENTRY(thd);
4119  mi->info_thd = thd;
4120 
4121  pthread_detach_this_thread();
4122  thd->thread_stack= (char*) &thd; // remember where our stack is
4123  mi->clear_error();
4124  if (init_slave_thread(thd, SLAVE_THD_IO))
4125  {
4126  mysql_cond_broadcast(&mi->start_cond);
4127  mysql_mutex_unlock(&mi->run_lock);
4128  sql_print_error("Failed during slave I/O thread initialization");
4129  goto err;
4130  }
4131 
4132  mysql_mutex_lock(&LOCK_thread_count);
4133  add_global_thread(thd);
4134  thd_added= true;
4135  mysql_mutex_unlock(&LOCK_thread_count);
4136 
4137  mi->slave_running = 1;
4138  mi->abort_slave = 0;
4139  mysql_mutex_unlock(&mi->run_lock);
4140  mysql_cond_broadcast(&mi->start_cond);
4141 
4142  DBUG_PRINT("master_info",("log_file_name: '%s' position: %s",
4143  mi->get_master_log_name(),
4144  llstr(mi->get_master_log_pos(), llbuff)));
4145 
4146  /* This must be called before run any binlog_relay_io hooks */
4147  my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
4148 
4149  if (RUN_HOOK(binlog_relay_io, thread_start, (thd, mi)))
4150  {
4151  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4152  ER(ER_SLAVE_FATAL_ERROR), "Failed to run 'thread_start' hook");
4153  goto err;
4154  }
4155 
4156  if (!(mi->mysql = mysql = mysql_init(NULL)))
4157  {
4158  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4159  ER(ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
4160  goto err;
4161  }
4162 
4163  THD_STAGE_INFO(thd, stage_connecting_to_master);
4164  // we can get killed during safe_connect
4165  if (!safe_connect(thd, mysql, mi))
4166  {
4167  sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4168  "replication started in log '%s' at position %s",
4169  mi->get_user(), mi->host, mi->port,
4170  mi->get_io_rpl_log_name(),
4171  llstr(mi->get_master_log_pos(), llbuff));
4172  }
4173  else
4174  {
4175  sql_print_information("Slave I/O thread killed while connecting to master");
4176  goto err;
4177  }
4178 
4179 connected:
4180 
4181  DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
4182  {
4183  const char act[]=
4184  "now "
4185  "wait_for signal.io_thread_let_running";
4186  DBUG_ASSERT(opt_debug_sync_timeout > 0);
4187  DBUG_ASSERT(!debug_sync_set_action(thd,
4188  STRING_WITH_LEN(act)));
4189  };);
4190  mysql_mutex_lock(&mi->run_lock);
4191  mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
4192  mysql_mutex_unlock(&mi->run_lock);
4193 
4194  thd->slave_net = &mysql->net;
4195  THD_STAGE_INFO(thd, stage_checking_master_version);
4196  ret= get_master_version_and_clock(mysql, mi);
4197  if (!ret)
4198  ret= get_master_uuid(mysql, mi);
4199  if (!ret)
4200  ret= io_thread_init_commands(mysql, mi);
4201 
4202  if (ret == 1)
4203  /* Fatal error */
4204  goto err;
4205 
4206  if (ret == 2)
4207  {
4208  if (check_io_slave_killed(mi->info_thd, mi, "Slave I/O thread killed"
4209  "while calling get_master_version_and_clock(...)"))
4210  goto err;
4211  suppress_warnings= FALSE;
4212  /* Try to reconnect because the error was caused by a transient network problem */
4213  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4214  reconnect_messages[SLAVE_RECON_ACT_REG]))
4215  goto err;
4216  goto connected;
4217  }
4218 
4219  mysql_mutex_lock(&mi->data_lock);
4220  binlog_version= mi->get_mi_description_event()->binlog_version;
4221  mysql_mutex_unlock(&mi->data_lock);
4222 
4223  if (binlog_version > 1)
4224  {
4225  /*
4226  Register ourselves with the master.
4227  */
4228  THD_STAGE_INFO(thd, stage_registering_slave_on_master);
4229  if (register_slave_on_master(mysql, mi, &suppress_warnings))
4230  {
4231  if (!check_io_slave_killed(thd, mi, "Slave I/O thread killed "
4232  "while registering slave on master"))
4233  {
4234  sql_print_error("Slave I/O thread couldn't register on master");
4235  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4236  reconnect_messages[SLAVE_RECON_ACT_REG]))
4237  goto err;
4238  }
4239  else
4240  goto err;
4241  goto connected;
4242  }
4243  DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_REG",
4244  if (!retry_count_reg)
4245  {
4246  retry_count_reg++;
4247  sql_print_information("Forcing to reconnect slave I/O thread");
4248  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4249  reconnect_messages[SLAVE_RECON_ACT_REG]))
4250  goto err;
4251  goto connected;
4252  });
4253  }
4254 
4255  DBUG_PRINT("info",("Starting reading binary log from master"));
4256  while (!io_slave_killed(thd,mi))
4257  {
4258  THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
4259  if (request_dump(thd, mysql, mi, &suppress_warnings))
4260  {
4261  sql_print_error("Failed on request_dump()");
4262  if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
4263 requesting master dump") ||
4264  try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4265  reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4266  goto err;
4267  goto connected;
4268  }
4269  DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_DUMP",
4270  if (!retry_count_dump)
4271  {
4272  retry_count_dump++;
4273  sql_print_information("Forcing to reconnect slave I/O thread");
4274  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4275  reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4276  goto err;
4277  goto connected;
4278  });
4279  const char *event_buf;
4280 
4281  DBUG_ASSERT(mi->last_error().number == 0);
4282  while (!io_slave_killed(thd,mi))
4283  {
4284  ulong event_len;
4285  /*
4286  We say "waiting" because read_event() will wait if there's nothing to
4287  read. But if there's something to read, it will not wait. The
4288  important thing is to not confuse users by saying "reading" whereas
4289  we're in fact receiving nothing.
4290  */
4291  THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
4292  event_len= read_event(mysql, mi, &suppress_warnings);
4293  if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
4294 reading event"))
4295  goto err;
4296  DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT",
4297  if (!retry_count_event)
4298  {
4299  retry_count_event++;
4300  sql_print_information("Forcing to reconnect slave I/O thread");
4301  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4302  reconnect_messages[SLAVE_RECON_ACT_EVENT]))
4303  goto err;
4304  goto connected;
4305  });
4306 
4307  if (event_len == packet_error)
4308  {
4309  uint mysql_error_number= mysql_errno(mysql);
4310  switch (mysql_error_number) {
4311  case CR_NET_PACKET_TOO_LARGE:
4312  sql_print_error("\
4313 Log entry on master is longer than slave_max_allowed_packet (%lu) on \
4314 slave. If the entry is correct, restart the server with a higher value of \
4315 slave_max_allowed_packet",
4316  slave_max_allowed_packet);
4317  mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE,
4318  "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
4319  goto err;
4320  case ER_MASTER_FATAL_ERROR_READING_BINLOG:
4321  mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG,
4322  ER(ER_MASTER_FATAL_ERROR_READING_BINLOG),
4323  mysql_error_number, mysql_error(mysql));
4324  goto err;
4325  case ER_OUT_OF_RESOURCES:
4326  sql_print_error("\
4327 Stopping slave I/O thread due to out-of-memory error from master");
4328  mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES,
4329  "%s", ER(ER_OUT_OF_RESOURCES));
4330  goto err;
4331  }
4332  if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4333  reconnect_messages[SLAVE_RECON_ACT_EVENT]))
4334  goto err;
4335  goto connected;
4336  } // if (event_len == packet_error)
4337 
4338  retry_count=0; // ok event, reset retry counter
4339  THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
4340  event_buf= (const char*)mysql->net.read_pos + 1;
4341  DBUG_PRINT("info", ("IO thread received event of type %s", Log_event::get_type_str((Log_event_type)event_buf[EVENT_TYPE_OFFSET])));
4342  if (RUN_HOOK(binlog_relay_io, after_read_event,
4343  (thd, mi,(const char*)mysql->net.read_pos + 1,
4344  event_len, &event_buf, &event_len)))
4345  {
4346  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4347  ER(ER_SLAVE_FATAL_ERROR),
4348  "Failed to run 'after_read_event' hook");
4349  goto err;
4350  }
4351 
4352  /* XXX: 'synced' should be updated by queue_event to indicate
4353  whether event has been synced to disk */
4354  bool synced= 0;
4355  if (queue_event(mi, event_buf, event_len))
4356  {
4357  mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
4358  ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
4359  "could not queue event from master");
4360  goto err;
4361  }
4362 
4363  if (RUN_HOOK(binlog_relay_io, after_queue_event,
4364  (thd, mi, event_buf, event_len, synced)))
4365  {
4366  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4367  ER(ER_SLAVE_FATAL_ERROR),
4368  "Failed to run 'after_queue_event' hook");
4369  goto err;
4370  }
4371 
4372  mysql_mutex_lock(&mi->data_lock);
4373  if (flush_master_info(mi, FALSE))
4374  {
4375  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4376  ER(ER_SLAVE_FATAL_ERROR),
4377  "Failed to flush master info.");
4378  mysql_mutex_unlock(&mi->data_lock);
4379  goto err;
4380  }
4381  mysql_mutex_unlock(&mi->data_lock);
4382 
4383  /*
4384  See if the relay logs take too much space.
4385  We don't lock mi->rli->log_space_lock here; this dirty read saves time
4386  and does not introduce any problem:
4387  - if mi->rli->ignore_log_space_limit is 1 but becomes 0 just after (so
4388  the clean value is 0), then we are reading only one more event as we
4389  should, and we'll block only at the next event. No big deal.
4390  - if mi->rli->ignore_log_space_limit is 0 but becomes 1 just after (so
4391  the clean value is 1), then we are going into wait_for_relay_log_space()
4392  for no reason, but this function will do a clean read, notice the clean
4393  value and exit immediately.
4394  */
4395 #ifndef DBUG_OFF
4396  {
4397  char llbuf1[22], llbuf2[22];
4398  DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \
4399 ignore_log_space_limit=%d",
4400  llstr(rli->log_space_limit,llbuf1),
4401  llstr(rli->log_space_total,llbuf2),
4402  (int) rli->ignore_log_space_limit));
4403  }
4404 #endif
4405 
4406  if (rli->log_space_limit && rli->log_space_limit <
4407  rli->log_space_total &&
4408  !rli->ignore_log_space_limit)
4409  if (wait_for_relay_log_space(rli))
4410  {
4411  sql_print_error("Slave I/O thread aborted while waiting for relay \
4412 log space");
4413  goto err;
4414  }
4415  }
4416  }
4417 
4418  // error = 0;
4419 err:
4420  // print the current replication position
4421  sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
4422  mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(), llbuff));
4423  (void) RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi));
4424  thd->reset_query();
4425  thd->reset_db(NULL, 0);
4426  if (mysql)
4427  {
4428  /*
4429  Here we need to clear the active VIO before closing the
4430  connection with the master. The reason is that THD::awake()
4431  might be called from terminate_slave_thread() because somebody
4432  issued a STOP SLAVE. If that happends, the shutdown_active_vio()
4433  can be called in the middle of closing the VIO associated with
4434  the 'mysql' object, causing a crash.
4435  */
4436 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
4437  thd->clear_active_vio();
4438 #endif
4439  mysql_close(mysql);
4440  mi->mysql=0;
4441  }
4442  mysql_mutex_lock(&mi->data_lock);
4443  write_ignored_events_info_to_relay_log(thd, mi);
4444  mysql_mutex_unlock(&mi->data_lock);
4445  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
4446  mysql_mutex_lock(&mi->run_lock);
4447  /*
4448  Clean information used to start slave in order to avoid
4449  security issues.
4450  */
4451  mi->reset_start_info();
4452  /* Forget the relay log's format */
4453  mysql_mutex_lock(&mi->data_lock);
4454  mi->set_mi_description_event(NULL);
4455  mysql_mutex_unlock(&mi->data_lock);
4456 
4457  DBUG_ASSERT(thd->net.buff != 0);
4458  net_end(&thd->net); // destructor will not free it, because net.vio is 0
4459 
4460  thd->release_resources();
4461  mysql_mutex_lock(&LOCK_thread_count);
4462  THD_CHECK_SENTRY(thd);
4463  if (thd_added)
4464  remove_global_thread(thd);
4465  mysql_mutex_unlock(&LOCK_thread_count);
4466  delete thd;
4467 
4468  mi->abort_slave= 0;
4469  mi->slave_running= 0;
4470  mi->info_thd= 0;
4471  /*
4472  Note: the order of the two following calls (first broadcast, then unlock)
4473  is important. Otherwise a killer_thread can execute between the calls and
4474  delete the mi structure leading to a crash! (see BUG#25306 for details)
4475  */
4476  mysql_cond_broadcast(&mi->stop_cond); // tell the world we are done
4477  DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
4478  mysql_mutex_unlock(&mi->run_lock);
4479  DBUG_LEAVE; // Must match DBUG_ENTER()
4480  my_thread_end();
4481  ERR_remove_state(0);
4482  pthread_exit(0);
4483  return(0); // Avoid compiler warnings
4484 }
4485 
4486 /*
4487  Check the temporary directory used by commands like
4488  LOAD DATA INFILE.
4489  */
4490 static
4491 int check_temp_dir(char* tmp_file)
4492 {
4493  int fd;
4494  MY_DIR *dirp;
4495  char tmp_dir[FN_REFLEN];
4496  size_t tmp_dir_size;
4497 
4498  DBUG_ENTER("check_temp_dir");
4499 
4500  /*
4501  Get the directory from the temporary file.
4502  */
4503  dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
4504 
4505  /*
4506  Check if the directory exists.
4507  */
4508  if (!(dirp=my_dir(tmp_dir,MYF(MY_WME))))
4509  DBUG_RETURN(1);
4510  my_dirend(dirp);
4511 
4512  /*
4513  Check permissions to create a file.
4514  */
4515  //append the server UUID to the temp file name.
4516  char *unique_tmp_file_name= (char*)my_malloc((FN_REFLEN+TEMP_FILE_MAX_LEN)*sizeof(char), MYF(0));
4517  sprintf(unique_tmp_file_name, "%s%s", tmp_file, server_uuid);
4518  if ((fd= mysql_file_create(key_file_misc,
4519  unique_tmp_file_name, CREATE_MODE,
4520  O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW,
4521  MYF(MY_WME))) < 0)
4522  DBUG_RETURN(1);
4523 
4524  /*
4525  Clean up.
4526  */
4527  mysql_file_close(fd, MYF(0));
4528 
4529  mysql_file_delete(key_file_misc, unique_tmp_file_name, MYF(0));
4530  my_free(unique_tmp_file_name);
4531  DBUG_RETURN(0);
4532 }
4533 
4534 /*
4535  Worker thread for the parallel execution of the replication events.
4536 */
4537 pthread_handler_t handle_slave_worker(void *arg)
4538 {
4539  THD *thd; /* needs to be first for thread_stack */
4540  bool thd_added= false;
4541  int error= 0;
4542  Slave_worker *w= (Slave_worker *) arg;
4543  Relay_log_info* rli= w->c_rli;
4544  ulong purge_cnt= 0;
4545  ulonglong purge_size= 0;
4546  struct slave_job_item _item, *job_item= &_item;
4547 
4548  my_thread_init();
4549  DBUG_ENTER("handle_slave_worker");
4550 
4551  thd= new THD;
4552  if (!thd)
4553  {
4554  sql_print_error("Failed during slave worker initialization");
4555  goto err;
4556  }
4557  w->info_thd= thd;
4558  thd->thread_stack = (char*)&thd;
4559 
4560  pthread_detach_this_thread();
4561  if (init_slave_thread(thd, SLAVE_THD_WORKER))
4562  {
4563  // todo make SQL thread killed
4564  sql_print_error("Failed during slave worker initialization");
4565  goto err;
4566  }
4567  thd->init_for_queries(w);
4568 
4569  mysql_mutex_lock(&LOCK_thread_count);
4570  add_global_thread(thd);
4571  thd_added= true;
4572  mysql_mutex_unlock(&LOCK_thread_count);
4573 
4574  if (w->update_is_transactional())
4575  {
4576  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4577  "Error checking if the worker repository is transactional.");
4578  goto err;
4579  }
4580 
4581  mysql_mutex_lock(&w->jobs_lock);
4582  w->running_status= Slave_worker::RUNNING;
4583  mysql_cond_signal(&w->jobs_cond);
4584 
4585  mysql_mutex_unlock(&w->jobs_lock);
4586 
4587  DBUG_ASSERT(thd->is_slave_error == 0);
4588 
4589  while (!error)
4590  {
4591  error= slave_worker_exec_job(w, rli);
4592  }
4593 
4594  /*
4595  Cleanup after an error requires clear_error() go first.
4596  Otherwise assert(!all) in binlog_rollback()
4597  */
4598  thd->clear_error();
4599  w->cleanup_context(thd, error);
4600 
4601  mysql_mutex_lock(&w->jobs_lock);
4602 
4603  while(de_queue(&w->jobs, job_item))
4604  {
4605  purge_cnt++;
4606  purge_size += ((Log_event*) (job_item->data))->data_written;
4607  DBUG_ASSERT(job_item->data);
4608  delete static_cast<Log_event*>(job_item->data);
4609  }
4610 
4611  DBUG_ASSERT(w->jobs.len == 0);
4612 
4613  mysql_mutex_unlock(&w->jobs_lock);
4614 
4615  mysql_mutex_lock(&rli->pending_jobs_lock);
4616  rli->pending_jobs -= purge_cnt;
4617  rli->mts_pending_jobs_size -= purge_size;
4618  DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
4619 
4620  mysql_mutex_unlock(&rli->pending_jobs_lock);
4621 
4622  /*
4623  In MTS case cleanup_after_session() has be called explicitly.
4624  TODO: to make worker thd be deleted before Slave_worker instance.
4625  */
4626  if (thd->rli_slave)
4627  {
4628  w->cleanup_after_session();
4629  thd->rli_slave= NULL;
4630  }
4631  mysql_mutex_lock(&w->jobs_lock);
4632 
4633  w->running_status= Slave_worker::NOT_RUNNING;
4634  if (log_warnings > 1)
4635  sql_print_information("Worker %lu statistics: "
4636  "events processed = %lu "
4637  "hungry waits = %lu "
4638  "priv queue overfills = %llu ",
4639  w->id, w->events_done, w->wq_size_waits_cnt,
4640  w->jobs.waited_overfill);
4641  mysql_cond_signal(&w->jobs_cond); // famous last goodbye
4642 
4643  mysql_mutex_unlock(&w->jobs_lock);
4644 
4645 err:
4646 
4647  if (thd)
4648  {
4649  /*
4650  The slave code is very bad. Notice that it is missing
4651  several clean up calls here. I've just added what was
4652  necessary to avoid valgrind errors.
4653 
4654  /Alfranio
4655  */
4656  DBUG_ASSERT(thd->net.buff != 0);
4657  net_end(&thd->net);
4658 
4659  /*
4660  to avoid close_temporary_tables() closing temp tables as those
4661  are Coordinator's burden.
4662  */
4663  thd->system_thread= NON_SYSTEM_THREAD;
4664  thd->release_resources();
4665 
4666  mysql_mutex_lock(&LOCK_thread_count);
4667  THD_CHECK_SENTRY(thd);
4668  if (thd_added)
4669  remove_global_thread(thd);
4670  mysql_mutex_unlock(&LOCK_thread_count);
4671  delete thd;
4672  }
4673 
4674  my_thread_end();
4675  ERR_remove_state(0);
4676  pthread_exit(0);
4677  DBUG_RETURN(0);
4678 }
4679 
4684 int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2)
4685 {
4686  longlong filecmp= strcmp(id1->file_name, id2->file_name);
4687  longlong poscmp= id1->pos - id2->pos;
4688  return (filecmp < 0 ? -1 : (filecmp > 0 ? 1 :
4689  (poscmp < 0 ? -1 : (poscmp > 0 ? 1 : 0))));
4690 }
4691 
4692 int mts_recovery_groups(Relay_log_info *rli)
4693 {
4694  Log_event *ev= NULL;
4695  const char *errmsg= NULL;
4696  bool error= FALSE;
4697  bool flag_group_seen_begin= FALSE;
4698  uint recovery_group_cnt= 0;
4699  bool not_reached_commit= true;
4700  DYNAMIC_ARRAY above_lwm_jobs;
4701  Slave_job_group job_worker;
4702  IO_CACHE log;
4703  File file;
4704  LOG_INFO linfo;
4705  my_off_t offset= 0;
4706  MY_BITMAP *groups= &rli->recovery_groups;
4707 
4708  DBUG_ENTER("mts_recovery_groups");
4709 
4710  DBUG_ASSERT(rli->slave_parallel_workers == 0);
4711 
4712  /*
4713  Although mts_recovery_groups() is reentrant it returns
4714  early if the previous invocation raised any bit in
4715  recovery_groups bitmap.
4716  */
4717  if (rli->is_mts_recovery())
4718  DBUG_RETURN(0);
4719 
4720  /*
4721  Save relay log position to compare with worker's position.
4722  */
4723  LOG_POS_COORD cp=
4724  {
4725  (char *) rli->get_group_master_log_name(),
4726  rli->get_group_master_log_pos()
4727  };
4728 
4729  Format_description_log_event fdle(BINLOG_VERSION), *p_fdle= &fdle;
4730 
4731  if (!p_fdle->is_valid())
4732  DBUG_RETURN(TRUE);
4733 
4734  /*
4735  Gathers information on valuable workers and stores it in
4736  above_lwm_jobs in asc ordered by the master binlog coordinates.
4737  */
4738  my_init_dynamic_array(&above_lwm_jobs, sizeof(Slave_job_group),
4739  rli->recovery_parallel_workers,
4740  rli->recovery_parallel_workers);
4741 
4742  for (uint id= 0; id < rli->recovery_parallel_workers; id++)
4743  {
4744  Slave_worker *worker=
4745  Rpl_info_factory::create_worker(opt_rli_repository_id, id, rli, true);
4746 
4747  if (!worker)
4748  {
4749  error= TRUE;
4750  goto err;
4751  }
4752 
4753  LOG_POS_COORD w_last= { const_cast<char*>(worker->get_group_master_log_name()),
4754  worker->get_group_master_log_pos() };
4755  if (mts_event_coord_cmp(&w_last, &cp) > 0)
4756  {
4757  /*
4758  Inserts information into a dynamic array for further processing.
4759  The jobs/workers are ordered by the last checkpoint positions
4760  workers have seen.
4761  */
4762  job_worker.worker= worker;
4763  job_worker.checkpoint_log_pos= worker->checkpoint_master_log_pos;
4764  job_worker.checkpoint_log_name= worker->checkpoint_master_log_name;
4765 
4766  insert_dynamic(&above_lwm_jobs, (uchar*) &job_worker);
4767  }
4768  else
4769  {
4770  /*
4771  Deletes the worker because its jobs are included in the latest
4772  checkpoint.
4773  */
4774  delete worker;
4775  }
4776  }
4777 
4778  /*
4779  In what follows, the group Recovery Bitmap is constructed.
4780 
4781  seek(lwm);
4782 
4783  while(w= next(above_lwm_w))
4784  do
4785  read G
4786  if G == w->last_comm
4787  w.B << group_cnt++;
4788  RB |= w.B;
4789  break;
4790  else
4791  group_cnt++;
4792  while(!eof);
4793  continue;
4794  */
4795  DBUG_ASSERT(!rli->recovery_groups_inited);
4796 
4797  if (above_lwm_jobs.elements != 0)
4798  {
4799  bitmap_init(groups, NULL, MTS_MAX_BITS_IN_GROUP, FALSE);
4800  rli->recovery_groups_inited= true;
4801  bitmap_clear_all(groups);
4802  }
4803  rli->mts_recovery_group_cnt= 0;
4804  for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
4805  {
4806  Slave_worker *w= ((Slave_job_group *)
4807  dynamic_array_ptr(&above_lwm_jobs, it_job))->worker;
4808  LOG_POS_COORD w_last= { const_cast<char*>(w->get_group_master_log_name()),
4809  w->get_group_master_log_pos() };
4810  bool checksum_detected= FALSE;
4811 
4812  sql_print_information("Slave: MTS group recovery relay log info based on Worker-Id %lu, "
4813  "group_relay_log_name %s, group_relay_log_pos %llu "
4814  "group_master_log_name %s, group_master_log_pos %llu",
4815  w->id,
4816  w->get_group_relay_log_name(),
4817  w->get_group_relay_log_pos(),
4818  w->get_group_master_log_name(),
4819  w->get_group_master_log_pos());
4820 
4821  recovery_group_cnt= 0;
4822  not_reached_commit= true;
4823  if (rli->relay_log.find_log_pos(&linfo, rli->get_group_relay_log_name(), 1))
4824  {
4825  error= TRUE;
4826  sql_print_error("Error looking for %s.", rli->get_group_relay_log_name());
4827  goto err;
4828  }
4829  offset= rli->get_group_relay_log_pos();
4830  for (int checking= 0 ; not_reached_commit; checking++)
4831  {
4832  if ((file= open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
4833  {
4834  error= TRUE;
4835  sql_print_error("%s", errmsg);
4836  goto err;
4837  }
4838  /*
4839  Looking for the actual relay checksum algorithm that is present in
4840  a FD at head events of the relay log.
4841  */
4842  if (!checksum_detected)
4843  {
4844  int i= 0;
4845  while (i < 4 && (ev= Log_event::read_log_event(&log,
4846  (mysql_mutex_t*) 0, p_fdle, 0)))
4847  {
4848  if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
4849  {
4850  p_fdle->checksum_alg= ev->checksum_alg;
4851  checksum_detected= TRUE;
4852  }
4853  delete ev;
4854  i++;
4855  }
4856  if (!checksum_detected)
4857  {
4858  error= TRUE;
4859  sql_print_error("%s", "malformed or very old relay log which "
4860  "does not have FormatDescriptor");
4861  goto err;
4862  }
4863  }
4864 
4865  my_b_seek(&log, offset);
4866 
4867  while (not_reached_commit &&
4868  (ev= Log_event::read_log_event(&log, 0, p_fdle,
4869  opt_slave_sql_verify_checksum)))
4870  {
4871  DBUG_ASSERT(ev->is_valid());
4872 
4873  if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
4874  p_fdle->checksum_alg= ev->checksum_alg;
4875 
4876  if (ev->get_type_code() == ROTATE_EVENT ||
4877  ev->get_type_code() == FORMAT_DESCRIPTION_EVENT ||
4878  ev->get_type_code() == PREVIOUS_GTIDS_LOG_EVENT)
4879  {
4880  delete ev;
4881  ev= NULL;
4882  continue;
4883  }
4884 
4885  DBUG_PRINT("mts", ("Event Recoverying relay log info "
4886  "group_mster_log_name %s, event_master_log_pos %llu type code %u.",
4887  linfo.log_file_name, ev->log_pos, ev->get_type_code()));
4888 
4889  if (ev->starts_group())
4890  {
4891  flag_group_seen_begin= true;
4892  }
4893  else if ((ev->ends_group() || !flag_group_seen_begin) &&
4894  !is_gtid_event(ev))
4895  {
4896  int ret= 0;
4897  LOG_POS_COORD ev_coord= { (char *) rli->get_group_master_log_name(),
4898  ev->log_pos };
4899  flag_group_seen_begin= false;
4900  recovery_group_cnt++;
4901 
4902  sql_print_information("Slave: MTS group recovery relay log info "
4903  "group_master_log_name %s, "
4904  "event_master_log_pos %llu.",
4905  rli->get_group_master_log_name(), ev->log_pos);
4906  if ((ret= mts_event_coord_cmp(&ev_coord, &w_last)) == 0)
4907  {
4908 #ifndef DBUG_OFF
4909  for (uint i= 0; i <= w->checkpoint_seqno; i++)
4910  {
4911  if (bitmap_is_set(&w->group_executed, i))
4912  DBUG_PRINT("mts", ("Bit %u is set.", i));
4913  else
4914  DBUG_PRINT("mts", ("Bit %u is not set.", i));
4915  }
4916 #endif
4917  DBUG_PRINT("mts",
4918  ("Doing a shift ini(%lu) end(%lu).",
4919  (w->checkpoint_seqno + 1) - recovery_group_cnt,
4920  w->checkpoint_seqno));
4921 
4922  for (uint i= (w->checkpoint_seqno + 1) - recovery_group_cnt,
4923  j= 0; i <= w->checkpoint_seqno; i++, j++)
4924  {
4925  if (bitmap_is_set(&w->group_executed, i))
4926  {
4927  DBUG_PRINT("mts", ("Setting bit %u.", j));
4928  bitmap_fast_test_and_set(groups, j);
4929  }
4930  }
4931  not_reached_commit= false;
4932  }
4933  else
4934  DBUG_ASSERT(ret < 0);
4935  }
4936  delete ev;
4937  ev= NULL;
4938  }
4939  end_io_cache(&log);
4940  mysql_file_close(file, MYF(MY_WME));
4941  offset= BIN_LOG_HEADER_SIZE;
4942  if (not_reached_commit && rli->relay_log.find_next_log(&linfo, 1))
4943  {
4944  error= TRUE;
4945  sql_print_error("Error looking for file after %s.", linfo.log_file_name);
4946  goto err;
4947  }
4948  }
4949 
4950  rli->mts_recovery_group_cnt= (rli->mts_recovery_group_cnt < recovery_group_cnt ?
4951  recovery_group_cnt : rli->mts_recovery_group_cnt);
4952  }
4953 
4954  DBUG_ASSERT(!rli->recovery_groups_inited ||
4955  rli->mts_recovery_group_cnt <= groups->n_bits);
4956 
4957 err:
4958 
4959  for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
4960  {
4961  get_dynamic(&above_lwm_jobs, (uchar *) &job_worker, it_job);
4962  delete job_worker.worker;
4963  }
4964 
4965  delete_dynamic(&above_lwm_jobs);
4966  if (rli->recovery_groups_inited && rli->mts_recovery_group_cnt == 0)
4967  {
4968  bitmap_free(groups);
4969  rli->recovery_groups_inited= false;
4970  }
4971 
4972  DBUG_RETURN(error ? ER_MTS_RECOVERY_FAILURE : 0);
4973 }
4974 
4988 bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
4989  bool force, bool need_data_lock)
4990 {
4991  ulong cnt;
4992  bool error= FALSE;
4993  struct timespec curr_clock;
4994 
4995  DBUG_ENTER("checkpoint_routine");
4996 
4997 #ifndef DBUG_OFF
4998  if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
4999  {
5000  if (!rli->gaq->count_done(rli))
5001  DBUG_RETURN(FALSE);
5002  }
5003 #endif
5004 
5005  /*
5006  rli->checkpoint_group can have two possible values due to
5007  two possible status of the last (being scheduled) group.
5008  */
5009  DBUG_ASSERT(!rli->gaq->full() ||
5010  ((rli->checkpoint_seqno == rli->checkpoint_group -1 &&
5011  rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) ||
5012  rli->checkpoint_seqno == rli->checkpoint_group));
5013 
5014  /*
5015  Currently, the checkpoint routine is being called by the SQL Thread.
5016  For that reason, this function is called call from appropriate points
5017  in the SQL Thread's execution path and the elapsed time is calculated
5018  here to check if it is time to execute it.
5019  */
5020  set_timespec_nsec(curr_clock, 0);
5021  ulonglong diff= diff_timespec(curr_clock, rli->last_clock);
5022  if (!force && diff < period)
5023  {
5024  /*
5025  We do not need to execute the checkpoint now because
5026  the time elapsed is not enough.
5027  */
5028  DBUG_RETURN(FALSE);
5029  }
5030 
5031  do
5032  {
5033  cnt= rli->gaq->move_queue_head(&rli->workers);
5034 #ifndef DBUG_OFF
5035  if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5036  cnt != opt_mts_checkpoint_period)
5037  sql_print_error("This an error cnt != mts_checkpoint_period");
5038 #endif
5039  } while (!sql_slave_killed(rli->info_thd, rli) &&
5040  cnt == 0 && force &&
5041  !DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5042  (my_sleep(rli->mts_coordinator_basic_nap), 1));
5043  /*
5044  This checks how many consecutive jobs where processed.
5045  If this value is different than zero the checkpoint
5046  routine can proceed. Otherwise, there is nothing to be
5047  done.
5048  */
5049  if (cnt == 0)
5050  goto end;
5051 
5052 
5053  /* TODO:
5054  to turn the least occupied selection in terms of jobs pieces
5055  */
5056  for (uint i= 0; i < rli->workers.elements; i++)
5057  {
5058  Slave_worker *w_i;
5059  get_dynamic(&rli->workers, (uchar *) &w_i, i);
5060  set_dynamic(&rli->least_occupied_workers, (uchar*) &w_i->jobs.len, w_i->id);
5061  };
5062  sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
5063 
5064  if (need_data_lock)
5065  mysql_mutex_lock(&rli->data_lock);
5066  else
5067  mysql_mutex_assert_owner(&rli->data_lock);
5068 
5069  /*
5070  "Coordinator::commit_positions" {
5071 
5072  rli->gaq->lwm has been updated in move_queue_head() and
5073  to contain all but rli->group_master_log_name which
5074  is altered solely by Coordinator at special checkpoints.
5075  */
5076  rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
5077  rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
5078  DBUG_PRINT("mts", ("New checkpoint %llu %llu %s",
5079  rli->gaq->lwm.group_master_log_pos,
5080  rli->gaq->lwm.group_relay_log_pos,
5081  rli->gaq->lwm.group_relay_log_name));
5082 
5083  if (rli->gaq->lwm.group_relay_log_name[0] != 0)
5084  rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
5085 
5086  /*
5087  todo: uncomment notifies when UNTIL will be supported
5088 
5089  rli->notify_group_master_log_name_update();
5090  rli->notify_group_relay_log_name_update();
5091 
5092  Todo: optimize with if (wait_flag) broadcast
5093  waiter: set wait_flag; waits....; drops wait_flag;
5094  */
5095 
5096  error= rli->flush_info(TRUE);
5097 
5098  mysql_cond_broadcast(&rli->data_cond);
5099  if (need_data_lock)
5100  mysql_mutex_unlock(&rli->data_lock);
5101 
5102  /*
5103  We need to ensure that this is never called at this point when
5104  cnt is zero. This value means that the checkpoint information
5105  will be completely reset.
5106  */
5107  rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, need_data_lock);
5108 
5109  /* end-of "Coordinator::"commit_positions" */
5110 
5111 end:
5112 #ifndef DBUG_OFF
5113  if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5114  DBUG_SUICIDE();
5115 #endif
5116  set_timespec_nsec(rli->last_clock, 0);
5117 
5118  DBUG_RETURN(error);
5119 }
5120 
5129 int slave_start_single_worker(Relay_log_info *rli, ulong i)
5130 {
5131  int error= 0;
5132  pthread_t th;
5133  Slave_worker *w= NULL;
5134 
5135  mysql_mutex_assert_owner(&rli->run_lock);
5136 
5137  if (!(w=
5138  Rpl_info_factory::create_worker(opt_rli_repository_id, i, rli, false)))
5139  {
5140  sql_print_error("Failed during slave worker thread create");
5141  error= 1;
5142  goto err;
5143  }
5144 
5145  if (w->init_worker(rli, i))
5146  {
5147  sql_print_error("Failed during slave worker thread create");
5148  error= 1;
5149  goto err;
5150  }
5151  set_dynamic(&rli->workers, (uchar*) &w, i);
5152 
5153  if (DBUG_EVALUATE_IF("mts_worker_thread_fails", i == 1, 0) ||
5154  (error= mysql_thread_create(key_thread_slave_worker, &th,
5155  &connection_attrib,
5156  handle_slave_worker, (void*) w)))
5157  {
5158  sql_print_error("Failed during slave worker thread create (errno= %d)",
5159  error);
5160  error= 1;
5161  goto err;
5162  }
5163 
5164  mysql_mutex_lock(&w->jobs_lock);
5165  if (w->running_status == Slave_worker::NOT_RUNNING)
5166  mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
5167  mysql_mutex_unlock(&w->jobs_lock);
5168  // Least occupied inited with zero
5169  insert_dynamic(&rli->least_occupied_workers, (uchar*) &w->jobs.len);
5170 
5171 err:
5172  if (error && w)
5173  {
5174  delete w;
5175  /*
5176  Any failure after dynarray inserted must follow with deletion
5177  of just created item.
5178  */
5179  if (rli->workers.elements == i + 1)
5180  delete_dynamic_element(&rli->workers, i);
5181  }
5182  return error;
5183 }
5184 
5195 int slave_start_workers(Relay_log_info *rli, ulong n, bool *mts_inited)
5196 {
5197  uint i;
5198  int error= 0;
5199 
5200  mysql_mutex_assert_owner(&rli->run_lock);
5201 
5202  if (n == 0 && rli->mts_recovery_group_cnt == 0)
5203  {
5204  reset_dynamic(&rli->workers);
5205  goto end;
5206  }
5207 
5208  *mts_inited= true;
5209 
5210  /*
5211  The requested through argument number of Workers can be different
5212  from the previous time which ended with an error. Thereby
5213  the effective number of configured Workers is max of the two.
5214  */
5215  rli->init_workers(max(n, rli->recovery_parallel_workers));
5216 
5217  // CGAP dynarray holds id:s of partitions of the Current being executed Group
5218  my_init_dynamic_array(&rli->curr_group_assigned_parts,
5219  sizeof(db_worker_hash_entry*),
5220  SLAVE_INIT_DBS_IN_GROUP, 1);
5221  rli->last_assigned_worker= NULL; // associated with curr_group_assigned
5222  my_init_dynamic_array(&rli->curr_group_da, sizeof(Log_event*), 8, 2);
5223  // Least_occupied_workers array to hold items size of Slave_jobs_queue::len
5224  my_init_dynamic_array(&rli->least_occupied_workers, sizeof(ulong), n, 0);
5225 
5226  /*
5227  GAQ queue holds seqno:s of scheduled groups. C polls workers in
5228  @c opt_mts_checkpoint_period to update GAQ (see @c next_event())
5229  The length of GAQ is set to be equal to checkpoint_group.
5230  Notice, the size matters for mts_checkpoint_routine's progress loop.
5231  */
5232 
5233  rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(),
5234  sizeof(Slave_job_group),
5235  rli->checkpoint_group, n);
5236  if (!rli->gaq->inited)
5237  return 1;
5238 
5239  // length of WQ is actually constant though can be made configurable
5240  rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max;
5241  rli->mts_pending_jobs_size= 0;
5242  rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max;
5243  rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF;
5244  rli->mts_wq_excess_cnt= 0;
5245  rli->mts_wq_overrun_cnt= 0;
5246  rli->mts_wq_oversize= FALSE;
5247  rli->mts_coordinator_basic_nap= mts_coordinator_basic_nap;
5248  rli->mts_worker_underrun_level= mts_worker_underrun_level;
5249  rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
5250  rli->curr_group_isolated= FALSE;
5251  rli->checkpoint_seqno= 0;
5252  rli->mts_last_online_stat= my_time(0);
5253  rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
5254  /*
5255  dyn memory to consume by Coordinator per event
5256  */
5257  init_alloc_root(&rli->mts_coor_mem_root, NAME_LEN,
5258  (MAX_DBS_IN_EVENT_MTS / 2) * NAME_LEN);
5259 
5260  if (init_hash_workers(n)) // MTS: mapping_db_to_worker
5261  {
5262  sql_print_error("Failed to init partitions hash");
5263  error= 1;
5264  goto err;
5265  }
5266 
5267  for (i= 0; i < n; i++)
5268  {
5269  if ((error= slave_start_single_worker(rli, i)))
5270  goto err;
5271  }
5272 
5273 end:
5274  rli->slave_parallel_workers= n;
5275  // Effective end of the recovery right now when there is no gaps
5276  if (!error && rli->mts_recovery_group_cnt == 0)
5277  {
5278  if ((error= rli->mts_finalize_recovery()))
5279  (void) Rpl_info_factory::reset_workers(rli);
5280  if (!error)
5281  error= rli->flush_info(TRUE);
5282  }
5283 
5284 err:
5285  return error;
5286 }
5287 
5288 /*
5289  Ending Worker threads.
5290 
5291  Not in case Coordinator is killed itself, it first waits for
5292  Workers have finished their assignements, and then updates checkpoint.
5293  Workers are notified with setting KILLED status
5294  and waited for their acknowledgment as specified by
5295  worker's running_status.
5296  Coordinator finalizes with its MTS running status to reset few objects.
5297 */
5298 void slave_stop_workers(Relay_log_info *rli, bool *mts_inited)
5299 {
5300  int i;
5301  THD *thd= rli->info_thd;
5302 
5303  if (!*mts_inited)
5304  return;
5305  else if (rli->slave_parallel_workers == 0)
5306  goto end;
5307 
5308  /*
5309  In case of the "soft" graceful stop Coordinator
5310  guaranteed Workers were assigned with full groups so waiting
5311  will be resultful.
5312  "Hard" stop with KILLing Coordinator or erroring out by a Worker
5313  can't wait for Workers' completion because those may not receive
5314  commit-events of last assigned groups.
5315  */
5316  if (rli->mts_group_status != Relay_log_info::MTS_KILLED_GROUP &&
5317  thd->killed == THD::NOT_KILLED)
5318  {
5319  DBUG_ASSERT(rli->mts_group_status != Relay_log_info::MTS_IN_GROUP ||
5320  thd->is_error());
5321 
5322 #ifndef DBUG_OFF
5323  if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5324  {
5325  sql_print_error("This is not supposed to happen at this point...");
5326  DBUG_SUICIDE();
5327  }
5328 #endif
5329  // No need to know a possible error out of synchronization call.
5330  (void) wait_for_workers_to_finish(rli);
5331  /*
5332  At this point the coordinator has been stopped and the checkpoint
5333  routine is executed to eliminate possible gaps.
5334  */
5335  (void) mts_checkpoint_routine(rli, 0, false, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
5336  }
5337  for (i= rli->workers.elements - 1; i >= 0; i--)
5338  {
5339  Slave_worker *w;
5340  get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
5341 
5342  mysql_mutex_lock(&w->jobs_lock);
5343 
5344  if (w->running_status != Slave_worker::RUNNING)
5345  {
5346  mysql_mutex_unlock(&w->jobs_lock);
5347  continue;
5348  }
5349 
5350  w->running_status= Slave_worker::KILLED;
5351  mysql_cond_signal(&w->jobs_cond);
5352 
5353  mysql_mutex_unlock(&w->jobs_lock);
5354 
5355  if (log_warnings > 1)
5356  sql_print_information("Notifying Worker %lu to exit, thd %p", w->id,
5357  w->info_thd);
5358  }
5359 
5360  thd_proc_info(thd, "Waiting for workers to exit");
5361 
5362  for (i= rli->workers.elements - 1; i >= 0; i--)
5363  {
5364  Slave_worker *w= NULL;
5365  get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
5366 
5367  mysql_mutex_lock(&w->jobs_lock);
5368  while (w->running_status != Slave_worker::NOT_RUNNING)
5369  {
5370  PSI_stage_info old_stage;
5371  DBUG_ASSERT(w->running_status == Slave_worker::KILLED ||
5372  w->running_status == Slave_worker::ERROR_LEAVING);
5373 
5374  thd->ENTER_COND(&w->jobs_cond, &w->jobs_lock,
5375  &stage_slave_waiting_workers_to_exit, &old_stage);
5376  mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
5377  thd->EXIT_COND(&old_stage);
5378  mysql_mutex_lock(&w->jobs_lock);
5379  }
5380  mysql_mutex_unlock(&w->jobs_lock);
5381 
5382  delete_dynamic_element(&rli->workers, i);
5383  delete w;
5384  }
5385 
5386  if (log_warnings > 1)
5387  sql_print_information("Total MTS session statistics: "
5388  "events processed = %llu; "
5389  "worker queues filled over overrun level = %lu; "
5390  "waited due a Worker queue full = %lu; "
5391  "waited due the total size = %lu; "
5392  "slept when Workers occupied = %lu ",
5393  rli->mts_events_assigned, rli->mts_wq_overrun_cnt,
5394  rli->mts_wq_overfill_cnt, rli->wq_size_waits_cnt,
5395  rli->mts_wq_no_underrun_cnt);
5396 
5397  DBUG_ASSERT(rli->pending_jobs == 0);
5398  DBUG_ASSERT(rli->mts_pending_jobs_size == 0);
5399 
5400 end:
5401  rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
5402  destroy_hash_workers(rli);
5403  delete rli->gaq;
5404  delete_dynamic(&rli->least_occupied_workers); // least occupied
5405 
5406  // Destroy buffered events of the current group prior to exit.
5407  for (uint i= 0; i < rli->curr_group_da.elements; i++)
5408  delete *(Log_event**) dynamic_array_ptr(&rli->curr_group_da, i);
5409  delete_dynamic(&rli->curr_group_da); // GCDA
5410 
5411  delete_dynamic(&rli->curr_group_assigned_parts); // GCAP
5412  rli->deinit_workers();
5413  rli->slave_parallel_workers= 0;
5414  free_root(&rli->mts_coor_mem_root, MYF(0));
5415  *mts_inited= false;
5416 }
5417 
5418 
5427 pthread_handler_t handle_slave_sql(void *arg)
5428 {
5429  THD *thd; /* needs to be first for thread_stack */
5430  bool thd_added= false;
5431  char llbuff[22],llbuff1[22];
5432  char saved_log_name[FN_REFLEN];
5433  char saved_master_log_name[FN_REFLEN];
5434  my_off_t saved_log_pos= 0;
5435  my_off_t saved_master_log_pos= 0;
5436  my_off_t saved_skip= 0;
5437 
5438  Relay_log_info* rli = ((Master_info*)arg)->rli;
5439  const char *errmsg;
5440  bool mts_inited= false;
5441 
5442  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
5443  my_thread_init();
5444  DBUG_ENTER("handle_slave_sql");
5445 
5446  DBUG_ASSERT(rli->inited);
5447  mysql_mutex_lock(&rli->run_lock);
5448  DBUG_ASSERT(!rli->slave_running);
5449  errmsg= 0;
5450 #ifndef DBUG_OFF
5451  rli->events_until_exit = abort_slave_event_count;
5452 #endif
5453 
5454  thd = new THD; // note that contructor of THD uses DBUG_ !
5455  thd->thread_stack = (char*)&thd; // remember where our stack is
5456  rli->info_thd= thd;
5457 
5458  /* Inform waiting threads that slave has started */
5459  rli->slave_run_id++;
5460  rli->slave_running = 1;
5461  rli->reported_unsafe_warning= false;
5462 
5463  pthread_detach_this_thread();
5464  if (init_slave_thread(thd, SLAVE_THD_SQL))
5465  {
5466  /*
5467  TODO: this is currently broken - slave start and change master
5468  will be stuck if we fail here
5469  */
5470  mysql_cond_broadcast(&rli->start_cond);
5471  mysql_mutex_unlock(&rli->run_lock);
5472  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5473  "Failed during slave thread initialization");
5474  goto err;
5475  }
5476  thd->init_for_queries(rli);
5477  thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
5478  set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables
5479 
5480  mysql_mutex_lock(&LOCK_thread_count);
5481  add_global_thread(thd);
5482  thd_added= true;
5483  mysql_mutex_unlock(&LOCK_thread_count);
5484 
5485  /* MTS: starting the worker pool */
5486  if (slave_start_workers(rli, rli->opt_slave_parallel_workers, &mts_inited) != 0)
5487  {
5488  mysql_cond_broadcast(&rli->start_cond);
5489  mysql_mutex_unlock(&rli->run_lock);
5490  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5491  "Failed during slave workers initialization");
5492  goto err;
5493  }
5494  /*
5495  We are going to set slave_running to 1. Assuming slave I/O thread is
5496  alive and connected, this is going to make Seconds_Behind_Master be 0
5497  i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
5498  the moment we start we can think we are caught up, and the next second we
5499  start receiving data so we realize we are not caught up and
5500  Seconds_Behind_Master grows. No big deal.
5501  */
5502  rli->abort_slave = 0;
5503 
5504  /*
5505  Reset errors for a clean start (otherwise, if the master is idle, the SQL
5506  thread may execute no Query_log_event, so the error will remain even
5507  though there's no problem anymore). Do not reset the master timestamp
5508  (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
5509  as we are not sure that we are going to receive a query, we want to
5510  remember the last master timestamp (to say how many seconds behind we are
5511  now.
5512  But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
5513  */
5514  rli->clear_error();
5515 
5516  if (rli->update_is_transactional())
5517  {
5518  mysql_cond_broadcast(&rli->start_cond);
5519  mysql_mutex_unlock(&rli->run_lock);
5520  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5521  "Error checking if the relay log repository is transactional.");
5522  goto err;
5523  }
5524 
5525  if (!rli->is_transactional())
5526  rli->report(WARNING_LEVEL, 0,
5527  "If a crash happens this configuration does not guarantee that the relay "
5528  "log info will be consistent");
5529 
5530  mysql_mutex_unlock(&rli->run_lock);
5531  mysql_cond_broadcast(&rli->start_cond);
5532 
5533  DEBUG_SYNC(thd, "after_start_slave");
5534 
5535  //tell the I/O thread to take relay_log_space_limit into account from now on
5536  mysql_mutex_lock(&rli->log_space_lock);
5537  rli->ignore_log_space_limit= 0;
5538  mysql_mutex_unlock(&rli->log_space_lock);
5539  rli->trans_retries= 0; // start from "no error"
5540  DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
5541 
5542  if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
5543  rli->get_group_relay_log_pos(),
5544  true/*need_data_lock=true*/, &errmsg,
5545  1 /*look for a description_event*/))
5546  {
5547  rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5548  "Error initializing relay log position: %s", errmsg);
5549  goto err;
5550  }
5551  THD_CHECK_SENTRY(thd);
5552 #ifndef DBUG_OFF
5553  {
5554  char llbuf1[22], llbuf2[22];
5555  DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
5556  llstr(my_b_tell(rli->cur_log),llbuf1),
5557  llstr(rli->get_event_relay_log_pos(),llbuf2)));
5558  DBUG_ASSERT(rli->get_event_relay_log_pos() >= BIN_LOG_HEADER_SIZE);
5559  /*
5560  Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
5561  correct position when it's called just after my_b_seek() (the questionable
5562  stuff is those "seek is done on next read" comments in the my_b_seek()
5563  source code).
5564  The crude reality is that this assertion randomly fails whereas
5565  replication seems to work fine. And there is no easy explanation why it
5566  fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
5567  init_relay_log_pos() called above). Maybe the assertion would be
5568  meaningful if we held rli->data_lock between the my_b_seek() and the
5569  DBUG_ASSERT().
5570  */
5571 #ifdef SHOULD_BE_CHECKED
5572  DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->get_event_relay_log_pos());
5573 #endif
5574  }
5575 #endif
5576  DBUG_ASSERT(rli->info_thd == thd);
5577 
5578 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
5579  /* engine specific hook, to be made generic */
5580  if (ndb_wait_setup_func && ndb_wait_setup_func(opt_ndb_wait_setup))
5581  {
5582  sql_print_warning("Slave SQL thread : NDB : Tables not available after %lu"
5583  " seconds. Consider increasing --ndb-wait-setup value",
5584  opt_ndb_wait_setup);
5585  }
5586 #endif
5587 
5588  DBUG_PRINT("master_info",("log_file_name: %s position: %s",
5589  rli->get_group_master_log_name(),
5590  llstr(rli->get_group_master_log_pos(),llbuff)));
5591  if (log_warnings)
5592  sql_print_information("Slave SQL thread initialized, starting replication in \
5593 log '%s' at position %s, relay log '%s' position: %s", rli->get_rpl_log_name(),
5594  llstr(rli->get_group_master_log_pos(),llbuff),rli->get_group_relay_log_name(),
5595  llstr(rli->get_group_relay_log_pos(),llbuff1));
5596 
5597  if (check_temp_dir(rli->slave_patternload_file))
5598  {
5599  rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
5600  "Unable to use slave's temporary directory %s - %s",
5601  slave_load_tmpdir, thd->get_stmt_da()->message());
5602  goto err;
5603  }
5604 
5605  /* execute init_slave variable */
5606  if (opt_init_slave.length)
5607  {
5608  execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
5609  if (thd->is_slave_error)
5610  {
5611  rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
5612  "Slave SQL thread aborted. Can't execute init_slave query");
5613  goto err;
5614  }
5615  }
5616 
5617  /*
5618  First check until condition - probably there is nothing to execute. We
5619  do not want to wait for next event in this case.
5620  */
5621  mysql_mutex_lock(&rli->data_lock);
5622  if (rli->slave_skip_counter)
5623  {
5624  strmake(saved_log_name, rli->get_group_relay_log_name(), FN_REFLEN - 1);
5625  strmake(saved_master_log_name, rli->get_group_master_log_name(), FN_REFLEN - 1);
5626  saved_log_pos= rli->get_group_relay_log_pos();
5627  saved_master_log_pos= rli->get_group_master_log_pos();
5628  saved_skip= rli->slave_skip_counter;
5629  }
5630  if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
5631  rli->is_until_satisfied(thd, NULL))
5632  {
5633  mysql_mutex_unlock(&rli->data_lock);
5634  goto err;
5635  }
5636  mysql_mutex_unlock(&rli->data_lock);
5637 
5638  /* Read queries from the IO/THREAD until this thread is killed */
5639 
5640  while (!sql_slave_killed(thd,rli))
5641  {
5642  THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
5643  DBUG_ASSERT(rli->info_thd == thd);
5644  THD_CHECK_SENTRY(thd);
5645 
5646  if (saved_skip && rli->slave_skip_counter == 0)
5647  {
5648  sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
5649  "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5650  "master_log_pos='%ld' and new position at "
5651  "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5652  "master_log_pos='%ld' ",
5653  (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
5654  saved_master_log_name, (ulong) saved_master_log_pos,
5655  rli->get_group_relay_log_name(), (ulong) rli->get_group_relay_log_pos(),
5656  rli->get_group_master_log_name(), (ulong) rli->get_group_master_log_pos());
5657  saved_skip= 0;
5658  }
5659 
5660  if (exec_relay_log_event(thd,rli))
5661  {
5662  DBUG_PRINT("info", ("exec_relay_log_event() failed"));
5663  // do not scare the user if SQL thread was simply killed or stopped
5664  if (!sql_slave_killed(thd,rli))
5665  {
5666  /*
5667  retrieve as much info as possible from the thd and, error
5668  codes and warnings and print this to the error log as to
5669  allow the user to locate the error
5670  */
5671  uint32 const last_errno= rli->last_error().number;
5672 
5673  if (thd->is_error())
5674  {
5675  char const *const errmsg= thd->get_stmt_da()->message();
5676 
5677  DBUG_PRINT("info",
5678  ("thd->get_stmt_da()->sql_errno()=%d; "
5679  "rli->last_error.number=%d",
5680  thd->get_stmt_da()->sql_errno(), last_errno));
5681  if (last_errno == 0)
5682  {
5683  /*
5684  This function is reporting an error which was not reported
5685  while executing exec_relay_log_event().
5686  */
5687  rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
5688  "%s", errmsg);
5689  }
5690  else if (last_errno != thd->get_stmt_da()->sql_errno())
5691  {
5692  /*
5693  * An error was reported while executing exec_relay_log_event()
5694  * however the error code differs from what is in the thread.
5695  * This function prints out more information to help finding
5696  * what caused the problem.
5697  */
5698  sql_print_error("Slave (additional info): %s Error_code: %d",
5699  errmsg, thd->get_stmt_da()->sql_errno());
5700  }
5701  }
5702 
5703  /* Print any warnings issued */
5705  thd->get_stmt_da()->sql_conditions();
5706  const Sql_condition *err;
5707  /*
5708  Added controlled slave thread cancel for replication
5709  of user-defined variables.
5710  */
5711  bool udf_error = false;
5712  while ((err= it++))
5713  {
5714  if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
5715  udf_error = true;
5716  sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
5717  }
5718  if (udf_error)
5719  sql_print_error("Error loading user-defined library, slave SQL "
5720  "thread aborted. Install the missing library, and restart the "
5721  "slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
5722  "position %s", rli->get_rpl_log_name(),
5723  llstr(rli->get_group_master_log_pos(), llbuff));
5724  else
5725  sql_print_error("\
5726 Error running query, slave SQL thread aborted. Fix the problem, and restart \
5727 the slave SQL thread with \"SLAVE START\". We stopped at log \
5728 '%s' position %s", rli->get_rpl_log_name(),
5729 llstr(rli->get_group_master_log_pos(), llbuff));
5730  }
5731  goto err;
5732  }
5733  }
5734 
5735  /* Thread stopped. Print the current replication position to the log */
5736  sql_print_information("Slave SQL thread exiting, replication stopped in log "
5737  "'%s' at position %s",
5738  rli->get_rpl_log_name(),
5739  llstr(rli->get_group_master_log_pos(), llbuff));
5740 
5741  err:
5742 
5743  slave_stop_workers(rli, &mts_inited); // stopping worker pool
5744  if (rli->recovery_groups_inited)
5745  {
5746  bitmap_free(&rli->recovery_groups);
5747  rli->mts_recovery_group_cnt= 0;
5748  rli->recovery_groups_inited= false;
5749  }
5750 
5751  /*
5752  Some events set some playgrounds, which won't be cleared because thread
5753  stops. Stopping of this thread may not be known to these events ("stop"
5754  request is detected only by the present function, not by events), so we
5755  must "proactively" clear playgrounds:
5756  */
5757  thd->clear_error();
5758  rli->cleanup_context(thd, 1);
5759  /*
5760  Some extra safety, which should not been needed (normally, event deletion
5761  should already have done these assignments (each event which sets these
5762  variables is supposed to set them to 0 before terminating)).
5763  */
5764  thd->catalog= 0;
5765  thd->reset_query();
5766  thd->reset_db(NULL, 0);
5767 
5768  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5769  mysql_mutex_lock(&rli->run_lock);
5770  /* We need data_lock, at least to wake up any waiting master_pos_wait() */
5771  mysql_mutex_lock(&rli->data_lock);
5772  DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
5773  /* When master_pos_wait() wakes up it will check this and terminate */
5774  rli->slave_running= 0;
5775  /* Forget the relay log's format */
5776  rli->set_rli_description_event(NULL);
5777  /* Wake up master_pos_wait() */
5778  mysql_mutex_unlock(&rli->data_lock);
5779  DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
5780  mysql_cond_broadcast(&rli->data_cond);
5781  rli->ignore_log_space_limit= 0; /* don't need any lock */
5782  /* we die so won't remember charset - re-update them on next thread start */
5784  rli->save_temporary_tables = thd->temporary_tables;
5785 
5786  /*
5787  TODO: see if we can do this conditionally in next_event() instead
5788  to avoid unneeded position re-init
5789  */
5790  thd->temporary_tables = 0; // remove tempation from destructor to close them
5791  DBUG_ASSERT(thd->net.buff != 0);
5792  net_end(&thd->net); // destructor will not free it, because we are weird
5793  DBUG_ASSERT(rli->info_thd == thd);
5794  THD_CHECK_SENTRY(thd);
5795  rli->info_thd= 0;
5796  set_thd_in_use_temporary_tables(rli); // (re)set info_thd in use for saved temp tables
5797 
5798  thd->release_resources();
5799  mysql_mutex_lock(&LOCK_thread_count);
5800  THD_CHECK_SENTRY(thd);
5801  if (thd_added)
5802  remove_global_thread(thd);
5803  mysql_mutex_unlock(&LOCK_thread_count);
5804  delete thd;
5805  /*
5806  Note: the order of the broadcast and unlock calls below (first broadcast, then unlock)
5807  is important. Otherwise a killer_thread can execute between the calls and
5808  delete the mi structure leading to a crash! (see BUG#25306 for details)
5809  */
5810  mysql_cond_broadcast(&rli->stop_cond);
5811  DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5812  mysql_mutex_unlock(&rli->run_lock); // tell the world we are done
5813 
5814  DBUG_LEAVE; // Must match DBUG_ENTER()
5815  my_thread_end();
5816  ERR_remove_state(0);
5817  pthread_exit(0);
5818  return 0; // Avoid compiler warnings
5819 }
5820 
5821 
5822 /*
5823  process_io_create_file()
5824 */
5825 
5826 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
5827 {
5828  int error = 1;
5829  ulong num_bytes;
5830  bool cev_not_written;
5831  THD *thd = mi->info_thd;
5832  NET *net = &mi->mysql->net;
5833  DBUG_ENTER("process_io_create_file");
5834 
5835  mysql_mutex_assert_owner(&mi->data_lock);
5836 
5837  if (unlikely(!cev->is_valid()))
5838  DBUG_RETURN(1);
5839 
5840  if (!rpl_filter->db_ok(cev->db))
5841  {
5842  skip_load_data_infile(net);
5843  DBUG_RETURN(0);
5844  }
5845  DBUG_ASSERT(cev->inited_from_old);
5846  thd->file_id = cev->file_id = mi->file_id++;
5847  thd->server_id = cev->server_id;
5848  cev_not_written = 1;
5849 
5850  if (unlikely(net_request_file(net,cev->fname)))
5851  {
5852  sql_print_error("Slave I/O: failed requesting download of '%s'",
5853  cev->fname);
5854  goto err;
5855  }
5856 
5857  /*
5858  This dummy block is so we could instantiate Append_block_log_event
5859  once and then modify it slightly instead of doing it multiple times
5860  in the loop
5861  */
5862  {
5863  Append_block_log_event aev(thd,0,0,0,0);
5864 
5865  for (;;)
5866  {
5867  if (unlikely((num_bytes=my_net_read(net)) == packet_error))
5868  {
5869  sql_print_error("Network read error downloading '%s' from master",
5870  cev->fname);
5871  goto err;
5872  }
5873  if (unlikely(!num_bytes)) /* eof */
5874  {
5875  /* 3.23 master wants it */
5876  net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
5877  /*
5878  If we wrote Create_file_log_event, then we need to write
5879  Execute_load_log_event. If we did not write Create_file_log_event,
5880  then this is an empty file and we can just do as if the LOAD DATA
5881  INFILE had not existed, i.e. write nothing.
5882  */
5883  if (unlikely(cev_not_written))
5884  break;
5885  Execute_load_log_event xev(thd,0,0);
5886  xev.log_pos = cev->log_pos;
5887  if (unlikely(mi->rli->relay_log.append_event(&xev, mi) != 0))
5888  {
5889  mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
5890  ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5891  "error writing Exec_load event to relay log");
5892  goto err;
5893  }
5894  mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
5895  break;
5896  }
5897  if (unlikely(cev_not_written))
5898  {
5899  cev->block = net->read_pos;
5900  cev->block_len = num_bytes;
5901  if (unlikely(mi->rli->relay_log.append_event(cev, mi) != 0))
5902  {
5903  mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
5904  ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5905  "error writing Create_file event to relay log");
5906  goto err;
5907  }
5908  cev_not_written=0;
5909  mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
5910  }
5911  else
5912  {
5913  aev.block = net->read_pos;
5914  aev.block_len = num_bytes;
5915  aev.log_pos = cev->log_pos;
5916  if (unlikely(mi->rli->relay_log.append_event(&aev, mi) != 0))
5917  {
5918  mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
5919  ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5920  "error writing Append_block event to relay log");
5921  goto err;
5922  }
5923  mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
5924  }
5925  }
5926  }
5927  error=0;
5928 err:
5929  DBUG_RETURN(error);
5930 }
5931 
5932 
5949 static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
5950 {
5951  DBUG_ENTER("process_io_rotate");
5952  mysql_mutex_assert_owner(&mi->data_lock);
5953 
5954  if (unlikely(!rev->is_valid()))
5955  DBUG_RETURN(1);
5956 
5957  /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
5958  memcpy(const_cast<char *>(mi->get_master_log_name()),
5959  rev->new_log_ident, rev->ident_len + 1);
5960  mi->set_master_log_pos(rev->pos);
5961  DBUG_PRINT("info", ("new (master_log_name, master_log_pos): ('%s', %lu)",
5962  mi->get_master_log_name(), (ulong) mi->get_master_log_pos()));
5963 #ifndef DBUG_OFF
5964  /*
5965  If we do not do this, we will be getting the first
5966  rotate event forever, so we need to not disconnect after one.
5967  */
5968  if (disconnect_slave_event_count)
5969  mi->events_until_exit++;
5970 #endif
5971 
5972  /*
5973  If mi_description_event is format <4, there is conversion in the
5974  relay log to the slave's format (4). And Rotate can mean upgrade or
5975  nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
5976  no need to reset mi_description_event now. And if it's nothing (same
5977  master version as before), no need (still using the slave's format).
5978  */
5979  Format_description_log_event *old_fdle= mi->get_mi_description_event();
5980  if (old_fdle->binlog_version >= 4)
5981  {
5982  DBUG_ASSERT(old_fdle->checksum_alg ==
5983  mi->rli->relay_log.relay_log_checksum_alg);
5984  Format_description_log_event *new_fdle= new
5986  new_fdle->checksum_alg= mi->rli->relay_log.relay_log_checksum_alg;
5987  mi->set_mi_description_event(new_fdle);
5988  }
5989  /*
5990  Rotate the relay log makes binlog format detection easier (at next slave
5991  start or mysqlbinlog)
5992  */
5993  int ret= rotate_relay_log(mi);
5994  DBUG_RETURN(ret);
5995 }
5996 
6003 static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
6004  ulong event_len)
6005 {
6006  const char *errmsg = 0;
6007  ulong inc_pos;
6008  bool ignore_event= 0;
6009  char *tmp_buf = 0;
6010  Relay_log_info *rli= mi->rli;
6011  DBUG_ENTER("queue_binlog_ver_1_event");
6012 
6013  mysql_mutex_assert_owner(&mi->data_lock);
6014 
6015  /*
6016  If we get Load event, we need to pass a non-reusable buffer
6017  to read_log_event, so we do a trick
6018  */
6019  if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
6020  {
6021  if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
6022  {
6023  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6024  ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
6025  DBUG_RETURN(1);
6026  }
6027  memcpy(tmp_buf,buf,event_len);
6028  /*
6029  Create_file constructor wants a 0 as last char of buffer, this 0 will
6030  serve as the string-termination char for the file's name (which is at the
6031  end of the buffer)
6032  We must increment event_len, otherwise the event constructor will not see
6033  this end 0, which leads to segfault.
6034  */
6035  tmp_buf[event_len++]=0;
6036  int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
6037  buf = (const char*)tmp_buf;
6038  }
6039  /*
6040  This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
6041  send the loaded file, and write it to the relay log in the form of
6042  Append_block/Exec_load (the SQL thread needs the data, as that thread is not
6043  connected to the master).
6044  */
6045  Log_event *ev=
6046  Log_event::read_log_event(buf, event_len, &errmsg,
6047  mi->get_mi_description_event(), 0);
6048  if (unlikely(!ev))
6049  {
6050  sql_print_error("Read invalid event from master: '%s',\
6051  master could be corrupt but a more likely cause of this is a bug",
6052  errmsg);
6053  my_free((char*) tmp_buf);
6054  DBUG_RETURN(1);
6055  }
6056 
6057  mi->set_master_log_pos(ev->log_pos); /* 3.23 events don't contain log_pos */
6058  switch (ev->get_type_code()) {
6059  case STOP_EVENT:
6060  ignore_event= 1;
6061  inc_pos= event_len;
6062  break;
6063  case ROTATE_EVENT:
6064  if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6065  {
6066  delete ev;
6067  DBUG_RETURN(1);
6068  }
6069  inc_pos= 0;
6070  break;
6071  case CREATE_FILE_EVENT:
6072  /*
6073  Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
6074  queue_old_event() which is for 3.23 events which don't comprise
6075  CREATE_FILE_EVENT. This is because read_log_event() above has just
6076  transformed LOAD_EVENT into CREATE_FILE_EVENT.
6077  */
6078  {
6079  /* We come here when and only when tmp_buf != 0 */
6080  DBUG_ASSERT(tmp_buf != 0);
6081  inc_pos=event_len;
6082  ev->log_pos+= inc_pos;
6083  int error = process_io_create_file(mi,(Create_file_log_event*)ev);
6084  delete ev;
6085  mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6086  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6087  my_free((char*)tmp_buf);
6088  DBUG_RETURN(error);
6089  }
6090  default:
6091  inc_pos= event_len;
6092  break;
6093  }
6094  if (likely(!ignore_event))
6095  {
6096  if (ev->log_pos)
6097  /*
6098  Don't do it for fake Rotate events (see comment in
6099  Log_event::Log_event(const char* buf...) in log_event.cc).
6100  */
6101  ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
6102  if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
6103  {
6104  delete ev;
6105  DBUG_RETURN(1);
6106  }
6107  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6108  }
6109  delete ev;
6110  mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6111  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6112  DBUG_RETURN(0);
6113 }
6114 
6121 static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
6122  ulong event_len)
6123 {
6124  const char *errmsg = 0;
6125  ulong inc_pos;
6126  char *tmp_buf = 0;
6127  Relay_log_info *rli= mi->rli;
6128  DBUG_ENTER("queue_binlog_ver_3_event");
6129 
6130  mysql_mutex_assert_owner(&mi->data_lock);
6131 
6132  /* read_log_event() will adjust log_pos to be end_log_pos */
6133  Log_event *ev=
6134  Log_event::read_log_event(buf, event_len, &errmsg,
6135  mi->get_mi_description_event(), 0);
6136  if (unlikely(!ev))
6137  {
6138  sql_print_error("Read invalid event from master: '%s',\
6139  master could be corrupt but a more likely cause of this is a bug",
6140  errmsg);
6141  my_free((char*) tmp_buf);
6142  DBUG_RETURN(1);
6143  }
6144  switch (ev->get_type_code()) {
6145  case STOP_EVENT:
6146  goto err;
6147  case ROTATE_EVENT:
6148  if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6149  {
6150  delete ev;
6151  DBUG_RETURN(1);
6152  }
6153  inc_pos= 0;
6154  break;
6155  default:
6156  inc_pos= event_len;
6157  break;
6158  }
6159 
6160  if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
6161  {
6162  delete ev;
6163  DBUG_RETURN(1);
6164  }
6165  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6166  delete ev;
6167  mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6168 err:
6169  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6170  DBUG_RETURN(0);
6171 }
6172 
6173 /*
6174  queue_old_event()
6175 
6176  Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
6177  (exactly, slave's) format. To do the conversion, we create a 5.0 event from
6178  the 3.23/4.0 bytes, then write this event to the relay log.
6179 
6180  TODO:
6181  Test this code before release - it has to be tested on a separate
6182  setup with 3.23 master or 4.0 master
6183 */
6184 
6185 static int queue_old_event(Master_info *mi, const char *buf,
6186  ulong event_len)
6187 {
6188  DBUG_ENTER("queue_old_event");
6189 
6190  mysql_mutex_assert_owner(&mi->data_lock);
6191 
6192  switch (mi->get_mi_description_event()->binlog_version)
6193  {
6194  case 1:
6195  DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
6196  case 3:
6197  DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
6198  default: /* unsupported format; eg version 2 */
6199  DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
6200  mi->get_mi_description_event()->binlog_version));
6201  DBUG_RETURN(1);
6202  }
6203 }
6204 
6205 /*
6206  queue_event()
6207 
6208  If the event is 3.23/4.0, passes it to queue_old_event() which will convert
6209  it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
6210  no format conversion, it's pure read/write of bytes.
6211  So a 5.0.0 slave's relay log can contain events in the slave's format or in
6212  any >=5.0.0 format.
6213 */
6214 
6215 static int queue_event(Master_info* mi,const char* buf, ulong event_len)
6216 {
6217  int error= 0;
6218  String error_msg;
6219  ulong inc_pos= 0;
6220  Relay_log_info *rli= mi->rli;
6221  mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
6222  ulong s_id;
6223  bool unlock_data_lock= TRUE;
6224  /*
6225  FD_q must have been prepared for the first R_a event
6226  inside get_master_version_and_clock()
6227  Show-up of FD:s affects checksum_alg at once because
6228  that changes FD_queue.
6229  */
6230  uint8 checksum_alg= mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
6231  mi->checksum_alg_before_fd :
6232  mi->rli->relay_log.relay_log_checksum_alg;
6233 
6234  char *save_buf= NULL; // needed for checksumming the fake Rotate event
6235  char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
6236  Gtid gtid= { 0, 0 };
6237  Log_event_type event_type= (Log_event_type)buf[EVENT_TYPE_OFFSET];
6238 
6239  DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
6240  checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
6241  checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
6242 
6243  DBUG_ENTER("queue_event");
6244  /*
6245  FD_queue checksum alg description does not apply in a case of
6246  FD itself. The one carries both parts of the checksum data.
6247  */
6248  if (event_type == FORMAT_DESCRIPTION_EVENT)
6249  {
6250  checksum_alg= get_checksum_alg(buf, event_len);
6251  }
6252  else if (event_type == START_EVENT_V3)
6253  {
6254  // checksum behaviour is similar to the pre-checksum FD handling
6255  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6256  mysql_mutex_lock(&mi->data_lock);
6257  mi->get_mi_description_event()->checksum_alg=
6258  mi->rli->relay_log.relay_log_checksum_alg= checksum_alg=
6259  BINLOG_CHECKSUM_ALG_OFF;
6260  mysql_mutex_unlock(&mi->data_lock);
6261  }
6262 
6263  // does not hold always because of old binlog can work with NM
6264  // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
6265 
6266  // should hold unless manipulations with RL. Tests that do that
6267  // will have to refine the clause.
6268  DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
6269  BINLOG_CHECKSUM_ALG_UNDEF);
6270 
6271  // Emulate the network corruption
6272  DBUG_EXECUTE_IF("corrupt_queue_event",
6273  if (event_type != FORMAT_DESCRIPTION_EVENT)
6274  {
6275  char *debug_event_buf_c = (char*) buf;
6276  int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
6277  debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
6278  DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
6279  DBUG_SET("");
6280  }
6281  );
6282 
6283  if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
6284  {
6285  error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
6286  unlock_data_lock= FALSE;
6287  goto err;
6288  }
6289 
6290  mysql_mutex_lock(&mi->data_lock);
6291 
6292  if (mi->get_mi_description_event()->binlog_version < 4 &&
6293  event_type != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
6294  {
6295  int ret= queue_old_event(mi,buf,event_len);
6296  mysql_mutex_unlock(&mi->data_lock);
6297  DBUG_RETURN(ret);
6298  }
6299 
6300  switch (event_type) {
6301  case STOP_EVENT:
6302  /*
6303  We needn't write this event to the relay log. Indeed, it just indicates a
6304  master server shutdown. The only thing this does is cleaning. But
6305  cleaning is already done on a per-master-thread basis (as the master
6306  server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
6307  prepared statements' deletion are TODO only when we binlog prep stmts).
6308 
6309  We don't even increment mi->get_master_log_pos(), because we may be just after
6310  a Rotate event. Btw, in a few milliseconds we are going to have a Start
6311  event from the next binlog (unless the master is presently running
6312  without --log-bin).
6313  */
6314  goto err;
6315  case ROTATE_EVENT:
6316  {
6317  Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
6318  event_len - BINLOG_CHECKSUM_LEN : event_len,
6319  mi->get_mi_description_event());
6320 
6321  if (unlikely(process_io_rotate(mi, &rev)))
6322  {
6323  error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6324  goto err;
6325  }
6326  /*
6327  Checksum special cases for the fake Rotate (R_f) event caused by the protocol
6328  of events generation and serialization in RL where Rotate of master is
6329  queued right next to FD of slave.
6330  Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
6331  Two special rules apply only to the first R_f which comes in before any FD_m.
6332  The 2nd R_f should be compatible with the FD_s that must have taken over
6333  the last seen FD_m's (A).
6334 
6335  RSC_1: If OM \and fake Rotate \and slave is configured to
6336  to compute checksum for its first FD event for RL
6337  the fake Rotate gets checksummed here.
6338  */
6339  if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
6340  mi->rli->relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
6341  {
6342  ha_checksum rot_crc= my_checksum(0L, NULL, 0);
6343  event_len += BINLOG_CHECKSUM_LEN;
6344  memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
6345  int4store(&rot_buf[EVENT_LEN_OFFSET],
6346  uint4korr(rot_buf + EVENT_LEN_OFFSET) + BINLOG_CHECKSUM_LEN);
6347  rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
6348  event_len - BINLOG_CHECKSUM_LEN);
6349  int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
6350  DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6351  DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
6352  mi->rli->relay_log.relay_log_checksum_alg);
6353  /* the first one */
6354  DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6355  save_buf= (char *) buf;
6356  buf= rot_buf;
6357  }
6358  else
6359  /*
6360  RSC_2: If NM \and fake Rotate \and slave does not compute checksum
6361  the fake Rotate's checksum is stripped off before relay-logging.
6362  */
6363  if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
6364  mi->rli->relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
6365  {
6366  event_len -= BINLOG_CHECKSUM_LEN;
6367  memcpy(rot_buf, buf, event_len);
6368  int4store(&rot_buf[EVENT_LEN_OFFSET],
6369  uint4korr(rot_buf + EVENT_LEN_OFFSET) - BINLOG_CHECKSUM_LEN);
6370  DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6371  DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
6372  mi->rli->relay_log.relay_log_checksum_alg);
6373  /* the first one */
6374  DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6375  save_buf= (char *) buf;
6376  buf= rot_buf;
6377  }
6378  /*
6379  Now the I/O thread has just changed its mi->get_master_log_name(), so
6380  incrementing mi->get_master_log_pos() is nonsense.
6381  */
6382  inc_pos= 0;
6383  break;
6384  }
6385  case FORMAT_DESCRIPTION_EVENT:
6386  {
6387  /*
6388  Create an event, and save it (when we rotate the relay log, we will have
6389  to write this event again).
6390  */
6391  /*
6392  We are the only thread which reads/writes mi_description_event.
6393  The relay_log struct does not move (though some members of it can
6394  change), so we needn't any lock (no rli->data_lock, no log lock).
6395  */
6396  const char* errmsg;
6397  // mark it as undefined that is irrelevant anymore
6398  mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6399  Format_description_log_event *new_fdle=
6401  Log_event::read_log_event(buf, event_len, &errmsg,
6402  mi->get_mi_description_event(), 1);
6403  if (new_fdle == NULL)
6404  {
6405  error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6406  goto err;
6407  }
6408  if (new_fdle->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
6409  new_fdle->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
6410  mi->set_mi_description_event(new_fdle);
6411 
6412  /* installing new value of checksum Alg for relay log */
6413  mi->rli->relay_log.relay_log_checksum_alg= new_fdle->checksum_alg;
6414 
6415  /*
6416  Though this does some conversion to the slave's format, this will
6417  preserve the master's binlog format version, and number of event types.
6418  */
6419  /*
6420  If the event was not requested by the slave (the slave did not ask for
6421  it), i.e. has end_log_pos=0, we do not increment mi->get_master_log_pos()
6422  */
6423  inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
6424  DBUG_PRINT("info",("binlog format is now %d",
6425  mi->get_mi_description_event()->binlog_version));
6426 
6427  }
6428  break;
6429 
6430  case HEARTBEAT_LOG_EVENT:
6431  {
6432  /*
6433  HB (heartbeat) cannot come before RL (Relay)
6434  */
6435  char llbuf[22];
6436  Heartbeat_log_event hb(buf,
6437  mi->rli->relay_log.relay_log_checksum_alg
6438  != BINLOG_CHECKSUM_ALG_OFF ?
6439  event_len - BINLOG_CHECKSUM_LEN : event_len,
6440  mi->get_mi_description_event());
6441  if (!hb.is_valid())
6442  {
6443  error= ER_SLAVE_HEARTBEAT_FAILURE;
6444  error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
6445  error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6446  error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
6447  error_msg.append(STRING_WITH_LEN(" log_pos "));
6448  llstr(hb.log_pos, llbuf);
6449  error_msg.append(llbuf, strlen(llbuf));
6450  goto err;
6451  }
6452  mi->received_heartbeats++;
6453  mi->last_heartbeat= my_time(0);
6454 
6455 
6456  /*
6457  During GTID protocol, if the master skips transactions,
6458  a heartbeat event is sent to the slave at the end of last
6459  skipped transaction to update coordinates.
6460 
6461  I/O thread receives the heartbeat event and updates mi
6462  only if the received heartbeat position is greater than
6463  mi->get_master_log_pos(). This event is written to the
6464  relay log as an ignored Rotate event. SQL thread reads
6465  the rotate event only to update the coordinates corresponding
6466  to the last skipped transaction. Note that,
6467  we update only the positions and not the file names, as a ROTATE
6468  EVENT from the master prior to this will update the file name.
6469  */
6470  if (mi->is_auto_position() && mi->get_master_log_pos() < hb.log_pos
6471  && mi->get_master_log_name() != NULL)
6472  {
6473 
6474  DBUG_ASSERT(memcmp(const_cast<char*>(mi->get_master_log_name()),
6475  hb.get_log_ident(), hb.get_ident_len()) == 0);
6476 
6477  mi->set_master_log_pos(hb.log_pos);
6478 
6479  /*
6480  Put this heartbeat event in the relay log as a Rotate Event.
6481  */
6482  inc_pos= 0;
6483  memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(),
6484  FN_REFLEN);
6485  rli->ign_master_log_pos_end = mi->get_master_log_pos();
6486 
6487  if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
6488  goto err;
6489  }
6490 
6491  /*
6492  compare local and event's versions of log_file, log_pos.
6493 
6494  Heartbeat is sent only after an event corresponding to the corrdinates
6495  the heartbeat carries.
6496  Slave can not have a difference in coordinates except in the only
6497  special case when mi->get_master_log_name(), mi->get_master_log_pos() have never
6498  been updated by Rotate event i.e when slave does not have any history
6499  with the master (and thereafter mi->get_master_log_pos() is NULL).
6500 
6501  TODO: handling `when' for SHOW SLAVE STATUS' snds behind
6502  */
6503  if ((memcmp(const_cast<char *>(mi->get_master_log_name()),
6504  hb.get_log_ident(), hb.get_ident_len())
6505  && mi->get_master_log_name() != NULL)
6506  || ((mi->get_master_log_pos() != hb.log_pos && gtid_mode == 0) ||
6507  /*
6508  When Gtid mode is on only monotocity can be claimed.
6509  Todo: enhance HB event with the skipped events size
6510  and to convert HB.pos == MI.pos to HB.pos - HB.skip_size == MI.pos
6511  */
6512  (mi->get_master_log_pos() > hb.log_pos)))
6513  {
6514  /* missed events of heartbeat from the past */
6515  error= ER_SLAVE_HEARTBEAT_FAILURE;
6516  error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
6517  error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6518  error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
6519  error_msg.append(STRING_WITH_LEN(" log_pos "));
6520  llstr(hb.log_pos, llbuf);
6521  error_msg.append(llbuf, strlen(llbuf));
6522  goto err;
6523  }
6524  goto skip_relay_logging;
6525  }
6526  break;
6527 
6528  case PREVIOUS_GTIDS_LOG_EVENT:
6529  {
6530  if (gtid_mode == 0)
6531  {
6532  error= ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF;
6533  goto err;
6534  }
6535  /*
6536  This event does not have any meaning for the slave and
6537  was just sent to show the slave the master is making
6538  progress and avoid possible deadlocks.
6539  So at this point, the event is replaced by a rotate
6540  event what will make the slave to update what it knows
6541  about the master's coordinates.
6542  */
6543  inc_pos= 0;
6544  mi->set_master_log_pos(mi->get_master_log_pos() + event_len);
6545  memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
6546  rli->ign_master_log_pos_end= mi->get_master_log_pos();
6547 
6548  if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
6549  goto err;
6550 
6551  goto skip_relay_logging;
6552  }
6553  break;
6554 
6555  case GTID_LOG_EVENT:
6556  {
6557  if (gtid_mode == 0)
6558  {
6559  error= ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF;
6560  goto err;
6561  }
6562  global_sid_lock->rdlock();
6563  Gtid_log_event gtid_ev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
6564  event_len - BINLOG_CHECKSUM_LEN : event_len,
6565  mi->get_mi_description_event());
6566  gtid.sidno= gtid_ev.get_sidno(false);
6567  global_sid_lock->unlock();
6568  if (gtid.sidno < 0)
6569  goto err;
6570  gtid.gno= gtid_ev.get_gno();
6571  inc_pos= event_len;
6572  }
6573  break;
6574 
6575  case ANONYMOUS_GTID_LOG_EVENT:
6576 
6577  default:
6578  inc_pos= event_len;
6579  break;
6580  }
6581 
6582  /*
6583  If this event is originating from this server, don't queue it.
6584  We don't check this for 3.23 events because it's simpler like this; 3.23
6585  will be filtered anyway by the SQL slave thread which also tests the
6586  server id (we must also keep this test in the SQL thread, in case somebody
6587  upgrades a 4.0 slave which has a not-filtered relay log).
6588 
6589  ANY event coming from ourselves can be ignored: it is obvious for queries;
6590  for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
6591  (--log-slave-updates would not log that) unless this slave is also its
6592  direct master (an unsupported, useless setup!).
6593  */
6594 
6595  mysql_mutex_lock(log_lock);
6596  s_id= uint4korr(buf + SERVER_ID_OFFSET);
6597 
6598  /*
6599  If server_id_bits option is set we need to mask out irrelevant bits
6600  when checking server_id, but we still put the full unmasked server_id
6601  into the Relay log so that it can be accessed when applying the event
6602  */
6603  s_id&= opt_server_id_mask;
6604 
6605  if ((s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
6606  /*
6607  the following conjunction deals with IGNORE_SERVER_IDS, if set
6608  If the master is on the ignore list, execution of
6609  format description log events and rotate events is necessary.
6610  */
6611  (mi->ignore_server_ids->dynamic_ids.elements > 0 &&
6612  mi->shall_ignore_server_id(s_id) &&
6613  /* everything is filtered out from non-master */
6614  (s_id != mi->master_id ||
6615  /* for the master meta information is necessary */
6616  (event_type != FORMAT_DESCRIPTION_EVENT &&
6617  event_type != ROTATE_EVENT))))
6618  {
6619  /*
6620  Do not write it to the relay log.
6621  a) We still want to increment mi->get_master_log_pos(), so that we won't
6622  re-read this event from the master if the slave IO thread is now
6623  stopped/restarted (more efficient if the events we are ignoring are big
6624  LOAD DATA INFILE).
6625  b) We want to record that we are skipping events, for the information of
6626  the slave SQL thread, otherwise that thread may let
6627  rli->group_relay_log_pos stay too small if the last binlog's event is
6628  ignored.
6629  But events which were generated by this slave and which do not exist in
6630  the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
6631  mi->get_master_log_pos().
6632  If the event is originated remotely and is being filtered out by
6633  IGNORE_SERVER_IDS it increments mi->get_master_log_pos()
6634  as well as rli->group_relay_log_pos.
6635  */
6636  if (!(s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
6637  (event_type != FORMAT_DESCRIPTION_EVENT &&
6638  event_type != ROTATE_EVENT &&
6639  event_type != STOP_EVENT))
6640  {
6641  mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6642  memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
6643  DBUG_ASSERT(rli->ign_master_log_name_end[0]);
6644  rli->ign_master_log_pos_end= mi->get_master_log_pos();
6645  }
6646  rli->relay_log.signal_update(); // the slave SQL thread needs to re-check
6647  DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
6648  (ulong) mi->get_master_log_pos(), uint4korr(buf + SERVER_ID_OFFSET)));
6649  }
6650  else
6651  {
6652  /* write the event to the relay log */
6653  if (likely(rli->relay_log.append_buffer(buf, event_len, mi) == 0))
6654  {
6655  mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6656  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6657  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6658 
6659  if (event_type == GTID_LOG_EVENT)
6660  {
6661  global_sid_lock->rdlock();
6662  int ret= rli->add_logged_gtid(gtid.sidno, gtid.gno);
6663  global_sid_lock->unlock();
6664  if (ret != 0)
6665  goto err;
6666  }
6667  }
6668  else
6669  {
6670  error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6671  }
6672  rli->ign_master_log_name_end[0]= 0; // last event is not ignored
6673  if (save_buf != NULL)
6674  buf= save_buf;
6675  }
6676  mysql_mutex_unlock(log_lock);
6677 
6678 skip_relay_logging:
6679 
6680 err:
6681  if (unlock_data_lock)
6682  mysql_mutex_unlock(&mi->data_lock);
6683  DBUG_PRINT("info", ("error: %d", error));
6684  if (error)
6685  mi->report(ERROR_LEVEL, error, ER(error),
6686  (error == ER_SLAVE_RELAY_LOG_WRITE_FAILURE)?
6687  "could not queue event from master" :
6688  error_msg.ptr());
6689  DBUG_RETURN(error);
6690 }
6691 
/**
  Clear the active VIO of the calling thread when that thread is a slave
  thread.

  The body is compiled only when SIGNAL_WITH_VIO_SHUTDOWN is defined;
  otherwise the function does nothing.
*/
extern "C" void slave_io_thread_detach_vio()
{
#ifdef SIGNAL_WITH_VIO_SHUTDOWN
  THD *caller_thd= current_thd;
  /* Nothing to do without a THD, or for a thread that is not a slave thread. */
  if (caller_thd == NULL || !caller_thd->slave_thread)
    return;
  caller_thd->clear_active_vio();
#endif
}
6714 
6715 
6716 /*
6717  Try to connect until successful or slave killed
6718 
6719  SYNPOSIS
6720  safe_connect()
6721  thd Thread handler for slave
6722  mysql MySQL connection handle
6723  mi Replication handle
6724 
6725  RETURN
6726  0 ok
6727  # Error
6728 */
6729 
6730 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
6731 {
6732  DBUG_ENTER("safe_connect");
6733 
6734  DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
6735 }
6736 
6737 
6738 /*
6739  SYNOPSIS
6740  connect_to_master()
6741 
6742  IMPLEMENTATION
6743  Try to connect until successful or slave killed or we have retried
6744  mi->retry_count times
6745 */
6746 
6747 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
6748  bool reconnect, bool suppress_warnings)
6749 {
6750  int slave_was_killed= 0;
6751  int last_errno= -2; // impossible error
6752  ulong err_count=0;
6753  char llbuff[22];
6754  char password[MAX_PASSWORD_LENGTH + 1];
6755  int password_size= sizeof(password);
6756  DBUG_ENTER("connect_to_master");
6757  set_slave_max_allowed_packet(thd, mysql);
6758 #ifndef DBUG_OFF
6759  mi->events_until_exit = disconnect_slave_event_count;
6760 #endif
6761  ulong client_flag= CLIENT_REMEMBER_OPTIONS;
6762  if (opt_slave_compressed_protocol)
6763  client_flag=CLIENT_COMPRESS; /* We will use compression */
6764 
6765  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
6766  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
6767 
6768  if (mi->bind_addr[0])
6769  {
6770  DBUG_PRINT("info",("bind_addr: %s", mi->bind_addr));
6771  mysql_options(mysql, MYSQL_OPT_BIND, mi->bind_addr);
6772  }
6773 
6774 #ifdef HAVE_OPENSSL
6775  if (mi->ssl)
6776  {
6777  mysql_ssl_set(mysql,
6778  mi->ssl_key[0]?mi->ssl_key:0,
6779  mi->ssl_cert[0]?mi->ssl_cert:0,
6780  mi->ssl_ca[0]?mi->ssl_ca:0,
6781  mi->ssl_capath[0]?mi->ssl_capath:0,
6782  mi->ssl_cipher[0]?mi->ssl_cipher:0);
6783  mysql_options(mysql, MYSQL_OPT_SSL_CRL,
6784  mi->ssl_crl[0] ? mi->ssl_crl : 0);
6785  mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
6786  mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
6787  mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
6788  &mi->ssl_verify_server_cert);
6789  }
6790 #endif
6791 
6792  mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
6793  /* This one is not strictly needed but we have it here for completeness */
6794  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
6795 
6796  if (mi->is_start_plugin_auth_configured())
6797  {
6798  DBUG_PRINT("info", ("Slaving is using MYSQL_DEFAULT_AUTH %s",
6799  mi->get_start_plugin_auth()));
6800  mysql_options(mysql, MYSQL_DEFAULT_AUTH, mi->get_start_plugin_auth());
6801  }
6802 
6803  if (mi->is_start_plugin_dir_configured())
6804  {
6805  DBUG_PRINT("info", ("Slaving is using MYSQL_PLUGIN_DIR %s",
6806  mi->get_start_plugin_dir()));
6807  mysql_options(mysql, MYSQL_PLUGIN_DIR, mi->get_start_plugin_dir());
6808  }
6809  /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
6810  else if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
6811  mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
6812 
6813  if (!mi->is_start_user_configured())
6814  sql_print_warning("%s", ER(ER_INSECURE_CHANGE_MASTER));
6815 
6816  if (mi->get_password(password, &password_size))
6817  {
6818  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6819  ER(ER_SLAVE_FATAL_ERROR),
6820  "Unable to configure password when attempting to "
6821  "connect to the master server. Connection attempt "
6822  "terminated.");
6823  DBUG_RETURN(1);
6824  }
6825 
6826  const char* user= mi->get_user();
6827  if (user == NULL || user[0] == 0)
6828  {
6829  mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6830  ER(ER_SLAVE_FATAL_ERROR),
6831  "Invalid (empty) username when attempting to "
6832  "connect to the master server. Connection attempt "
6833  "terminated.");
6834  DBUG_RETURN(1);
6835  }
6836 
6837  while (!(slave_was_killed = io_slave_killed(thd,mi))
6838  && (reconnect ? mysql_reconnect(mysql) != 0 :
6839  mysql_real_connect(mysql, mi->host, user,
6840  password, 0, mi->port, 0, client_flag) == 0))
6841  {
6842  /*
6843  SHOW SLAVE STATUS will display the number of retries which
6844  would be real retry counts instead of mi->retry_count for
6845  each connection attempt by 'Last_IO_Error' entry.
6846  */
6847  last_errno=mysql_errno(mysql);
6848  suppress_warnings= 0;
6849  mi->report(ERROR_LEVEL, last_errno,
6850  "error %s to master '%s@%s:%d'"
6851  " - retry-time: %d retries: %lu",
6852  (reconnect ? "reconnecting" : "connecting"),
6853  mi->get_user(), mi->host, mi->port,
6854  mi->connect_retry, err_count + 1);
6855  /*
6856  By default we try forever. The reason is that failure will trigger
6857  master election, so if the user did not set mi->retry_count we
6858  do not want to have election triggered on the first failure to
6859  connect
6860  */
6861  if (++err_count == mi->retry_count)
6862  {
6863  slave_was_killed=1;
6864  break;
6865  }
6866  slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
6867  }
6868 
6869  if (!slave_was_killed)
6870  {
6871  mi->clear_error(); // clear possible left over reconnect error
6872  if (reconnect)
6873  {
6874  if (!suppress_warnings && log_warnings)
6875  sql_print_information("Slave: connected to master '%s@%s:%d',\
6876 replication resumed in log '%s' at position %s", mi->get_user(),
6877  mi->host, mi->port,
6878  mi->get_io_rpl_log_name(),
6879  llstr(mi->get_master_log_pos(),llbuff));
6880  }
6881  else
6882  {
6883  general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
6884  mi->get_user(), mi->host, mi->port);
6885  }
6886 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
6887  thd->set_active_vio(mysql->net.vio);
6888 #endif
6889  }
6890  mysql->reconnect= 1;
6891  DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
6892  DBUG_RETURN(slave_was_killed);
6893 }
6894 
6895 
6896 /*
6897  safe_reconnect()
6898 
6899  IMPLEMENTATION
6900  Try to connect until successful or slave killed or we have retried
6901  mi->retry_count times
6902 */
6903 
6904 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
6905  bool suppress_warnings)
6906 {
6907  DBUG_ENTER("safe_reconnect");
6908  DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
6909 }
6910 
6911 
6912 MYSQL *rpl_connect_master(MYSQL *mysql)
6913 {
6914  THD *thd= current_thd;
6915  char password[MAX_PASSWORD_LENGTH + 1];
6916  int password_size= sizeof(password);
6917  Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO);
6918  if (!mi)
6919  {
6920  sql_print_error("'rpl_connect_master' must be called in slave I/O thread context.");
6921  return NULL;
6922  }
6923 
6924  bool allocated= false;
6925 
6926  if (!mysql)
6927  {
6928  if(!(mysql= mysql_init(NULL)))
6929  {
6930  sql_print_error("rpl_connect_master: failed in mysql_init()");
6931  return NULL;
6932  }
6933  allocated= true;
6934  }
6935 
6936  /*
6937  XXX: copied from connect_to_master, this function should not
6938  change the slave status, so we cannot use connect_to_master
6939  directly
6940 
6941  TODO: make this part a seperate function to eliminate duplication
6942  */
6943  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
6944  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
6945 
6946  if (mi->bind_addr[0])
6947  {
6948  DBUG_PRINT("info",("bind_addr: %s", mi->bind_addr));
6949  mysql_options(mysql, MYSQL_OPT_BIND, mi->bind_addr);
6950  }
6951 
6952 #ifdef HAVE_OPENSSL
6953  if (mi->ssl)
6954  {
6955  mysql_ssl_set(mysql,
6956  mi->ssl_key[0]?mi->ssl_key:0,
6957  mi->ssl_cert[0]?mi->ssl_cert:0,
6958  mi->ssl_ca[0]?mi->ssl_ca:0,
6959  mi->ssl_capath[0]?mi->ssl_capath:0,
6960  mi->ssl_cipher[0]?mi->ssl_cipher:0);
6961  mysql_options(mysql, MYSQL_OPT_SSL_CRL,
6962  mi->ssl_crl[0] ? mi->ssl_crl : 0);
6963  mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
6964  mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
6965  mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
6966  &mi->ssl_verify_server_cert);
6967  }
6968 #endif
6969 
6970  mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
6971  /* This one is not strictly needed but we have it here for completeness */
6972  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
6973 
6974  if (mi->is_start_plugin_auth_configured())
6975  {
6976  DBUG_PRINT("info", ("Slaving is using MYSQL_DEFAULT_AUTH %s",
6977  mi->get_start_plugin_auth()));
6978  mysql_options(mysql, MYSQL_DEFAULT_AUTH, mi->get_start_plugin_auth());
6979  }
6980 
6981  if (mi->is_start_plugin_dir_configured())
6982  {
6983  DBUG_PRINT("info", ("Slaving is using MYSQL_PLUGIN_DIR %s",
6984  mi->get_start_plugin_dir()));
6985  mysql_options(mysql, MYSQL_PLUGIN_DIR, mi->get_start_plugin_dir());
6986  }
6987  /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
6988  else if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
6989  mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
6990 
6991  if (!mi->is_start_user_configured())
6992  sql_print_warning("%s", ER(ER_INSECURE_CHANGE_MASTER));
6993 
6994  const char *user= mi->get_user();
6995  if (user == NULL
6996  || user[0] == 0
6997  || mi->get_password(password, &password_size)
6998  || io_slave_killed(thd, mi)
6999  || !mysql_real_connect(mysql, mi->host, user,
7000  password, 0, mi->port, 0, 0))
7001  {
7002  if (!io_slave_killed(thd, mi))
7003  sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)",
7004  mysql_error(mysql), mysql_errno(mysql));
7005 
7006  if (allocated)
7007  mysql_close(mysql); // this will free the object
7008  return NULL;
7009  }
7010  return mysql;
7011 }
7012 
7013 /*
7014  Called when we notice that the current "hot" log got rotated under our feet.
7015 */
7016 
7017 static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
7018 {
7019  DBUG_ENTER("reopen_relay_log");
7020  DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
7021  DBUG_ASSERT(rli->cur_log_fd == -1);
7022 
7023  IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
7024  if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->get_event_relay_log_name(),
7025  errmsg)) <0)
7026  DBUG_RETURN(0);
7027  /*
7028  We want to start exactly where we was before:
7029  relay_log_pos Current log pos
7030  pending Number of bytes already processed from the event
7031  */
7032  rli->set_event_relay_log_pos(max<ulonglong>(rli->get_event_relay_log_pos(),
7033  BIN_LOG_HEADER_SIZE));
7034  my_b_seek(cur_log,rli->get_event_relay_log_pos());
7035  DBUG_RETURN(cur_log);
7036 }
7037 
7038 
/**
  Read the next event the slave SQL thread should process from the relay log.

  Loops until an event is obtained, an error occurs, or sql_slave_killed()
  reports that the thread must stop. Two reading modes are handled: a "hot"
  log (rli->cur_log is not rli->cache_buf; reads are done while holding the
  relay log's log lock) and a "cold" log read through rli->cache_buf. At EOF
  of a hot log the function waits for the relay log to be updated; at EOF of
  a cold log it closes the file and moves on to the next log, purging the
  finished one when relay_log_purge is set.

  The caller must hold rli->data_lock (asserted). It is released and taken
  again around the wait for relay log updates and around
  mts_checkpoint_routine().

  @param rli  Relay log info of the SQL thread; rli->info_thd must be set.

  @return The event read (or a Rotate_log_event created here when an ignored
          end segment is pending), or 0 if the thread was killed or an error
          occurred. Error texts are written with sql_print_error().
*/
7049 static Log_event* next_event(Relay_log_info* rli)
7050 {
7051  Log_event* ev;
7052  IO_CACHE* cur_log = rli->cur_log;
7053  mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
7054  const char* errmsg=0;
7055  THD* thd = rli->info_thd;
7056  DBUG_ENTER("next_event");
7057 
7058  DBUG_ASSERT(thd != 0);
7059 
// Debug builds only: return no event once rli->events_until_exit has run
// out while abort_slave_event_count is set.
7060 #ifndef DBUG_OFF
7061  if (abort_slave_event_count && !rli->events_until_exit--)
7062  DBUG_RETURN(0);
7063 #endif
7064 
7065  /*
7066  For most operations we need to protect rli members with data_lock,
7067  so we assume calling function acquired this mutex for us and we will
7068  hold it for the most of the loop below. However, we will release it
7069  whenever it is worth the hassle, and in the cases when we go into a
7070  mysql_cond_wait() with the non-data_lock mutex
7071  */
7072  mysql_mutex_assert_owner(&rli->data_lock);
7073 
7074  while (!sql_slave_killed(thd,rli))
7075  {
7076  /*
7077  We can have two kinds of log reading:
7078  hot_log:
7079  rli->cur_log points at the IO_CACHE of relay_log, which
7080  is actively being updated by the I/O thread. We need to be careful
7081  in this case and make sure that we are not looking at a stale log that
7082  has already been rotated. If it has been, we reopen the log.
7083 
7084  The other case is much simpler:
7085  We just have a read only log that nobody else will be updating.
7086  */
7087  bool hot_log;
7088  if ((hot_log = (cur_log != &rli->cache_buf)))
7089  {
7090  DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
7091  mysql_mutex_lock(log_lock);
7092 
7093  /*
7094  Reading xxx_file_id is safe because the log will only
7095  be rotated when we hold relay_log.LOCK_log
7096  */
7097  if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count)
7098  {
7099  // The master has switched to a new log file; Reopen the old log file
7100  cur_log=reopen_relay_log(rli, &errmsg);
7101  mysql_mutex_unlock(log_lock);
7102  if (!cur_log) // No more log files
7103  goto err;
7104  hot_log=0; // Using old binary log
7105  }
7106  }
7107  /*
7108  As there is no guarantee that the relay is open (for example, an I/O
7109  error during a write by the slave I/O thread may have closed it), we
7110  have to test it.
7111  */
7112  if (!my_b_inited(cur_log))
7113  goto err;
7114 #ifndef DBUG_OFF
7115  {
7116  DBUG_PRINT("info", ("assertion skip %lu file pos %lu event relay log pos %lu file %s\n",
7117  (ulong) rli->slave_skip_counter, (ulong) my_b_tell(cur_log),
7118  (ulong) rli->get_event_relay_log_pos(),
7119  rli->get_event_relay_log_name()));
7120 
7121  /* This is an assertion which sometimes fails, let's try to track it */
7122  char llbuf1[22], llbuf2[22];
7123  DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
7124  llstr(my_b_tell(cur_log),llbuf1),
7125  llstr(rli->get_event_relay_log_pos(),llbuf2)));
7126 
7127  DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
7128  DBUG_ASSERT(my_b_tell(cur_log) == rli->get_event_relay_log_pos() || rli->is_parallel_exec());
7129 
7130  DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
7131  rli->get_group_master_log_name(),
7132  (ulong) rli->get_group_master_log_pos(),
7133  rli->get_group_relay_log_name(),
7134  (ulong) rli->get_group_relay_log_pos(),
7135  rli->get_event_relay_log_name(),
7136  (ulong) rli->get_event_relay_log_pos()));
7137  }
7138 #endif
7139  /*
7140  Relay log is always in new format - if the master is 3.23, the
7141  I/O thread will convert the format for us.
7142  A problem: the description event may be in a previous relay log. So if
7143  the slave has been shutdown meanwhile, we would have to look in old relay
7144  logs, which may even have been deleted. So we need to write this
7145  description event at the beginning of the relay log.
7146  When the relay log is created when the I/O thread starts, easy: the
7147  master will send the description event and we will queue it.
7148  But if the relay log is created by new_file(): then the solution is:
7149  MYSQL_BIN_LOG::open() will write the buffered description event.
7150  */
// NOTE(review): this listing jumps from line 7151 to 7153, so an argument
// line of the read_log_event() call below may be missing here - verify
// against the original file.
7151  if ((ev= Log_event::read_log_event(cur_log, 0,
7153  opt_slave_sql_verify_checksum)))
7154  {
7155  DBUG_ASSERT(thd==rli->info_thd);
7156  /*
7157  read it while we have a lock, to avoid a mutex lock in
7158  inc_event_relay_log_pos()
7159  */
7160  rli->set_future_event_relay_log_pos(my_b_tell(cur_log));
7161  ev->future_event_relay_log_pos= rli->get_future_event_relay_log_pos();
7162 
7163  if (hot_log)
7164  mysql_mutex_unlock(log_lock);
7165 
7166  /*
7167  MTS checkpoint in the successful read branch
7168  */
// force is set once checkpoint_seqno has gone past checkpoint_group - 1.
7169  bool force= (rli->checkpoint_seqno > (rli->checkpoint_group - 1));
7170  if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 || force))
7171  {
// NOTE(review): the factor presumably converts the configured period to
// the unit expected by mts_checkpoint_routine() - confirm the units.
7172  ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
7173  mysql_mutex_unlock(&rli->data_lock);
7174  /*
7175  At this point the coordinator is delegating jobs to workers and
7176  the checkpoint routine must be periodically invoked.
7177  */
7178  (void) mts_checkpoint_routine(rli, period, force, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
7179  DBUG_ASSERT(!force ||
7180  (force && (rli->checkpoint_seqno <= (rli->checkpoint_group - 1))) ||
7181  sql_slave_killed(thd, rli));
7182  mysql_mutex_lock(&rli->data_lock);
7183  }
7184  DBUG_RETURN(ev);
7185  }
7186  DBUG_ASSERT(thd==rli->info_thd);
7187  if (opt_reckless_slave) // For mysql-test
7188  cur_log->error = 0;
7189  if (cur_log->error < 0)
7190  {
7191  errmsg = "slave SQL thread aborted because of I/O error";
7192  if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
7193  /*
7194  MTS group status is set to MTS_KILLED_GROUP, whenever a read event
7195  error happens and there was already a non-terminal event scheduled.
7196  */
7197  rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
7198  if (hot_log)
7199  mysql_mutex_unlock(log_lock);
7200  goto err;
7201  }
7202  if (!cur_log->error) /* EOF */
7203  {
7204  /*
7205  On a hot log, EOF means that there are no more updates to
7206  process and we must block until I/O thread adds some and
7207  signals us to continue
7208  */
7209  if (hot_log)
7210  {
7211  /*
7212  We say in Seconds_Behind_Master that we have "caught up". Note that
7213  for example if network link is broken but I/O slave thread hasn't
7214  noticed it (slave_net_timeout not elapsed), then we'll say "caught
7215  up" whereas we're not really caught up. Fixing that would require
7216  internally cutting timeout in smaller pieces in network read, no
7217  thanks. Another example: SQL has caught up on I/O, now I/O has read
7218  a new event and is queuing it; the false "0" will exist until SQL
7219  finishes executing the new event; it will look abnormal only if
7220  the events have old timestamps (then you get "many", 0, "many").
7221 
7222  Transient phases like this can be fixed with implementing
7223  Heartbeat event which provides the slave the status of the
7224  master at time the master does not have any new update to send.
7225  Seconds_Behind_Master would be zero only when master has no
7226  more updates in binlog for slave. The heartbeat can be sent
7227  in a (small) fraction of slave_net_timeout. Until it's done
7228  rli->last_master_timestamp is temporarily (for time of
7229  waiting for the following event) reset whenever EOF is
7230  reached.
7231  */
7232 
7233  /* shows zero while it is sleeping (and until the next event
7234  is about to be executed). Note, in MTS case
7235  Seconds_Behind_Master resetting follows slightly different
7236  schema where reaching EOF is not enough. The status
7237  parameter is updated per some number of processed group of
7238  events. The number can't be greater than
7239  @@global.slave_checkpoint_group and anyway SBM updating
7240  rate does not exceed @@global.slave_checkpoint_period.
7241  Notice that SBM is set to a new value after processing the
7242  terminal event (e.g Commit) of a group. Coordinator resets
7243  SBM when notices no more groups left neither to read from
7244  Relay-log nor to process by Workers.
7245  */
7246  if (!rli->is_parallel_exec())
7247  rli->last_master_timestamp= 0;
7248 
7249  DBUG_ASSERT(rli->relay_log.get_open_count() ==
7250  rli->cur_log_old_open_count);
7251 
7252  if (rli->ign_master_log_name_end[0])
7253  {
7254  /* We generate and return a Rotate, to make our positions advance */
7255  DBUG_PRINT("info",("seeing an ignored end segment"));
7256  ev= new Rotate_log_event(rli->ign_master_log_name_end,
7257  0, rli->ign_master_log_pos_end,
7258  Rotate_log_event::DUP_NAME);
7259  rli->ign_master_log_name_end[0]= 0;
7260  mysql_mutex_unlock(log_lock);
7261  if (unlikely(!ev))
7262  {
7263  errmsg= "Slave SQL thread failed to create a Rotate event "
7264  "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
7265  goto err;
7266  }
7267  ev->server_id= 0; // don't be ignored by slave SQL thread
7268  DBUG_RETURN(ev);
7269  }
7270 
7271  /*
7272  We can, and should release data_lock while we are waiting for
7273  update. If we do not, show slave status will block
7274  */
7275  mysql_mutex_unlock(&rli->data_lock);
7276 
7277  /*
7278  Possible deadlock :
7279  - the I/O thread has reached log_space_limit
7280  - the SQL thread has read all relay logs, but cannot purge for some
7281  reason:
7282  * it has already purged all logs except the current one
7283  * there are other logs than the current one but they're involved in
7284  a transaction that finishes in the current one (or is not finished)
7285  Solution :
7286  Wake up the possibly waiting I/O thread, and set a boolean asking
7287  the I/O thread to temporarily ignore the log_space_limit
7288  constraint, because we do not want the I/O thread to block because of
7289  space (it's ok if it blocks for any other reason (e.g. because the
7290  master does not send anything). Then the I/O thread stops waiting
7291  and reads one more event and starts honoring log_space_limit again.
7292 
7293  If the SQL thread needs more events to be able to rotate the log (it
7294  might need to finish the current group first), then it can ask for one
7295  more at a time. Thus we don't outgrow the relay log indefinitely,
7296  but rather in a controlled manner, until the next rotate.
7297 
7298  When the SQL thread starts it sets ignore_log_space_limit to false.
7299  We should also reset ignore_log_space_limit to 0 when the user does
7300  RESET SLAVE, but in fact, no need as RESET SLAVE requires that the slave
7301  be stopped, and the SQL thread sets ignore_log_space_limit to 0 when
7302  it stops.
7303  */
7304  mysql_mutex_lock(&rli->log_space_lock);
7305 
7306  /*
7307  If we have reached the limit of the relay space and we
7308  are going to sleep, waiting for more events:
7309 
7310  1. If outside a group, SQL thread asks the IO thread
7311  to force a rotation so that the SQL thread purges
7312  logs next time it processes an event (thus space is
7313  freed).
7314 
7315  2. If in a group, SQL thread asks the IO thread to
7316  ignore the limit and queues yet one more event
7317  so that the SQL thread finishes the group and
7318  is able to rotate and purge sometime soon.
7319  */
7320  if (rli->log_space_limit &&
7321  rli->log_space_limit < rli->log_space_total)
7322  {
7323  /* force rotation if not in an unfinished group */
7324  if (!rli->is_parallel_exec())
7325  {
7326  rli->sql_force_rotate_relay= !rli->is_in_group();
7327  }
7328  else
7329  {
7330  rli->sql_force_rotate_relay=
7331  (rli->mts_group_status != Relay_log_info::MTS_IN_GROUP);
7332  }
7333  /* ask for one more event */
7334  rli->ignore_log_space_limit= true;
7335  }
7336 
7337  /*
7338  If the I/O thread is blocked, unblock it. Ok to broadcast
7339  after unlock, because the mutex is only destroyed in
7340  ~Relay_log_info(), i.e. when rli is destroyed, and rli will
7341  not be destroyed before we exit the present function.
7342  */
7343  mysql_mutex_unlock(&rli->log_space_lock);
7344  mysql_cond_broadcast(&rli->log_space_cond);
7345  // Note that wait_for_update_relay_log unlocks lock_log !
7346 
7347  if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 ||
7348  DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0)))
7349  {
7350  int ret= 0;
7351  struct timespec waittime;
7352  ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
7353  ulong signal_cnt= rli->relay_log.signal_cnt;
7354 
7355  mysql_mutex_unlock(log_lock);
7356  do
7357  {
7358  /*
7359  At this point the coordinator has no job to delegate to workers.
7360  However, workers are executing their assigned jobs and as such
7361  the checkpoint routine must be periodically invoked.
7362  */
7363  (void) mts_checkpoint_routine(rli, period, false, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
7364  mysql_mutex_lock(log_lock);
7365  // Besides the relay log being empty, all assigned events are done, so reset the timestamp.
7366  if (rli->gaq->empty())
7367  rli->last_master_timestamp= 0;
7368 
7369  if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
7370  period= 10000000ULL;
7371 
7372  set_timespec_nsec(waittime, period);
7373  ret= rli->relay_log.wait_for_update_relay_log(thd, &waittime);
7374  } while ((ret == ETIMEDOUT || ret == ETIME) /* todo:remove */ &&
7375  signal_cnt == rli->relay_log.signal_cnt && !thd->killed);
7376  }
7377  else
7378  {
7379  rli->relay_log.wait_for_update_relay_log(thd, NULL);
7380  }
7381 
7382  // re-acquire data lock since we released it earlier
7383  mysql_mutex_lock(&rli->data_lock);
7384  continue;
7385  }
7386  /*
7387  If the log was not hot, we need to move to the next log in
7388  sequence. The next log could be hot or cold, we deal with both
7389  cases separately after doing some common initialization
7390  */
7391  end_io_cache(cur_log);
7392  DBUG_ASSERT(rli->cur_log_fd >= 0);
7393  mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
7394  rli->cur_log_fd = -1;
7395 
7396  if (relay_log_purge)
7397  {
7398  /*
7399  purge_first_log will properly set up relay log coordinates in rli.
7400  If the group's coordinates are equal to the event's coordinates
7401  (i.e. the relay log was not rotated in the middle of a group),
7402  we can purge this relay log too.
7403  We do ulonglong and string comparisons, this may be slow but
7404  - purging the last relay log is nice (it can save 1GB of disk), so we
7405  like to detect the case where we can do it, and given this,
7406  - I see no better detection method
7407  - purge_first_log is not called that often
7408  */
7409  if (rli->relay_log.purge_first_log
7410  (rli,
7411  rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()
7412  && !strcmp(rli->get_group_relay_log_name(),rli->get_event_relay_log_name())))
7413  {
7414  errmsg = "Error purging processed logs";
7415  goto err;
7416  }
7417  DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
7418  rli->get_group_master_log_name(),
7419  (ulong) rli->get_group_master_log_pos(),
7420  rli->get_group_relay_log_name(),
7421  (ulong) rli->get_group_relay_log_pos(),
7422  rli->get_event_relay_log_name(),
7423  (ulong) rli->get_event_relay_log_pos()));
7424  }
7425  else
7426  {
7427  /*
7428  If hot_log is set, then we already have a lock on
7429  LOCK_log. If not, we have to get the lock.
7430 
7431  According to Sasha, the only time this code will ever be executed
7432  is if we are recovering from a bug.
7433  */
7434  if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
7435  {
7436  errmsg = "error switching to the next log";
7437  goto err;
7438  }
7439  rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
7440  rli->set_event_relay_log_name(rli->linfo.log_file_name);
7441  /*
7442  We may update the worker here but this is not extremely
7443  necessary. /Alfranio
7444  */
7445  rli->flush_info();
7446  }
7447 
7448  /* Reset the relay-log-change-notified status of Slave Workers */
// NOTE(review): only a debug print is visible in this branch and the
// listing jumps from line 7453 to 7455, so the statement doing the reset
// may be missing here - verify against the original file.
7449  if (rli->is_parallel_exec())
7450  {
7451  DBUG_PRINT("info", ("next_event: MTS group relay log changes to %s %lu\n",
7452  rli->get_group_relay_log_name(),
7453  (ulong) rli->get_group_relay_log_pos()));
7455  }
7456 
7457  /*
7458  Now we want to open this next log. To know if it's a hot log (the one
7459  being written by the I/O thread now) or a cold log, we can use
7460  is_active(); if it is hot, we use the I/O cache; if it's cold we open
7461  the file normally. But if is_active() reports that the log is hot, this
7462  may change between the test and the consequence of the test. So we may
7463  open the I/O cache whereas the log is now cold, which is nonsense.
7464  To guard against this, we need to have LOCK_log.
7465  */
7466 
7467  DBUG_PRINT("info",("hot_log: %d",hot_log));
7468  if (!hot_log) /* if hot_log, we already have this mutex */
7469  mysql_mutex_lock(log_lock);
7470  if (rli->relay_log.is_active(rli->linfo.log_file_name))
7471  {
7472 #ifdef EXTRA_DEBUG
7473  if (log_warnings)
7474  sql_print_information("next log '%s' is currently active",
7475  rli->linfo.log_file_name);
7476 #endif
7477  rli->cur_log= cur_log= rli->relay_log.get_log_file();
7478  rli->cur_log_old_open_count= rli->relay_log.get_open_count();
7479  DBUG_ASSERT(rli->cur_log_fd == -1);
7480 
7481  /*
7482  When the SQL thread is [stopped and] (re)started the
7483  following may happen:
7484 
7485  1. Log was hot at stop time and remains hot at restart
7486 
7487  SQL thread reads again from hot_log (SQL thread was
7488  reading from the active log when it was stopped and the
7489  very same log is still active on SQL thread restart).
7490 
7491  In this case, my_b_seek is performed on cur_log, while
7492  cur_log points to relay_log.get_log_file();
7493 
7494  2. Log was hot at stop time but got cold before restart
7495 
7496  The log was hot when SQL thread stopped, but it is not
7497  anymore when the SQL thread restarts.
7498 
7499  In this case, the SQL thread reopens the log, using
7500  cache_buf, ie, cur_log points to &cache_buf, and thence
7501  its coordinates are reset.
7502 
7503  3. Log was already cold at stop time
7504 
7505  The log was not hot when the SQL thread stopped, and, of
7506  course, it will not be hot when it restarts.
7507 
7508  In this case, the SQL thread opens the cold log again,
7509  using cache_buf, ie, cur_log points to &cache_buf, and
7510  thence its coordinates are reset.
7511 
7512  4. Log was hot at stop time, DBA changes to previous cold
7513  log and restarts SQL thread
7514 
7515  The log was hot when the SQL thread was stopped, but the
7516  user changed the coordinates of the SQL thread to
7517  restart from a previous cold log.
7518 
7519  In this case, at start time, cur_log points to a cold
7520  log, opened using &cache_buf as cache, and coordinates
7521  are reset. However, as it moves on to the next logs, it
7522  will eventually reach the hot log. If the hot log is the
7523  same at the time the SQL thread was stopped, then
7524  coordinates were not reset - the cur_log will point to
7525  relay_log.get_log_file(), and not a freshly opened
7526  IO_CACHE through cache_buf. For this reason we need to
7527  deploy a my_b_seek before calling check_binlog_magic at
7528  this point of the code (see: BUG#55263 for more
7529  details).
7530 
7531  NOTES:
7532  - We must keep the LOCK_log to read the 4 first bytes, as
7533  this is a hot log (same as when we call read_log_event()
7534  above: for a hot log we take the mutex).
7535 
7536  - Because of scenario #4 above, we need to have a
7537  my_b_seek here. Otherwise, we might hit the assertion
7538  inside check_binlog_magic.
7539  */
7540 
7541  my_b_seek(cur_log, (my_off_t) 0);
7542  if (check_binlog_magic(cur_log,&errmsg))
7543  {
7544  if (!hot_log)
7545  mysql_mutex_unlock(log_lock);
7546  goto err;
7547  }
7548  if (!hot_log)
7549  mysql_mutex_unlock(log_lock);
7550  continue;
7551  }
7552  if (!hot_log)
7553  mysql_mutex_unlock(log_lock);
7554  /*
7555  if we get here, the log was not hot, so we will have to open it
7556  ourselves. We are sure that the log is still not hot now (a log can get
7557  from hot to cold, but not from cold to hot). No need for LOCK_log.
7558  */
7559 #ifdef EXTRA_DEBUG
7560  if (log_warnings)
7561  sql_print_information("next log '%s' is not active",
7562  rli->linfo.log_file_name);
7563 #endif
7564  // open_binlog_file() will check the magic header
7565  if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->linfo.log_file_name,
7566  &errmsg)) <0)
7567  goto err;
7568  }
7569  else
7570  {
7571  /*
7572  Read failed with a non-EOF error.
7573  TODO: come up with something better to handle this error
7574  */
7575  if (hot_log)
7576  mysql_mutex_unlock(log_lock);
7577  sql_print_error("Slave SQL thread: I/O error reading \
7578 event(errno: %d cur_log->error: %d)",
7579  my_errno,cur_log->error);
7580  // set read position to the beginning of the event
7581  my_b_seek(cur_log,rli->get_event_relay_log_pos());
7582  /* otherwise, we have had a partial read */
7583  errmsg = "Aborting slave SQL thread because of partial event read";
7584  break; // To end of function
7585  }
7586  }
// The loop ends either because the thread was killed (errmsg still unset)
// or because of the partial-read break above (errmsg set).
7587  if (!errmsg && log_warnings)
7588  {
7589  sql_print_information("Error reading relay log event: %s",
7590  "slave SQL thread was killed");
7591  DBUG_RETURN(0);
7592  }
7593 
7594 err:
7595  if (errmsg)
7596  sql_print_error("Error reading relay log event: %s", errmsg);
7597  DBUG_RETURN(0);
7598 }
7599 
7600 /*
7601  Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
7602  because of size is simpler because when we do it we already have all relevant
7603  locks; here we don't, so this function is mainly taking locks).
7604  Returns nothing as we cannot catch any error (MYSQL_BIN_LOG::new_file()
7605  is void).
7606 */
7607 
7608 int rotate_relay_log(Master_info* mi)
7609 {
7610  DBUG_ENTER("rotate_relay_log");
7611 
7612  mysql_mutex_assert_owner(&mi->data_lock);
7613  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
7614 
7615  Relay_log_info* rli= mi->rli;
7616  int error= 0;
7617 
7618  /*
7619  We need to test inited because otherwise, new_file() will attempt to lock
7620  LOCK_log, which may not be inited (if we're not a slave).
7621  */
7622  if (!rli->inited)
7623  {
7624  DBUG_PRINT("info", ("rli->inited == 0"));
7625  goto end;
7626  }
7627 
7628  /* If the relay log is closed, new_file() will do nothing. */
7629  error= rli->relay_log.new_file(mi->get_mi_description_event());
7630  if (error != 0)
7631  goto end;
7632 
7633  /*
7634  We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
7635  be counted, so imagine a succession of FLUSH LOGS and assume the slave
7636  threads are started:
7637  relay_log_space decreases by the size of the deleted relay log, but does
7638  not increase, so flush-after-flush we may become negative, which is wrong.
7639  Even if this will be corrected as soon as a query is replicated on the
7640  slave (because the I/O thread will then call harvest_bytes_written() which
7641  will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
7642  output in SHOW SLAVE STATUS meanwhile. So we harvest now.
7643  If the log is closed, then this will just harvest the last writes, probably
7644  0 as they probably have been harvested.
7645  */
7646  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7647 end:
7648  DBUG_RETURN(error);
7649 }
7650 
7651 
7667 bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
7668  bool (*pred)(const void *), const void *param)
7669 {
7670  struct st_version_range_for_one_bug {
7671  uint bug_id;
7672  const uchar introduced_in[3]; // first version with bug
7673  const uchar fixed_in[3]; // first version with fix
7674  };
7675  static struct st_version_range_for_one_bug versions_for_all_bugs[]=
7676  {
7677  {24432, { 5, 0, 24 }, { 5, 0, 38 } },
7678  {24432, { 5, 1, 12 }, { 5, 1, 17 } },
7679  {33029, { 5, 0, 0 }, { 5, 0, 58 } },
7680  {33029, { 5, 1, 0 }, { 5, 1, 12 } },
7681  {37426, { 5, 1, 0 }, { 5, 1, 26 } },
7682  };
7683  const uchar *master_ver=
7684  rli->get_rli_description_event()->server_version_split;
7685 
7686  DBUG_ASSERT(sizeof(rli->get_rli_description_event()->server_version_split) == 3);
7687 
7688  for (uint i= 0;
7689  i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
7690  {
7691  const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
7692  *fixed_in= versions_for_all_bugs[i].fixed_in;
7693  if ((versions_for_all_bugs[i].bug_id == bug_id) &&
7694  (memcmp(introduced_in, master_ver, 3) <= 0) &&
7695  (memcmp(fixed_in, master_ver, 3) > 0) &&
7696  (pred == NULL || (*pred)(param)))
7697  {
7698  if (!report)
7699  return TRUE;
7700  // a short message for SHOW SLAVE STATUS (message length constraints)
7701  my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
7702  " http://bugs.mysql.com/bug.php?id=%u"
7703  " so slave stops; check error log on slave"
7704  " for more info", MYF(0), bug_id);
7705  // a verbose message for the error log
7706  rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
7707  "According to the master's version ('%s'),"
7708  " it is probable that master suffers from this bug:"
7709  " http://bugs.mysql.com/bug.php?id=%u"
7710  " and thus replicating the current binary log event"
7711  " may make the slave's data become different from the"
7712  " master's data."
7713  " To take no risk, slave refuses to replicate"
7714  " this event and stops."
7715  " We recommend that all updates be stopped on the"
7716  " master and slave, that the data of both be"
7717  " manually synchronized,"
7718  " that master's binary logs be deleted,"
7719  " that master be upgraded to a version at least"
7720  " equal to '%d.%d.%d'. Then replication can be"
7721  " restarted.",
7722  rli->get_rli_description_event()->server_version,
7723  bug_id,
7724  fixed_in[0], fixed_in[1], fixed_in[2]);
7725  return TRUE;
7726  }
7727  }
7728  return FALSE;
7729 }
7730 
7741 bool rpl_master_erroneous_autoinc(THD *thd)
7742 {
7743  if (active_mi != NULL && active_mi->rli->info_thd == thd)
7744  {
7745  Relay_log_info *rli= active_mi->rli;
7746  DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
7747  return rpl_master_has_bug(rli, 33029, FALSE, NULL, NULL);
7748  }
7749  return FALSE;
7750 }
7751 
/*
  Global unsigned counter defined here; it is not referenced in this part
  of the file.
  NOTE(review): presumably the number of events the slave SQL thread is
  asked to skip (cf. the sql_slave_skip_counter system variable) - confirm
  against its users.
*/
7758 uint sql_slave_skip_counter;
7759 
7773 int start_slave(THD* thd , Master_info* mi, bool net_report)
7774 {
7775  int slave_errno= 0;
7776  int thread_mask;
7777  DBUG_ENTER("start_slave");
7778 
7779  if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
7780  DBUG_RETURN(1);
7781 
7782  if (thd->lex->slave_connection.user ||
7783  thd->lex->slave_connection.password)
7784  {
7785 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
7786  if (thd->vio_ok() && !thd->net.vio->ssl_arg)
7787  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
7788  ER_INSECURE_PLAIN_TEXT,
7789  ER(ER_INSECURE_PLAIN_TEXT));
7790 #endif
7791 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
7792  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
7793  ER_INSECURE_PLAIN_TEXT,
7794  ER(ER_INSECURE_PLAIN_TEXT));
7795 #endif
7796  }
7797 
7798  lock_slave_threads(mi); // this allows us to cleanly read slave_running
7799  // Get a mask of _stopped_ threads
7800  init_thread_mask(&thread_mask,mi,1 /* inverse */);
7801  /*
7802  Below we will start all stopped threads. But if the user wants to
7803  start only one thread, do as if the other thread was running (as we
7804  don't wan't to touch the other thread), so set the bit to 0 for the
7805  other thread
7806  */
7807  if (thd->lex->slave_thd_opt)
7808  thread_mask&= thd->lex->slave_thd_opt;
7809  if (thread_mask) //some threads are stopped, start them
7810  {
7811  if (global_init_info(mi, false, thread_mask))
7812  slave_errno=ER_MASTER_INFO;
7813  else if (server_id_supplied && *mi->host)
7814  {
7815  /*
7816  If we will start IO thread we need to take care of possible
7817  options provided through the START SLAVE if there is any.
7818  */
7819  if (thread_mask & SLAVE_IO)
7820  {
7821  if (thd->lex->slave_connection.user)
7822  {
7823  mi->set_start_user_configured(true);
7824  mi->set_user(thd->lex->slave_connection.user);
7825  }
7826  if (thd->lex->slave_connection.password)
7827  {
7828  mi->set_start_user_configured(true);
7829  mi->set_password(thd->lex->slave_connection.password,
7830  strlen(thd->lex->slave_connection.password));
7831  }
7832  if (thd->lex->slave_connection.plugin_auth)
7833  mi->set_plugin_auth(thd->lex->slave_connection.plugin_auth);
7834  if (thd->lex->slave_connection.plugin_dir)
7835  mi->set_plugin_dir(thd->lex->slave_connection.plugin_dir);
7836  }
7837 
7838  /*
7839  If we will start SQL thread we will care about UNTIL options If
7840  not and they are specified we will ignore them and warn user
7841  about this fact.
7842  */
7843  if (thread_mask & SLAVE_SQL)
7844  {
7845  /*
7846  To cache the MTS system var values and used them in the following
7847  runtime. The system var:s can change meanwhile but having no other
7848  effects.
7849  */
7850  mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
7851 #ifndef DBUG_OFF
7852  if (!DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
7853 #endif
7854  mi->rli->checkpoint_group= opt_mts_checkpoint_group;
7855 
7856  mysql_mutex_lock(&mi->rli->data_lock);
7857 
7858  if (thd->lex->mi.pos)
7859  {
7860  if (thd->lex->mi.relay_log_pos)
7861  slave_errno= ER_BAD_SLAVE_UNTIL_COND;
7862  mi->rli->until_condition= Relay_log_info::UNTIL_MASTER_POS;
7863  mi->rli->until_log_pos= thd->lex->mi.pos;
7864  /*
7865  We don't check thd->lex->mi.log_file_name for NULL here
7866  since it is checked in sql_yacc.yy
7867  */
7868  strmake(mi->rli->until_log_name, thd->lex->mi.log_file_name,
7869  sizeof(mi->rli->until_log_name)-1);
7870  }
7871  else if (thd->lex->mi.relay_log_pos)
7872  {
7873  if (thd->lex->mi.pos)
7874  slave_errno= ER_BAD_SLAVE_UNTIL_COND;
7875  mi->rli->until_condition= Relay_log_info::UNTIL_RELAY_POS;
7876  mi->rli->until_log_pos= thd->lex->mi.relay_log_pos;
7877  strmake(mi->rli->until_log_name, thd->lex->mi.relay_log_name,
7878  sizeof(mi->rli->until_log_name)-1);
7879  }
7880  else if (thd->lex->mi.gtid)
7881  {
7882  global_sid_lock->wrlock();
7883  mi->rli->clear_until_condition();
7884  if (mi->rli->until_sql_gtids.add_gtid_text(thd->lex->mi.gtid)
7885  != RETURN_STATUS_OK)
7886  slave_errno= ER_BAD_SLAVE_UNTIL_COND;
7887  else {
7888  mi->rli->until_condition=
7889  LEX_MASTER_INFO::UNTIL_SQL_BEFORE_GTIDS == thd->lex->mi.gtid_until_condition
7890  ? Relay_log_info::UNTIL_SQL_BEFORE_GTIDS
7891  : Relay_log_info::UNTIL_SQL_AFTER_GTIDS;
7892  if ((mi->rli->until_condition ==
7893  Relay_log_info::UNTIL_SQL_AFTER_GTIDS) &&
7894  mi->rli->opt_slave_parallel_workers != 0)
7895  {
7896  mi->rli->opt_slave_parallel_workers= 0;
7897  push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
7898  ER_MTS_FEATURE_IS_NOT_SUPPORTED,
7899  ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
7900  "UNTIL condtion",
7901  "Slave is started in the sequential execution mode.");
7902  }
7903  }
7904  global_sid_lock->unlock();
7905  }
7906  else if (thd->lex->mi.until_after_gaps)
7907  {
7908  mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
7909  mi->rli->opt_slave_parallel_workers=
7910  mi->rli->recovery_parallel_workers;
7911  }
7912  else
7913  mi->rli->clear_until_condition();
7914 
7915  if (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
7916  mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS)
7917  {
7918  /* Preparing members for effective until condition checking */
7919  const char *p= fn_ext(mi->rli->until_log_name);
7920  char *p_end;
7921  if (*p)
7922  {
7923  //p points to '.'
7924  mi->rli->until_log_name_extension= strtoul(++p,&p_end, 10);
7925  /*
7926  p_end points to the first invalid character. If it equals
7927  to p, no digits were found, error. If it contains '\0' it
7928  means conversion went ok.
7929  */
7930  if (p_end==p || *p_end)
7931  slave_errno=ER_BAD_SLAVE_UNTIL_COND;
7932  }
7933  else
7934  slave_errno=ER_BAD_SLAVE_UNTIL_COND;
7935 
7936  /* mark the cached result of the UNTIL comparison as "undefined" */
7937  mi->rli->until_log_names_cmp_result=
7938  Relay_log_info::UNTIL_LOG_NAMES_CMP_UNKNOWN;
7939 
7940  /* Issuing warning then started without --skip-slave-start */
7941  if (!opt_skip_slave_start)
7942  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
7943  ER_MISSING_SKIP_SLAVE,
7944  ER(ER_MISSING_SKIP_SLAVE));
7945  if (mi->rli->opt_slave_parallel_workers != 0)
7946  {
7947  mi->rli->opt_slave_parallel_workers= 0;
7948  push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
7949  ER_MTS_FEATURE_IS_NOT_SUPPORTED,
7950  ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
7951  "UNTIL condtion",
7952  "Slave is started in the sequential execution mode.");
7953  }
7954  }
7955 
7956  mysql_mutex_unlock(&mi->rli->data_lock);
7957 
7958  /* MTS technical limitation no support of trans retry */
7959  if (mi->rli->opt_slave_parallel_workers != 0 && slave_trans_retries != 0)
7960  {
7961  push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
7962  ER_MTS_FEATURE_IS_NOT_SUPPORTED,
7963  ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
7964  "slave_transaction_retries",
7965  "In the event of a transient failure, the slave will "
7966  "not retry the transaction and will stop.");
7967  }
7968  }
7969  else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos || thd->lex->mi.gtid)
7970  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
7971  ER(ER_UNTIL_COND_IGNORED));
7972 
7973  if (!slave_errno)
7974  slave_errno = start_slave_threads(false/*need_lock_slave=false*/,
7975  true/*wait_for_start=true*/,
7976  mi,
7977  thread_mask);
7978  }
7979  else
7980  slave_errno = ER_BAD_SLAVE;
7981  }
7982  else
7983  {
7984  /* no error if all threads are already started, only a warning */
7985  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
7986  ER(ER_SLAVE_WAS_RUNNING));
7987  }
7988 
7989  /*
7990  Clean up start information if there was an attempt to start
7991  the IO thread to avoid any security issue.
7992  */
7993  if (slave_errno &&
7994  (thread_mask & SLAVE_IO) == SLAVE_IO)
7995  mi->reset_start_info();
7996 
7997  unlock_slave_threads(mi);
7998 
7999  if (slave_errno)
8000  {
8001  if (net_report)
8002  my_message(slave_errno, ER(slave_errno), MYF(0));
8003  DBUG_RETURN(1);
8004  }
8005  else if (net_report)
8006  my_ok(thd);
8007 
8008  DBUG_RETURN(0);
8009 }
8010 
8011 
8025 int stop_slave(THD* thd, Master_info* mi, bool net_report )
8026 {
8027  DBUG_ENTER("stop_slave");
8028 
8029  int slave_errno;
8030  if (!thd)
8031  thd = current_thd;
8032 
8033  if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
8034  DBUG_RETURN(1);
8035  THD_STAGE_INFO(thd, stage_killing_slave);
8036  int thread_mask;
8037  lock_slave_threads(mi);
8038  // Get a mask of _running_ threads
8039  init_thread_mask(&thread_mask,mi,0 /* not inverse*/);
8040  /*
8041  Below we will stop all running threads.
8042  But if the user wants to stop only one thread, do as if the other thread
8043  was stopped (as we don't wan't to touch the other thread), so set the
8044  bit to 0 for the other thread
8045  */
8046  if (thd->lex->slave_thd_opt)
8047  thread_mask &= thd->lex->slave_thd_opt;
8048 
8049  if (thread_mask)
8050  {
8051  slave_errno= terminate_slave_threads(mi,thread_mask,
8052  false/*need_lock_term=false*/);
8053  }
8054  else
8055  {
8056  //no error if both threads are already stopped, only a warning
8057  slave_errno= 0;
8058  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_NOT_RUNNING,
8059  ER(ER_SLAVE_WAS_NOT_RUNNING));
8060  }
8061  unlock_slave_threads(mi);
8062 
8063  if (slave_errno)
8064  {
8065  if ((slave_errno == ER_STOP_SLAVE_SQL_THREAD_TIMEOUT) ||
8066  (slave_errno == ER_STOP_SLAVE_IO_THREAD_TIMEOUT))
8067  {
8068  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, slave_errno,
8069  ER(slave_errno));
8070  sql_print_warning("%s",ER(slave_errno));
8071  }
8072  if (net_report)
8073  my_message(slave_errno, ER(slave_errno), MYF(0));
8074  DBUG_RETURN(1);
8075  }
8076  else if (net_report)
8077  my_ok(thd);
8078 
8079  DBUG_RETURN(0);
8080 }
8081 
8082 
8094 int reset_slave(THD *thd, Master_info* mi)
8095 {
8096  int thread_mask= 0, error= 0;
8097  uint sql_errno=ER_UNKNOWN_ERROR;
8098  const char* errmsg= "Unknown error occured while reseting slave";
8099  DBUG_ENTER("reset_slave");
8100 
8101  lock_slave_threads(mi);
8102  init_thread_mask(&thread_mask,mi,0 /* not inverse */);
8103  if (thread_mask) // We refuse if any slave thread is running
8104  {
8105  sql_errno= ER_SLAVE_MUST_STOP;
8106  error=1;
8107  goto err;
8108  }
8109 
8110  ha_reset_slave(thd);
8111 
8112  // delete relay logs, clear relay log coordinates
8113  if ((error= mi->rli->purge_relay_logs(thd,
8114  1 /* just reset */,
8115  &errmsg)))
8116  {
8117  sql_errno= ER_RELAY_LOG_FAIL;
8118  goto err;
8119  }
8120 
8121  /* Clear master's log coordinates and associated information */
8122  DBUG_ASSERT(!mi->rli || !mi->rli->slave_running); // none writes in rli table
8123  mi->clear_in_memory_info(thd->lex->reset_slave_info.all);
8124 
8125  if (remove_info(mi))
8126  {
8127  error= 1;
8128  goto err;
8129  }
8130 
8131  (void) RUN_HOOK(binlog_relay_io, after_reset_slave, (thd, mi));
8132 err:
8133  unlock_slave_threads(mi);
8134  if (error)
8135  my_error(sql_errno, MYF(0), errmsg);
8136  DBUG_RETURN(error);
8137 }
8138 
8152 bool change_master(THD* thd, Master_info* mi)
8153 {
8154  int thread_mask;
8155  const char* errmsg= 0;
8156  bool need_relay_log_purge= 1;
8157  char *var_master_log_name= NULL, *var_group_master_log_name= NULL;
8158  bool ret= false;
8159  char saved_host[HOSTNAME_LENGTH + 1], saved_bind_addr[HOSTNAME_LENGTH + 1];
8160  uint saved_port= 0;
8161  char saved_log_name[FN_REFLEN];
8162  my_off_t saved_log_pos= 0;
8163  my_bool save_relay_log_purge= relay_log_purge;
8164  bool mts_remove_workers= false;
8165 
8166  DBUG_ENTER("change_master");
8167 
8168  lock_slave_threads(mi);
8169  init_thread_mask(&thread_mask,mi,0 /*not inverse*/);
8170  LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
8171  if (thread_mask) // We refuse if any slave thread is running
8172  {
8173  my_message(ER_SLAVE_MUST_STOP, ER(ER_SLAVE_MUST_STOP), MYF(0));
8174  ret= true;
8175  goto err;
8176  }
8177  thread_mask= SLAVE_IO | SLAVE_SQL;
8178 
8179  THD_STAGE_INFO(thd, stage_changing_master);
8180  /*
8181  We need to check if there is an empty master_host. Otherwise
8182  change master succeeds, a master.info file is created containing
8183  empty master_host string and when issuing: start slave; an error
8184  is thrown stating that the server is not configured as slave.
8185  (See BUG#28796).
8186  */
8187  if(lex_mi->host && !*lex_mi->host)
8188  {
8189  my_error(ER_WRONG_ARGUMENTS, MYF(0), "MASTER_HOST");
8190  unlock_slave_threads(mi);
8191  DBUG_RETURN(TRUE);
8192  }
8193  if (global_init_info(mi, false, thread_mask))
8194  {
8195  my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
8196  ret= true;
8197  goto err;
8198  }
8199  if (mi->rli->mts_recovery_group_cnt)
8200  {
8201  /*
8202  Change-Master can't be done if there is a mts group gap.
8203  That requires mts-recovery which START SLAVE provides.
8204  */
8205  DBUG_ASSERT(mi->rli->recovery_parallel_workers);
8206 
8207  my_message(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS,
8208  ER(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS), MYF(0));
8209  ret= true;
8210  goto err;
8211  }
8212  else
8213  {
8214  /*
8215  Lack of mts group gaps makes Workers info stale
8216  regardless of need_relay_log_purge computation.
8217  */
8218  if (mi->rli->recovery_parallel_workers)
8219  mts_remove_workers= true;
8220  }
8221  /*
8222  We cannot specify auto position and set either the coordinates
8223  on master or slave. If we try to do so, an error message is
8224  printed out.
8225  */
8226  if (lex_mi->log_file_name != NULL || lex_mi->pos != 0 ||
8227  lex_mi->relay_log_name != NULL || lex_mi->relay_log_pos != 0)
8228  {
8229  if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE ||
8230  (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_DISABLE &&
8231  mi->is_auto_position()))
8232  {
8233  my_message(ER_BAD_SLAVE_AUTO_POSITION,
8234  ER(ER_BAD_SLAVE_AUTO_POSITION), MYF(0));
8235  ret= true;
8236  goto err;
8237  }
8238  }
8239 
8240  // CHANGE MASTER TO MASTER_AUTO_POSITION = 1 requires GTID_MODE = ON
8241  if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE && gtid_mode != 3)
8242  {
8243  my_message(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON,
8244  ER(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON), MYF(0));
8245  ret= true;
8246  goto err;
8247  }
8248 
8249  /*
8250  Data lock not needed since we have already stopped the running threads,
8251  and we have the hold on the run locks which will keep all threads that
8252  could possibly modify the data structures from running
8253  */
8254 
8255  /*
8256  Before processing the command, save the previous state.
8257  */
8258  strmake(saved_host, mi->host, HOSTNAME_LENGTH);
8259  strmake(saved_bind_addr, mi->bind_addr, HOSTNAME_LENGTH);
8260  saved_port= mi->port;
8261  strmake(saved_log_name, mi->get_master_log_name(), FN_REFLEN - 1);
8262  saved_log_pos= mi->get_master_log_pos();
8263 
8264  /*
8265  If the user specified host or port without binlog or position,
8266  reset binlog's name to FIRST and position to 4.
8267  */
8268 
8269  if ((lex_mi->host && strcmp(lex_mi->host, mi->host)) ||
8270  (lex_mi->port && lex_mi->port != mi->port))
8271  {
8272  /*
8273  This is necessary because the primary key, i.e. host or port, has
8274  changed.
8275 
8276  The repository does not support direct changes on the primary key,
8277  so the row is dropped and re-inserted with a new primary key. If we
8278  don't do that, the master info repository we will end up with several
8279  rows.
8280  */
8281  if (mi->clean_info())
8282  {
8283  ret= true;
8284  goto err;
8285  }
8286  mi->master_uuid[0]= 0;
8287  mi->master_id= 0;
8288  }
8289 
8290  if ((lex_mi->host || lex_mi->port) && !lex_mi->log_file_name && !lex_mi->pos)
8291  {
8292  var_master_log_name= const_cast<char*>(mi->get_master_log_name());
8293  var_master_log_name[0]= '\0';
8294  mi->set_master_log_pos(BIN_LOG_HEADER_SIZE);
8295  }
8296 
8297  if (lex_mi->log_file_name)
8298  mi->set_master_log_name(lex_mi->log_file_name);
8299  if (lex_mi->pos)
8300  {
8301  mi->set_master_log_pos(lex_mi->pos);
8302  }
8303  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
8304 
8305  if (lex_mi->user || lex_mi->password)
8306  {
8307 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8308  if (thd->vio_ok() && !thd->net.vio->ssl_arg)
8309  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8310  ER_INSECURE_PLAIN_TEXT,
8311  ER(ER_INSECURE_PLAIN_TEXT));
8312 #endif
8313 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8314  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8315  ER_INSECURE_PLAIN_TEXT,
8316  ER(ER_INSECURE_PLAIN_TEXT));
8317 #endif
8318  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8319  ER_INSECURE_CHANGE_MASTER,
8320  ER(ER_INSECURE_CHANGE_MASTER));
8321  }
8322 
8323  if (lex_mi->user)
8324  mi->set_user(lex_mi->user);
8325 
8326  if (lex_mi->password)
8327  {
8328  if (mi->set_password(lex_mi->password, strlen(lex_mi->password)))
8329  {
8330  /*
8331  After implementing WL#5769, we should create a better error message
8332  to denote that the call may have failed due to an error while trying
8333  to encrypt/store the password in a secure key store.
8334  */
8335  my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
8336  ret= false;
8337  goto err;
8338  }
8339  }
8340  if (lex_mi->host)
8341  strmake(mi->host, lex_mi->host, sizeof(mi->host)-1);
8342  if (lex_mi->bind_addr)
8343  strmake(mi->bind_addr, lex_mi->bind_addr, sizeof(mi->bind_addr)-1);
8344  if (lex_mi->port)
8345  mi->port = lex_mi->port;
8346  if (lex_mi->connect_retry)
8347  mi->connect_retry = lex_mi->connect_retry;
8348  if (lex_mi->retry_count_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
8349  mi->retry_count = lex_mi->retry_count;
8350  if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
8351  mi->heartbeat_period = lex_mi->heartbeat_period;
8352  else
8353  mi->heartbeat_period= min<float>(SLAVE_MAX_HEARTBEAT_PERIOD,
8354  (slave_net_timeout/2.0));
8355  mi->received_heartbeats= LL(0); // counter lives until master is CHANGEd
8356  /*
8357  reset the last time server_id list if the current CHANGE MASTER
8358  is mentioning IGNORE_SERVER_IDS= (...)
8359  */
8360  if (lex_mi->repl_ignore_server_ids_opt == LEX_MASTER_INFO::LEX_MI_ENABLE)
8361  reset_dynamic(&(mi->ignore_server_ids->dynamic_ids));
8362  for (uint i= 0; i < lex_mi->repl_ignore_server_ids.elements; i++)
8363  {
8364  ulong s_id;
8365  get_dynamic(&lex_mi->repl_ignore_server_ids, (uchar*) &s_id, i);
8366  if (s_id == ::server_id && replicate_same_server_id)
8367  {
8368  my_error(ER_SLAVE_IGNORE_SERVER_IDS, MYF(0), static_cast<int>(s_id));
8369  ret= TRUE;
8370  goto err;
8371  }
8372  else
8373  {
8374  if (bsearch((const ulong *) &s_id,
8375  mi->ignore_server_ids->dynamic_ids.buffer,
8376  mi->ignore_server_ids->dynamic_ids.elements, sizeof(ulong),
8377  (int (*) (const void*, const void*))
8378  change_master_server_id_cmp) == NULL)
8379  insert_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id);
8380  }
8381  }
8382  sort_dynamic(&(mi->ignore_server_ids->dynamic_ids), (qsort_cmp) change_master_server_id_cmp);
8383 
8384  if (lex_mi->ssl != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
8385  mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::LEX_MI_ENABLE);
8386 
8387  if (lex_mi->sql_delay != -1)
8388  mi->rli->set_sql_delay(lex_mi->sql_delay);
8389 
8390  if (lex_mi->ssl_verify_server_cert != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
8391  mi->ssl_verify_server_cert=
8392  (lex_mi->ssl_verify_server_cert == LEX_MASTER_INFO::LEX_MI_ENABLE);
8393 
8394  if (lex_mi->ssl_ca)
8395  strmake(mi->ssl_ca, lex_mi->ssl_ca, sizeof(mi->ssl_ca)-1);
8396  if (lex_mi->ssl_capath)
8397  strmake(mi->ssl_capath, lex_mi->ssl_capath, sizeof(mi->ssl_capath)-1);
8398  if (lex_mi->ssl_cert)
8399  strmake(mi->ssl_cert, lex_mi->ssl_cert, sizeof(mi->ssl_cert)-1);
8400  if (lex_mi->ssl_cipher)
8401  strmake(mi->ssl_cipher, lex_mi->ssl_cipher, sizeof(mi->ssl_cipher)-1);
8402  if (lex_mi->ssl_key)
8403  strmake(mi->ssl_key, lex_mi->ssl_key, sizeof(mi->ssl_key)-1);
8404  if (lex_mi->ssl_crl)
8405  strmake(mi->ssl_crl, lex_mi->ssl_crl, sizeof(mi->ssl_crl)-1);
8406  if (lex_mi->ssl_crlpath)
8407  strmake(mi->ssl_crlpath, lex_mi->ssl_crlpath, sizeof(mi->ssl_crlpath)-1);
8408 #ifndef HAVE_OPENSSL
8409  if (lex_mi->ssl || lex_mi->ssl_ca || lex_mi->ssl_capath ||
8410  lex_mi->ssl_cert || lex_mi->ssl_cipher || lex_mi->ssl_key ||
8411  lex_mi->ssl_verify_server_cert || lex_mi->ssl_crl || lex_mi->ssl_crlpath)
8412  push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8413  ER_SLAVE_IGNORED_SSL_PARAMS, ER(ER_SLAVE_IGNORED_SSL_PARAMS));
8414 #endif
8415 
8416  if (lex_mi->relay_log_name)
8417  {
8418  need_relay_log_purge= 0;
8419  char relay_log_name[FN_REFLEN];
8420 
8421  mi->rli->relay_log.make_log_name(relay_log_name, lex_mi->relay_log_name);
8422  mi->rli->set_group_relay_log_name(relay_log_name);
8423  mi->rli->set_event_relay_log_name(relay_log_name);
8424  }
8425 
8426  if (lex_mi->relay_log_pos)
8427  {
8428  need_relay_log_purge= 0;
8429  mi->rli->set_group_relay_log_pos(lex_mi->relay_log_pos);
8430  mi->rli->set_event_relay_log_pos(lex_mi->relay_log_pos);
8431  }
8432 
8433  /*
8434  If user did specify neither host nor port nor any log name nor any log
8435  pos, i.e. he specified only user/password/master_connect_retry, he probably
8436  wants replication to resume from where it had left, i.e. from the
8437  coordinates of the **SQL** thread (imagine the case where the I/O is ahead
8438  of the SQL; restarting from the coordinates of the I/O would lose some
8439  events which is probably unwanted when you are just doing minor changes
8440  like changing master_connect_retry).
8441  A side-effect is that if only the I/O thread was started, this thread may
8442  restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a
8443  much more unlikely situation than the one we are fixing here).
8444  Note: coordinates of the SQL thread must be read here, before the
8445  'if (need_relay_log_purge)' block which resets them.
8446  */
8447  if (!lex_mi->host && !lex_mi->port &&
8448  !lex_mi->log_file_name && !lex_mi->pos &&
8449  need_relay_log_purge)
8450  {
8451  /*
8452  Sometimes mi->rli->master_log_pos == 0 (it happens when the SQL thread is
8453  not initialized), so we use a max().
8454  What happens to mi->rli->master_log_pos during the initialization stages
8455  of replication is not 100% clear, so we guard against problems using
8456  max().
8457  */
8458  mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
8459  mi->rli->get_group_master_log_pos()));
8460  mi->set_master_log_name(mi->rli->get_group_master_log_name());
8461  }
8462 
8463  /*
8464  Sets if the slave should connect to the master and look for
8465  GTIDs.
8466  */
8467  if (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
8468  mi->set_auto_position(
8469  (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE));
8470 
8471  /*
8472  Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
8473  a slave before).
8474  */
8475  if (flush_master_info(mi, true))
8476  {
8477  my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
8478  ret= TRUE;
8479  goto err;
8480  }
8481  if (need_relay_log_purge)
8482  {
8483  relay_log_purge= 1;
8484  THD_STAGE_INFO(thd, stage_purging_old_relay_logs);
8485  if (mi->rli->purge_relay_logs(thd,
8486  0 /* not only reset, but also reinit */,
8487  &errmsg))
8488  {
8489  my_error(ER_RELAY_LOG_FAIL, MYF(0), errmsg);
8490  ret= TRUE;
8491  goto err;
8492  }
8493  }
8494  else
8495  {
8496  const char* msg;
8497  relay_log_purge= 0;
8498  /* Relay log is already initialized */
8499 
8500  if (mi->rli->init_relay_log_pos(mi->rli->get_group_relay_log_name(),
8501  mi->rli->get_group_relay_log_pos(),
8502  true/*need_data_lock=true*/,
8503  &msg, 0))
8504  {
8505  my_error(ER_RELAY_LOG_INIT, MYF(0), msg);
8506  ret= TRUE;
8507  goto err;
8508  }
8509  }
8510  relay_log_purge= save_relay_log_purge;
8511 
8512  /*
8513  Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block,
8514  so restore them to good values. If we left them to ''/0, that would work;
8515  but that would fail in the case of 2 successive CHANGE MASTER (without a
8516  START SLAVE in between): because first one would set the coords in mi to
8517  the good values of those in rli, the set those in rli to ''/0, then
8518  second CHANGE MASTER would set the coords in mi to those of rli, i.e. to
8519  ''/0: we have lost all copies of the original good coordinates.
8520  That's why we always save good coords in rli.
8521  */
8522  if (need_relay_log_purge)
8523  {
8524  mi->rli->set_group_master_log_pos(mi->get_master_log_pos());
8525  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
8526  mi->rli->set_group_master_log_name(mi->get_master_log_name());
8527  }
8528  var_group_master_log_name= const_cast<char *>(mi->rli->get_group_master_log_name());
8529  if (!var_group_master_log_name[0]) // uninitialized case
8530  mi->rli->set_group_master_log_pos(0);
8531 
8532  mysql_mutex_lock(&mi->rli->data_lock);
8533  mi->rli->abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */
8534  /* Clear the errors, for a clean start */
8535  mi->rli->clear_error();
8536  mi->rli->clear_until_condition();
8537 
8538  sql_print_information("'CHANGE MASTER TO executed'. "
8539  "Previous state master_host='%s', master_port= %u, master_log_file='%s', "
8540  "master_log_pos= %ld, master_bind='%s'. "
8541  "New state master_host='%s', master_port= %u, master_log_file='%s', "
8542  "master_log_pos= %ld, master_bind='%s'.",
8543  saved_host, saved_port, saved_log_name, (ulong) saved_log_pos,
8544  saved_bind_addr, mi->host, mi->port, mi->get_master_log_name(),
8545  (ulong) mi->get_master_log_pos(), mi->bind_addr);
8546 
8547  /*
8548  If we don't write new coordinates to disk now, then old will remain in
8549  relay-log.info until START SLAVE is issued; but if mysqld is shutdown
8550  before START SLAVE, then old will remain in relay-log.info, and will be the
8551  in-memory value at restart (thus causing errors, as the old relay log does
8552  not exist anymore).
8553 
8554  Notice that the rli table is available exclusively as slave is not
8555  running.
8556  */
8557  DBUG_ASSERT(!mi->rli->slave_running);
8558  if ((ret= mi->rli->flush_info(true)))
8559  my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush relay info file.");
8560  mysql_cond_broadcast(&mi->data_cond);
8561  mysql_mutex_unlock(&mi->rli->data_lock);
8562 
8563 err:
8564  unlock_slave_threads(mi);
8565  if (ret == FALSE)
8566  {
8567  if (!mts_remove_workers)
8568  my_ok(thd);
8569  else
8570  if (!Rpl_info_factory::reset_workers(mi->rli))
8571  my_ok(thd);
8572  else
8573  my_error(ER_MTS_RESET_WORKERS, MYF(0));
8574  }
8575  DBUG_RETURN(ret);
8576 }
8580 #endif /* HAVE_REPLICATION */