MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NdbBackup.cpp
1 /*
2  Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 #include <signaldata/DumpStateOrd.hpp>
19 #include <NdbBackup.hpp>
20 #include <NdbOut.hpp>
21 #include <NDBT_Output.hpp>
22 #include <NdbConfig.h>
23 #include <ndb_version.h>
24 #include <NDBT.hpp>
25 #include <NdbSleep.h>
26 #include <random.h>
27 #include <NdbTick.h>
28 
/**
 * CHECK(b, m): evaluate condition `b` exactly once; when it is false, print
 * "ERR: " followed by message `m`, the source location and the evaluated
 * value to ndbout, then return NDBT_FAILED from the enclosing function.
 *
 * `m` is spliced into a stream expression, so it may itself contain `<<`
 * and therefore must not be parenthesized here.
 * The do/while(0) wrapper turns the expansion into a single statement, so
 * `CHECK(...);` stays correct inside an unbraced if/else.
 */
#define CHECK(b, m) do { const int _xx = (b); if (!(_xx)) { \
  ndbout << "ERR: "<< m \
         << " " << "File: " << __FILE__ \
         << " (Line: " << __LINE__ << ")" << "- " << _xx << endl; \
  return NDBT_FAILED; } } while (0)
34 
35 #include <ConfigRetriever.hpp>
36 #include <mgmapi.h>
37 #include <mgmapi_config_parameters.h>
38 #include <mgmapi_configuration.hpp>
39 
40 int
41 NdbBackup::clearOldBackups()
42 {
43  if (!isConnected())
44  return -1;
45 
46  if (getStatus() != 0)
47  return -1;
48 
49  int retCode = 0;
50 
51 #ifndef _WIN32
52  for(size_t i = 0; i < ndbNodes.size(); i++)
53  {
54  int nodeId = ndbNodes[i].node_id;
55  const char* path = getBackupDataDirForNode(nodeId);
56  if (path == NULL)
57  return -1;
58 
59  const char *host;
60  if (!getHostName(nodeId, &host))
61  return -1;
62 
63  /*
64  * Clear old backup files
65  */
66  BaseString tmp;
67  tmp.assfmt("ssh %s rm -rf %s/BACKUP", host, path);
68 
69  ndbout << "buf: "<< tmp.c_str() <<endl;
70  int res = system(tmp.c_str());
71  ndbout << "ssh res: " << res << endl;
72 
73  if (res && retCode == 0)
74  retCode = res;
75  }
76 #endif
77 
78  return retCode;
79 }
80 
81 int
82 NdbBackup::start(unsigned int & _backup_id,
83  int flags,
84  unsigned int user_backup_id,
85  unsigned int logtype){
86 
87 
88  if (!isConnected())
89  return -1;
90 
92  reply.return_code = 0;
93 
94  bool any = _backup_id == 0;
95 
96 loop:
97  if (ndb_mgm_start_backup3(handle,
98  flags,
99  &_backup_id,
100  &reply,
101  user_backup_id,
102  logtype) == -1) {
103 
104  if (ndb_mgm_get_latest_error(handle) == NDB_MGM_COULD_NOT_START_BACKUP &&
105  strstr(ndb_mgm_get_latest_error_desc(handle), "file already exists") &&
106  any == true)
107  {
108  NdbSleep_SecSleep(3);
109  _backup_id += 100;
110  user_backup_id += 100;
111  goto loop;
112  }
113 
114  g_err << "Error: " << ndb_mgm_get_latest_error(handle) << endl;
115  g_err << "Error msg: " << ndb_mgm_get_latest_error_msg(handle) << endl;
116  g_err << "Error desc: " << ndb_mgm_get_latest_error_desc(handle) << endl;
117  return -1;
118  }
119 
120  if(reply.return_code != 0){
121  g_err << "PLEASE CHECK CODE NdbBackup.cpp line=" << __LINE__ << endl;
122  g_err << "Error: " << ndb_mgm_get_latest_error(handle) << endl;
123  g_err << "Error msg: " << ndb_mgm_get_latest_error_msg(handle) << endl;
124  g_err << "Error desc: " << ndb_mgm_get_latest_error_desc(handle) << endl;
125  return reply.return_code;
126  }
127  return 0;
128 }
129 
130 int
131 NdbBackup::startLogEvent(){
132 
133  if (!isConnected())
134  return -1;
135  log_handle= NULL;
136  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0, 0 };
137  log_handle = ndb_mgm_create_logevent_handle(handle, filter);
138  if (!log_handle) {
139  g_err << "Can't create log event" << endl;
140  return -1;
141  }
142  return 0;
143 }
144 
145 int
146 NdbBackup::checkBackupStatus(){
147 
148  struct ndb_logevent log_event;
149  int result = 0;
150  int res;
151  if(!log_handle) {
152  return -1;
153  }
154  if ((res= ndb_logevent_get_next(log_handle, &log_event, 3000)) > 0)
155  {
156  switch (log_event.type) {
158  result = 1;
159  break;
161  result = 2;
162  break;
164  result = 3;
165  break;
166  default:
167  break;
168  }
169  }
170  ndb_mgm_destroy_logevent_handle(&log_handle);
171  return result;
172 }
173 
174 
175 const char *
176 NdbBackup::getBackupDataDirForNode(int _node_id){
177 
182  if (connect())
183  return NULL;
184 
185  if ((p = ndb_mgm_get_configuration(handle, 0)) == 0)
186  {
187  const char * s= ndb_mgm_get_latest_error_msg(handle);
188  if(s == 0)
189  s = "No error given!";
190 
191  ndbout << "Could not fetch configuration" << endl;
192  ndbout << s << endl;
193  return NULL;
194  }
195 
199  ndb_mgm_configuration_iterator iter(* p, CFG_SECTION_NODE);
200  if (iter.find(CFG_NODE_ID, _node_id)){
201  ndbout << "Invalid configuration fetched, DB missing" << endl;
202  return NULL;
203  }
204 
205  unsigned int type = NODE_TYPE_DB + 1;
206  if(iter.get(CFG_TYPE_OF_SECTION, &type) || type != NODE_TYPE_DB){
207  ndbout <<"type = " << type << endl;
208  ndbout <<"Invalid configuration fetched, I'm wrong type of node" << endl;
209  return NULL;
210  }
211 
212  const char * path;
213  if (iter.get(CFG_DB_BACKUP_DATADIR, &path)){
214  ndbout << "BackupDataDir not found" << endl;
215  return NULL;
216  }
217 
218  return path;
219 
220 }
221 
222 int
223 NdbBackup::execRestore(bool _restore_data,
224  bool _restore_meta,
225  int _node_id,
226  unsigned _backup_id){
227  ndbout << "getBackupDataDir "<< _node_id <<endl;
228 
229  const char* path = getBackupDataDirForNode(_node_id);
230  if (path == NULL)
231  return -1;
232 
233  ndbout << "getHostName "<< _node_id <<endl;
234  const char *host;
235  if (!getHostName(_node_id, &host)){
236  return -1;
237  }
238 
239  /*
240  * Copy backup files to local dir
241  */
242  BaseString tmp;
243  tmp.assfmt("scp %s:%s/BACKUP/BACKUP-%d/BACKUP-%d*.%d.* .",
244  host, path,
245  _backup_id,
246  _backup_id,
247  _node_id);
248 
249  ndbout << "buf: "<< tmp.c_str() <<endl;
250  int res = system(tmp.c_str());
251 
252  ndbout << "scp res: " << res << endl;
253 
254  if (res == 0 && _restore_meta)
255  {
258  tmp.assfmt("%sndb_restore -c \"%s:%d\" -n %d -b %d -m -d .",
259 #if 1
260  "",
261 #else
262  "valgrind --leak-check=yes -v "
263 #endif
266  _node_id,
267  _backup_id);
268 
269  ndbout << "buf: "<< tmp.c_str() <<endl;
270  res = system(tmp.c_str());
271  }
272 
273  if (res == 0 && _restore_data)
274  {
275 
276  tmp.assfmt("%sndb_restore -c \"%s:%d\" -n %d -b %d -r .",
277 #if 1
278  "",
279 #else
280  "valgrind --leak-check=yes -v "
281 #endif
284  _node_id,
285  _backup_id);
286 
287  ndbout << "buf: "<< tmp.c_str() <<endl;
288  res = system(tmp.c_str());
289  }
290 
291  ndbout << "ndb_restore res: " << res << endl;
292 
293  return res;
294 }
295 
296 int
297 NdbBackup::restore(unsigned _backup_id){
298 
299  if (!isConnected())
300  return -1;
301 
302  if (getStatus() != 0)
303  return -1;
304 
305  int res;
306 
307  // restore metadata first and data for first node
308  res = execRestore(true, true, ndbNodes[0].node_id, _backup_id);
309 
310  // Restore data once for each node
311  for(size_t i = 1; i < ndbNodes.size(); i++){
312  res = execRestore(true, false, ndbNodes[i].node_id, _backup_id);
313  }
314 
315  return 0;
316 }
317 
/* Error-insert codes handed to NdbBackup::NF(), one backup attempt per code. */

// Master failure (used by NdbBackup::NFMaster)
int NFDuringBackupM_codes[] = {
  10003, 10004, 10007, 10008,
  10009, 10010, 10012, 10013
};

// Slave failure (used by NdbBackup::NFMasterAsSlave and NdbBackup::NFSlave)
int NFDuringBackupS_codes[] = {
  10014, 10015, 10016,
  10017, 10018, 10020
};

// Master takeover etc...
int NFDuringBackupSL_codes[] = {
  10001, 10002, 10021
};
349 
350 int
351 NdbBackup::NFMaster(NdbRestarter& _restarter){
352  const int sz = sizeof(NFDuringBackupM_codes)/sizeof(NFDuringBackupM_codes[0]);
353  return NF(_restarter, NFDuringBackupM_codes, sz, true);
354 }
355 
356 int
357 NdbBackup::NFMasterAsSlave(NdbRestarter& _restarter){
358  const int sz = sizeof(NFDuringBackupS_codes)/sizeof(NFDuringBackupS_codes[0]);
359  return NF(_restarter, NFDuringBackupS_codes, sz, true);
360 }
361 
362 int
363 NdbBackup::NFSlave(NdbRestarter& _restarter){
364  const int sz = sizeof(NFDuringBackupS_codes)/sizeof(NFDuringBackupS_codes[0]);
365  return NF(_restarter, NFDuringBackupS_codes, sz, false);
366 }
367 
368 int
369 NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){
370  int nNodes = _restarter.getNumDbNodes();
371  {
372  if(nNodes == 1)
373  return NDBT_OK;
374 
375  int nodeId = _restarter.getMasterNodeId();
376 
377  CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
378  "Could not restart node "<< nodeId);
379 
380  CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
381  "waitNodesNoStart failed");
382 
383  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
384  "failed to start node");
385  }
386 
387  CHECK(_restarter.waitClusterStarted() == 0,
388  "waitClusterStarted failed");
389 
390  myRandom48Init((long)NdbTick_CurrentMillisecond());
391 
392  for(int i = 0; i<sz; i++){
393 
394  int error = NFDuringBackup_codes[i];
395  unsigned int backupId;
396 
397  const int masterNodeId = _restarter.getMasterNodeId();
398  CHECK(masterNodeId > 0, "getMasterNodeId failed");
399  int nodeId;
400 
401  nodeId = masterNodeId;
402  if (!onMaster) {
403  int randomId;
404  while (nodeId == masterNodeId) {
405  randomId = myRandom48(nNodes);
406  nodeId = _restarter.getDbNodeId(randomId);
407  }
408  }
409 
410  g_err << "NdbBackup::NF node = " << nodeId
411  << " error code = " << error << " masterNodeId = "
412  << masterNodeId << endl;
413 
414 
415  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
416  CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
417  "failed to set RestartOnErrorInsert");
418  CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
419  "failed to set error insert");
420 
421  g_info << "error inserted" << endl;
422  NdbSleep_SecSleep(1);
423 
424  g_info << "starting backup" << endl;
425  int r = start(backupId);
426  g_info << "r = " << r
427  << " (which should fail) started with id = " << backupId << endl;
428  if (r == 0) {
429  g_err << "Backup should have failed on error_insertion " << error << endl
430  << "Master = " << masterNodeId << "Node = " << nodeId << endl;
431  return NDBT_FAILED;
432  }
433 
434  CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
435  "waitNodesNoStart failed");
436 
437  g_info << "number of nodes running " << _restarter.getNumDbNodes() << endl;
438 
439  if (_restarter.getNumDbNodes() != nNodes) {
440  g_err << "Failure: cluster not up" << endl;
441  return NDBT_FAILED;
442  }
443 
444  g_info << "starting new backup" << endl;
445  CHECK(start(backupId) == 0,
446  "failed to start backup");
447  g_info << "(which should succeed) started with id = " << backupId << endl;
448 
449  g_info << "starting node" << endl;
450  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
451  "failed to start node");
452 
453  CHECK(_restarter.waitClusterStarted() == 0,
454  "waitClusterStarted failed");
455  g_info << "node started" << endl;
456 
457  int val2[] = { 24, 2424 };
458  CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
459  "failed to check backup resources RestartOnErrorInsert");
460 
461  CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
462  "failed to set error insert");
463 
464  NdbSleep_SecSleep(1);
465  }
466 
467  return NDBT_OK;
468 }
469 
/* Error-insert codes handed to NdbBackup::Fail(), one backup attempt per code. */

// Used by NdbBackup::FailMasterAsSlave and NdbBackup::FailSlave
int FailS_codes[] = {
  10025, 10027, 10033, 10035, 10036
};

// Used by NdbBackup::FailMaster
int FailM_codes[] = {
  10023, 10024, 10025, 10026,
  10027, 10028, 10031, 10033,
  10035, 10037, 10038
};
493 
494 int
495 NdbBackup::FailMaster(NdbRestarter& _restarter){
496  const int sz = sizeof(FailM_codes)/sizeof(FailM_codes[0]);
497  return Fail(_restarter, FailM_codes, sz, true);
498 }
499 
500 int
501 NdbBackup::FailMasterAsSlave(NdbRestarter& _restarter){
502  const int sz = sizeof(FailS_codes)/sizeof(FailS_codes[0]);
503  return Fail(_restarter, FailS_codes, sz, true);
504 }
505 
506 int
507 NdbBackup::FailSlave(NdbRestarter& _restarter){
508  const int sz = sizeof(FailS_codes)/sizeof(FailS_codes[0]);
509  return Fail(_restarter, FailS_codes, sz, false);
510 }
511 
512 int
513 NdbBackup::Fail(NdbRestarter& _restarter, int *Fail_codes, const int sz, bool onMaster){
514 
515  CHECK(_restarter.waitClusterStarted() == 0,
516  "waitClusterStarted failed");
517 
518  int nNodes = _restarter.getNumDbNodes();
519 
520  myRandom48Init((long)NdbTick_CurrentMillisecond());
521 
522  for(int i = 0; i<sz; i++){
523  int error = Fail_codes[i];
524  unsigned int backupId;
525 
526  const int masterNodeId = _restarter.getMasterNodeId();
527  CHECK(masterNodeId > 0, "getMasterNodeId failed");
528  int nodeId;
529 
530  nodeId = masterNodeId;
531  if (!onMaster) {
532  int randomId;
533  while (nodeId == masterNodeId) {
534  randomId = myRandom48(nNodes);
535  nodeId = _restarter.getDbNodeId(randomId);
536  }
537  }
538 
539  g_err << "NdbBackup::Fail node = " << nodeId
540  << " error code = " << error << " masterNodeId = "
541  << masterNodeId << endl;
542 
543  CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
544  "failed to set error insert");
545 
546  g_info << "error inserted" << endl;
547  g_info << "waiting some before starting backup" << endl;
548 
549  g_info << "starting backup" << endl;
550  int r = start(backupId);
551  g_info << "r = " << r
552  << " (which should fail) started with id = " << backupId << endl;
553  if (r == 0) {
554  g_err << "Backup should have failed on error_insertion " << error << endl
555  << "Master = " << masterNodeId << "Node = " << nodeId << endl;
556  return NDBT_FAILED;
557  }
558 
559  CHECK(_restarter.waitClusterStarted() == 0,
560  "waitClusterStarted failed");
561 
562  CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
563  "failed to set error insert");
564 
565  NdbSleep_SecSleep(5);
566 
567  int val2[] = { 24, 2424 };
568  CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
569  "failed to check backup resources RestartOnErrorInsert");
570 
571  }
572 
573  return NDBT_OK;
574 }
575