18 #include <signaldata/DumpStateOrd.hpp> 
   19 #include <NdbBackup.hpp> 
   21 #include <NDBT_Output.hpp> 
   22 #include <NdbConfig.h> 
   23 #include <ndb_version.h> 
   /**
    * CHECK(b, m): evaluate condition `b` exactly once. If it is false, write
    * an error line (message `m`, source file, line number and the evaluated
    * value) to ndbout and return NDBT_FAILED from the enclosing function.
    *
    * `m` is spliced into a stream expression, so it may itself contain `<<`
    * (e.g. CHECK(x == 0, "bad node " << nodeId)).
    *
    * The body is wrapped in do { ... } while (0) so the macro behaves as a
    * single statement: `if (c) CHECK(...); else ...` now parses correctly,
    * which the previous bare-brace form did not. `b` is parenthesised so an
    * argument containing low-precedence operators is evaluated as a whole.
    * Every use must be followed by a semicolon.
    */
   #define CHECK(b, m) do { int _xx = (b); if (!(_xx)) { \
     ndbout << "ERR: "<< m \
              << "   " << "File: " << __FILE__ \
              << " (Line: " << __LINE__ << ")" << "- " << _xx << endl; \
     return NDBT_FAILED; } } while (0)
   35 #include <ConfigRetriever.hpp> 
   37 #include <mgmapi_config_parameters.h> 
   38 #include <mgmapi_configuration.hpp> 
   // Removes old backup directories for the nodes listed in ndbNodes.
   // For each node the visible code:
   //   1. resolves the node's backup data directory with
   //      getBackupDataDirForNode(),
   //   2. formats the shell command "ssh <host> rm -rf <path>/BACKUP",
   //   3. runs it with system() and logs both the command and its result.
   // NOTE(review): several original lines are absent from this listing (the
   // return type, the braces, and the declarations of `host`, `tmp` and
   // `retCode`), so the return value cannot be documented from here.
   41 NdbBackup::clearOldBackups()
 
   52   for(
size_t i = 0; 
i < ndbNodes.size(); 
i++)
 
   54     int nodeId = ndbNodes[
i].node_id;
 
   55     const char* path = getBackupDataDirForNode(nodeId);
 
   // NOTE(review): `host` and `path` are interpolated unquoted into a shell
   // command line; confirm both always come from trusted configuration.
   67     tmp.
assfmt(
"ssh %s rm -rf %s/BACKUP", host, path);
 
   69     ndbout << 
"buf: "<< tmp.
c_str() <<endl;
 
   70     int res = system(tmp.
c_str());  
 
   71     ndbout << 
"ssh res: " << res << endl;
 
   // Only acts on a non-zero result while retCode is still 0; the statement
   // guarded by this condition is not visible in this listing.
   73     if (res && retCode == 0)
 
   // Starts a backup.
   //   _backup_id     - in/out (passed by reference); its incoming value is
   //                    read below to compute `any`.
   //   user_backup_id - modified locally (+100) in the visible retry step.
   //   logtype        - not used in any visible line.
   // NOTE(review): one parameter line and most of the body are absent from
   // this listing, so the return contract is not documented here.
   82 NdbBackup::start(
unsigned int & _backup_id,
 
   84                  unsigned int user_backup_id,
 
   85                  unsigned int logtype){
 
   // `any` is true when the caller passed 0 in _backup_id. Callers must
   // therefore initialise the variable they pass in.
   94   bool any = _backup_id == 0;
 
   // Visible retry step: sleep 3 seconds, then bump user_backup_id by 100.
   // The condition that leads here is not visible in this listing.
  108       NdbSleep_SecSleep(3);
 
  110       user_backup_id += 100;
 
  121     g_err  << 
"PLEASE CHECK CODE NdbBackup.cpp line=" << __LINE__ << endl;
 
  // Fragment of startLogEvent(): the only visible statement reports
  // "Can't create log event" on g_err. The call that creates the log event
  // and the failure test are not visible in this listing.
  131 NdbBackup::startLogEvent(){
 
  139     g_err << 
"Can't create log event" << endl;
 
  // Fragment of checkBackupStatus(): dispatches on log_event.type and
  // releases log_handle with ndb_mgm_destroy_logevent_handle(). The switch
  // cases and the return value are not visible in this listing.
  146 NdbBackup::checkBackupStatus(){
 
  156     switch (log_event.type) {
 
  // The handle is passed by address to the destroy call.
  170   ndb_mgm_destroy_logevent_handle(&log_handle);
 
  // Looks up the backup data directory configured for node `_node_id`.
  // Visible validation steps, each logging to ndbout on failure:
  //   - the configuration could be fetched,
  //   - a section with CFG_NODE_ID == _node_id exists,
  //   - that section's type is NODE_TYPE_DB,
  //   - CFG_DB_BACKUP_DATADIR can be read into `path`.
  // Callers in this file assign the result to a `const char*`.
  // NOTE(review): the return statements are not visible in this listing, so
  // the value returned on failure is not documented here.
  176 NdbBackup::getBackupDataDirForNode(
int _node_id){
 
  189       s = 
"No error given!";
 
  191     ndbout << 
"Could not fetch configuration" << endl;
 
  // A non-zero result from iter.find() is treated as "node not found".
  200   if (iter.find(CFG_NODE_ID, _node_id)){
 
  201     ndbout << 
"Invalid configuration fetched, DB missing" << endl;
 
  // Seeded with a value different from NODE_TYPE_DB, so the check below can
  // only pass if iter.get() actually stores NODE_TYPE_DB into `type`.
  205   unsigned int type = NODE_TYPE_DB + 1;
 
  206   if(iter.get(CFG_TYPE_OF_SECTION, &type) || type != NODE_TYPE_DB){
 
  207     ndbout <<
"type = " << type << endl;
 
  208     ndbout <<
"Invalid configuration fetched, I'm wrong type of node" << endl;
 
  // A non-zero result from iter.get() is treated as "parameter missing".
  213   if (iter.get(CFG_DB_BACKUP_DATADIR, &path)){
 
  214     ndbout << 
"BackupDataDir not found" << endl;
 
  // Fetches one node's backup files and runs ndb_restore on them.
  // Visible steps:
  //   1. resolve the node's backup directory (getBackupDataDirForNode),
  //   2. copy the backup files into the current directory with scp,
  //   3. if the copy succeeded and _restore_meta is set, run ndb_restore
  //      with "-m -d",
  //   4. if still successful and _restore_data is set, run ndb_restore
  //      with "-r".
  // Every command is executed through system() and echoed to ndbout first.
  //   _restore_data - enables step 4.
  //   _backup_id    - not referenced in any visible line; presumably fills
  //                   the BACKUP-%d / -b %d fields. TODO confirm.
  // NOTE(review): the parameter lines declaring _restore_meta and _node_id,
  // the format arguments and the return statement are absent from this
  // listing.
  223 NdbBackup::execRestore(
bool _restore_data,
 
  226                        unsigned _backup_id){
 
  227   ndbout << 
"getBackupDataDir "<< _node_id <<endl;
 
  229   const char* path = getBackupDataDirForNode(_node_id);
 
  233   ndbout << 
"getHostName "<< _node_id <<endl;
 
  // NOTE(review): the host and path are placed unquoted into a shell
  // command; confirm both come from trusted configuration.
  243   tmp.
assfmt(
"scp %s:%s/BACKUP/BACKUP-%d/BACKUP-%d*.%d.* .",
 
  249   ndbout << 
"buf: "<< tmp.
c_str() <<endl;
 
  250   int res = system(tmp.
c_str());  
 
  252   ndbout << 
"scp res: " << res << endl;
 
  // Metadata restore: skipped when scp failed (res != 0).
  254   if (res == 0 && _restore_meta)
 
  // The leading %s is a command prefix; a valgrind prefix is one visible
  // candidate (the expression selecting it is not visible in this listing).
  258     tmp.
assfmt(
"%sndb_restore -c \"%s:%d\" -n %d -b %d -m -d .", 
 
  262                "valgrind --leak-check=yes -v " 
  269     ndbout << 
"buf: "<< tmp.
c_str() <<endl;
 
  270     res = system(tmp.
c_str());
 
  // Data restore: skipped when any earlier step left res non-zero.
  273   if (res == 0 && _restore_data)
 
  276     tmp.
assfmt(
"%sndb_restore -c \"%s:%d\" -n %d -b %d -r .", 
 
  280                "valgrind --leak-check=yes -v " 
  287     ndbout << 
"buf: "<< tmp.
c_str() <<endl;
 
  288     res = system(tmp.
c_str());
 
  291   ndbout << 
"ndb_restore res: " << res << endl;
 
  // Restores backup `_backup_id` on every node in ndbNodes.
  // The first node is restored with both flags true, the remaining nodes
  // with (true, false). The first flag is execRestore's _restore_data; the
  // second is presumably _restore_meta (its declaration is not visible in
  // this listing). TODO confirm.
  // NOTE(review): handling of `res` between calls and the return value are
  // not visible in this listing.
  297 NdbBackup::restore(
unsigned _backup_id){
 
  // A non-zero getStatus() takes a branch whose body is not visible here.
  302   if (getStatus() != 0)
 
  // NOTE(review): indexes ndbNodes[0]; confirm the list cannot be empty
  // after a successful getStatus().
  308   res = execRestore(
true, 
true, ndbNodes[0].node_id, _backup_id);
 
  // Remaining nodes start at index 1 and pass false as the second flag.
  311   for(
size_t i = 1; 
i < ndbNodes.size(); 
i++){
 
  312     res = execRestore(
true, 
false, ndbNodes[
i].node_id, _backup_id);
 
  // Tables of error-insert codes iterated by NdbBackup::NF(). Their element
  // type and initialisers are not visible in this listing.
  // The M table is used with onMaster = true.
  320 NFDuringBackupM_codes[] = {
 
  // The S table is used with both onMaster = true and onMaster = false.
  333 NFDuringBackupS_codes[] = {
 
  // NOTE(review): the SL table is not referenced by any line visible in
  // this listing; confirm whether it is intentionally unused.
  344 NFDuringBackupSL_codes[] = {
 
  // Bodies of three thin wrappers around NdbBackup::NF(); their function
  // headers are not visible in this listing. Each computes the element
  // count of a code table with sizeof(array)/sizeof(array[0]) and forwards
  // to NF().
  //
  // Wrapper 1: NFDuringBackupM_codes, onMaster = true.
  352   const int sz = 
sizeof(NFDuringBackupM_codes)/
sizeof(NFDuringBackupM_codes[0]);
 
  353   return NF(_restarter, NFDuringBackupM_codes, sz, 
true);
 
  // Wrapper 2: NFDuringBackupS_codes, onMaster = true.
  358   const int sz = 
sizeof(NFDuringBackupS_codes)/
sizeof(NFDuringBackupS_codes[0]);
 
  359   return NF(_restarter, NFDuringBackupS_codes, sz, 
true);
 
  // Wrapper 3: NFDuringBackupS_codes again, onMaster = false.
  // NOTE(review): NFDuringBackupSL_codes is declared earlier in this file
  // but not used here; confirm that the S table is the intended one.
  364   const int sz = 
sizeof(NFDuringBackupS_codes)/
sizeof(NFDuringBackupS_codes[0]);
 
  365   return NF(_restarter, NFDuringBackupS_codes, sz, 
false);
 
  // Node-failure-during-backup test driver.
  //   _restarter           - used to query, restart and error-inject nodes.
  //   NFDuringBackup_codes - array of error-insert codes to try, in order.
  //   sz                   - number of entries in NFDuringBackup_codes.
  //   onMaster             - not referenced in any visible line; presumably
  //                          selects the master as target. TODO confirm.
  // Visible sequence for each code: pick a target node, send the
  // CmvmiSetRestartOnErrorInsert dump command, insert the error, start a
  // backup that is expected to fail, wait for the target to reach no-start,
  // start a second backup that must succeed, restart the node, wait for the
  // cluster, send dump { 24, 2424 } to all nodes and insert error 10099.
  // Any failing CHECK returns NDBT_FAILED from this function.
  // NOTE(review): several original lines (branch conditions, declarations of
  // nodeId/randomId at loop scope, closing braces, final return) are absent
  // from this listing.
  369 NdbBackup::NF(
NdbRestarter& _restarter, 
int *NFDuringBackup_codes, 
const int sz, 
bool onMaster){
 
  // Node count captured up front; compared against the live count later.
  370   int nNodes = _restarter.getNumDbNodes();
 
  // Visible preparatory step: restart the current master into no-start and
  // start it again. The condition guarding this step is not visible here.
  375     int nodeId = _restarter.getMasterNodeId();
 
  377     CHECK(_restarter.restartOneDbNode(nodeId, 
false, 
true, 
true) == 0,
 
  378           "Could not restart node "<< nodeId);
 
  380     CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
 
  381           "waitNodesNoStart failed");
 
  383     CHECK(_restarter.startNodes(&nodeId, 1) == 0,
 
  384           "failed to start node");
 
  387   CHECK(_restarter.waitClusterStarted() == 0,
 
  388         "waitClusterStarted failed");
 
  // Seed the random generator from the current millisecond tick.
  390   myRandom48Init((
long)NdbTick_CurrentMillisecond());
 
  392   for(
int i = 0; 
i<sz; 
i++){
 
  394     int error = NFDuringBackup_codes[
i];
 
  // NOTE(review): backupId is passed uninitialised to start(), which reads
  // its incoming value (`_backup_id == 0`); consider initialising it to 0.
  395     unsigned int backupId;
 
  397     const int masterNodeId = _restarter.getMasterNodeId();
 
  398     CHECK(masterNodeId > 0, 
"getMasterNodeId failed");
 
  // Default target is the master.
  401     nodeId = masterNodeId;
 
  // Re-draw until a node other than the master is chosen.
  // NOTE(review): with a single data node this loop cannot terminate unless
  // it is guarded by a condition not visible in this listing.
  404       while (nodeId == masterNodeId) {
 
  405         randomId = myRandom48(nNodes);
 
  406         nodeId = _restarter.getDbNodeId(randomId);
 
  410     g_err << 
"NdbBackup::NF node = " << nodeId 
 
  411            << 
" error code = " << error << 
" masterNodeId = " 
  412            << masterNodeId << endl;
 
  // Dump command CmvmiSetRestartOnErrorInsert with argument 1, sent to the
  // target node before the error is inserted.
  415     int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
 
  416     CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
 
  417           "failed to set RestartOnErrorInsert");
 
  418     CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
 
  419           "failed to set error insert");
 
  421     g_info << 
"error inserted"  << endl;
 
  422     NdbSleep_SecSleep(1);
 
  // First backup: expected to fail because of the inserted error.
  424     g_info << 
"starting backup"  << endl;
 
  425     int r = start(backupId);
 
  426     g_info << 
"r = " << r
 
  427            << 
" (which should fail) started with id = "  << backupId << endl;
 
  // The test on `r` that leads to this message is not visible here.
  429       g_err << 
"Backup should have failed on error_insertion " << error << endl
 
  430             << 
"Master = " << masterNodeId << 
"Node = " << nodeId << endl;
 
  434     CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
 
  435           "waitNodesNoStart failed");
 
  437     g_info << 
"number of nodes running " << _restarter.getNumDbNodes() << endl;
 
  // The node count must still equal the value captured at entry.
  439     if (_restarter.getNumDbNodes() != nNodes) {
 
  440       g_err << 
"Failure: cluster not up" << endl;
 
  // Second backup: must succeed while the target node is still stopped.
  444     g_info << 
"starting new backup"  << endl;
 
  445     CHECK(start(backupId) == 0,
 
  446           "failed to start backup");
 
  447     g_info << 
"(which should succeed) started with id = "  << backupId << endl;
 
  449     g_info << 
"starting node"  << endl;
 
  450     CHECK(_restarter.startNodes(&nodeId, 1) == 0,
 
  451           "failed to start node");
 
  453     CHECK(_restarter.waitClusterStarted() == 0,
 
  454           "waitClusterStarted failed");
 
  455     g_info << 
"node started"  << endl;
 
  // Dump { 24, 2424 } to all nodes; per the CHECK message this is a backup
  // resource check. The numeric codes are not explained in this listing.
  457     int val2[] = { 24, 2424 };
 
  458     CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
 
  459           "failed to check backup resources RestartOnErrorInsert");
 
  // Error code 10099 is inserted last; its meaning is not visible here.
  461     CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
 
  462           "failed to set error insert");
 
  464     NdbSleep_SecSleep(1);
 
  // Bodies of three thin wrappers around NdbBackup::Fail(); their function
  // headers and the FailM_codes / FailS_codes tables are not visible in
  // this listing. Each computes the element count of a code table with
  // sizeof(array)/sizeof(array[0]) and forwards to Fail().
  //
  // Wrapper 1: FailM_codes, onMaster = true.
  496   const int sz = 
sizeof(FailM_codes)/
sizeof(FailM_codes[0]);
 
  497   return Fail(_restarter, FailM_codes, sz, 
true);
 
  // Wrapper 2: FailS_codes, onMaster = true.
  502   const int sz = 
sizeof(FailS_codes)/
sizeof(FailS_codes[0]);
 
  503   return Fail(_restarter, FailS_codes, sz, 
true);
 
  // Wrapper 3: FailS_codes, onMaster = false.
  508   const int sz = 
sizeof(FailS_codes)/
sizeof(FailS_codes[0]);
 
  509   return Fail(_restarter, FailS_codes, sz, 
false);
 
  // Backup-failure test driver (no node restart involved in visible lines).
  //   _restarter - used to query nodes and insert errors.
  //   Fail_codes - array of error-insert codes to try, in order.
  //   sz         - number of entries in Fail_codes.
  //   onMaster   - not referenced in any visible line; presumably selects
  //                the master as target. TODO confirm.
  // Visible sequence for each code: pick a target node, insert the error,
  // start a backup that is expected to fail, wait for the cluster to be
  // started, insert error 10099, sleep 5 seconds and send dump { 24, 2424 }
  // to all nodes. Any failing CHECK returns NDBT_FAILED from this function.
  // NOTE(review): branch conditions, declarations of nodeId/randomId,
  // closing braces and the final return are absent from this listing.
  513 NdbBackup::Fail(
NdbRestarter& _restarter, 
int *Fail_codes, 
const int sz, 
bool onMaster){
 
  515   CHECK(_restarter.waitClusterStarted() == 0,
 
  516         "waitClusterStarted failed");
 
  518   int nNodes = _restarter.getNumDbNodes();
 
  // Seed the random generator from the current millisecond tick.
  520   myRandom48Init((
long)NdbTick_CurrentMillisecond());
 
  522   for(
int i = 0; 
i<sz; 
i++){
 
  523     int error = Fail_codes[
i];
 
  // NOTE(review): backupId is passed uninitialised to start(), which reads
  // its incoming value (`_backup_id == 0`); consider initialising it to 0.
  524     unsigned int backupId;
 
  526     const int masterNodeId = _restarter.getMasterNodeId();
 
  527     CHECK(masterNodeId > 0, 
"getMasterNodeId failed");
 
  // Default target is the master.
  530     nodeId = masterNodeId;
 
  // Re-draw until a node other than the master is chosen.
  // NOTE(review): with a single data node this loop cannot terminate unless
  // it is guarded by a condition not visible in this listing.
  533       while (nodeId == masterNodeId) {
 
  534         randomId = myRandom48(nNodes);
 
  535         nodeId = _restarter.getDbNodeId(randomId);
 
  539     g_err << 
"NdbBackup::Fail node = " << nodeId 
 
  540            << 
" error code = " << error << 
" masterNodeId = " 
  541            << masterNodeId << endl;
 
  543     CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
 
  544           "failed to set error insert");
 
  546     g_info << 
"error inserted"  << endl;
 
  547     g_info << 
"waiting some before starting backup"  << endl;
 
  // Backup attempt: expected to fail because of the inserted error.
  549     g_info << 
"starting backup"  << endl;
 
  550     int r = start(backupId);
 
  551     g_info << 
"r = " << r
 
  552            << 
" (which should fail) started with id = "  << backupId << endl;
 
  // The test on `r` that leads to this message is not visible here.
  554       g_err << 
"Backup should have failed on error_insertion " << error << endl
 
  555             << 
"Master = " << masterNodeId << 
"Node = " << nodeId << endl;
 
  559     CHECK(_restarter.waitClusterStarted() == 0,
 
  560           "waitClusterStarted failed");
 
  // Error code 10099 is inserted after the failed backup; its meaning is
  // not visible in this listing.
  562     CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
 
  563           "failed to set error insert");
 
  565     NdbSleep_SecSleep(5);
 
  // Dump { 24, 2424 } to all nodes; per the CHECK message this is a backup
  // resource check. The numeric codes are not explained in this listing.
  567     int val2[] = { 24, 2424 };
 
  568     CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
 
  569           "failed to check backup resources RestartOnErrorInsert");