MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NdbRestarts.cpp
1 /*
2  Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 #include <NdbRestarts.hpp>
19 #include <NDBT.hpp>
20 #include <string.h>
21 #include <NdbSleep.h>
22 #include <kernel/ndb_limits.h>
23 #include <signaldata/DumpStateOrd.hpp>
24 #include <NdbEnv.h>
25 #include <NDBT_Test.hpp>
26 
27 #define F_ARGS NDBT_Context* ctx, NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart
28 
29 int restartRandomNodeGraceful(F_ARGS);
30 int restartRandomNodeAbort(F_ARGS);
31 int restartRandomNodeError(F_ARGS);
32 int restartRandomNodeInitial(F_ARGS);
33 int restartNFDuringNR(F_ARGS);
34 int restartMasterNodeError(F_ARGS);
35 int twoNodeFailure(F_ARGS);
36 int fiftyPercentFail(F_ARGS);
37 int twoMasterNodeFailure(F_ARGS);
38 int restartAllNodesGracfeul(F_ARGS);
39 int restartAllNodesAbort(F_ARGS);
40 int restartAllNodesError9999(F_ARGS);
41 int fiftyPercentStopAndWait(F_ARGS);
42 int restartNodeDuringLCP(F_ARGS);
43 int stopOnError(F_ARGS);
44 int getRandomNodeId(NdbRestarter& _restarter);
45 
56 const NdbRestarts::NdbRestart NdbRestarts::m_restarts[] = {
57  /*********************************************************
58  *
59  * NODE RESTARTS with 1 node restarted
60  *
61  *********************************************************/
66  NdbRestart("RestartRandomNode",
67  NODE_RESTART,
68  restartRandomNodeGraceful,
69  2),
74  NdbRestart("RestartRandomNodeAbort",
75  NODE_RESTART,
76  restartRandomNodeAbort,
77  2),
83  NdbRestart("RestartRandomNodeError",
84  NODE_RESTART,
85  restartRandomNodeError,
86  2),
91  NdbRestart("RestartMasterNodeError",
92  NODE_RESTART,
93  restartMasterNodeError,
94  2),
99  NdbRestart("RestartRandomNodeInitial",
100  NODE_RESTART,
101  restartRandomNodeInitial,
102  2),
108  NdbRestart("RestartNFDuringNR",
109  NODE_RESTART,
110  restartNFDuringNR,
111  2),
112 
118  NdbRestart("StopOnError",
119  NODE_RESTART,
120  stopOnError,
121  1),
122 
123  /*********************************************************
124  *
125  * MULTIPLE NODE RESTARTS with more than 1 node
126  *
127  *********************************************************/
132  NdbRestart("TwoNodeFailure",
133  MULTIPLE_NODE_RESTART,
134  twoNodeFailure,
135  4),
141  NdbRestart("TwoMasterNodeFailure",
142  MULTIPLE_NODE_RESTART,
143  twoMasterNodeFailure,
144  4),
145 
146  NdbRestart("FiftyPercentFail",
147  MULTIPLE_NODE_RESTART,
148  fiftyPercentFail,
149  2),
150 
151  /*********************************************************
152  *
153  * SYSTEM RESTARTS
154  *
155  *********************************************************/
161  NdbRestart("RestartAllNodes",
162  SYSTEM_RESTART,
163  restartAllNodesGracfeul,
164  1),
169  NdbRestart("RestartAllNodesAbort",
170  SYSTEM_RESTART,
171  restartAllNodesAbort,
172  1),
177  NdbRestart("RestartAllNodesError9999",
178  SYSTEM_RESTART,
179  restartAllNodesError9999,
180  1),
186  NdbRestart("FiftyPercentStopAndWait",
187  SYSTEM_RESTART,
188  fiftyPercentStopAndWait,
189  2),
193  NdbRestart("RestartNodeDuringLCP",
194  NODE_RESTART,
195  restartNodeDuringLCP,
196  2),
197 };
198 
199 const int NdbRestarts::m_NoOfRestarts = sizeof(m_restarts) / sizeof(NdbRestart);
200 
201 
202 const NdbRestarts::NdbErrorInsert NdbRestarts::m_errors[] = {
203  NdbErrorInsert("Error9999", 9999)
204 };
205 
206 const int NdbRestarts::m_NoOfErrors = sizeof(m_errors) / sizeof(NdbErrorInsert);
207 
208 NdbRestarts::NdbRestart::NdbRestart(const char* _name,
209  NdbRestartType _type,
210  restartFunc* _func,
211  int _requiredNodes,
212  int _arg1){
213  m_name = _name;
214  m_type = _type;
215  m_restartFunc = _func;
216  m_numRequiredNodes = _requiredNodes;
217  // m_arg1 = arg1;
218 }
219 
220 
221 int NdbRestarts::getNumRestarts(){
222  return m_NoOfRestarts;
223 }
224 
225 const NdbRestarts::NdbRestart* NdbRestarts::getRestart(int _num){
226  if (_num >= m_NoOfRestarts)
227  return NULL;
228 
229  return &m_restarts[_num];
230 }
231 
232 const NdbRestarts::NdbRestart* NdbRestarts::getRestart(const char* _name){
233  for(int i = 0; i < m_NoOfRestarts; i++){
234  if (strcmp(m_restarts[i].m_name, _name) == 0){
235  return &m_restarts[i];
236  }
237  }
238  g_err << "The restart \""<< _name << "\" not found in NdbRestarts" << endl;
239  return NULL;
240 }
241 
242 
243 int NdbRestarts::executeRestart(NDBT_Context* ctx,
244  const NdbRestarts::NdbRestart* _restart,
245  unsigned int _timeout){
246  // Check that there are enough nodes in the cluster
247  // for this test
248  NdbRestarter restarter;
249  if (_restart->m_numRequiredNodes > restarter.getNumDbNodes()){
250  g_err << "This test requires " << _restart->m_numRequiredNodes << " nodes "
251  << "there are only "<< restarter.getNumDbNodes() <<" nodes in cluster"
252  << endl;
253  return NDBT_OK;
254  }
255  if (restarter.waitClusterStarted(120) != 0){
256  // If cluster is not started when we shall peform restart
257  // the restart can not be executed and the test fails
258  return NDBT_FAILED;
259  }
260 
261  int res = _restart->m_restartFunc(ctx, restarter, _restart);
262 
263  // Sleep a little waiting for nodes to react to command
264  NdbSleep_SecSleep(2);
265 
266  if (_timeout == 0){
267  // If timeout == 0 wait for ever
268  while(restarter.waitClusterStarted(60) != 0)
269  g_err << "Cluster is not started after restart. Waiting 60s more..."
270  << endl;
271  } else {
272  if (restarter.waitClusterStarted(_timeout) != 0){
273  g_err<<"Cluster failed to start" << endl;
274  res = NDBT_FAILED;
275  }
276  }
277 
278  return res;
279 }
280 
281 int NdbRestarts::executeRestart(NDBT_Context* ctx,
282  int _num,
283  unsigned int _timeout){
284  const NdbRestarts::NdbRestart* r = getRestart(_num);
285  if (r == NULL)
286  return NDBT_FAILED;
287 
288  int res = executeRestart(ctx, r, _timeout);
289  return res;
290 }
291 
292 int NdbRestarts::executeRestart(NDBT_Context* ctx,
293  const char* _name,
294  unsigned int _timeout){
295  const NdbRestarts::NdbRestart* r = getRestart(_name);
296  if (r == NULL)
297  return NDBT_FAILED;
298 
299  int res = executeRestart(ctx, r, _timeout);
300  return res;
301 }
302 
303 void NdbRestarts::listRestarts(NdbRestartType _type){
304  for(int i = 0; i < m_NoOfRestarts; i++){
305  if (m_restarts[i].m_type == _type)
306  ndbout << " " << m_restarts[i].m_name << ", min "
307  << m_restarts[i].m_numRequiredNodes
308  << " nodes"<< endl;
309  }
310 }
311 
312 void NdbRestarts::listRestarts(){
313  ndbout << "NODE RESTARTS" << endl;
314  listRestarts(NODE_RESTART);
315  ndbout << "MULTIPLE NODE RESTARTS" << endl;
316  listRestarts(MULTIPLE_NODE_RESTART);
317  ndbout << "SYSTEM RESTARTS" << endl;
318  listRestarts(SYSTEM_RESTART);
319 }
320 
321 NdbRestarts::NdbErrorInsert::NdbErrorInsert(const char* _name,
322  int _errorNo){
323 
324  m_name = _name;
325  m_errorNo = _errorNo;
326 }
327 
328 int NdbRestarts::getNumErrorInserts(){
329  return m_NoOfErrors;
330 }
331 
332 const NdbRestarts::NdbErrorInsert* NdbRestarts::getError(int _num){
333  if (_num >= m_NoOfErrors)
334  return NULL;
335  return &m_errors[_num];
336 }
337 
338 const NdbRestarts::NdbErrorInsert* NdbRestarts::getRandomError(){
339  int randomId = myRandom48(m_NoOfErrors);
340  return &m_errors[randomId];
341 }
342 
343 
344 
/**
 * Fail the enclosing function unless condition b holds.
 *
 * b is evaluated exactly once. When it is false, message m (which may be a
 * chain of << operands) is written to ndbout together with the file and
 * line of the check, and the enclosing function returns NDBT_FAILED.
 *
 * The body is wrapped in do { } while (0) so that "CHECK(...);" is a single
 * statement and stays correct inside an unbraced if/else. The argument is
 * parenthesized so an expression with low-precedence operators is taken as
 * a whole.
 */
#define CHECK(b, m) do { \
  const int _xx = (b); \
  if (!_xx) { \
    ndbout << "ERR: "<< m \
           << " " << "File: " << __FILE__ \
           << " (Line: " << __LINE__ << ")" << "- " << _xx << endl; \
    return NDBT_FAILED; \
  } \
} while (0)
359 
360 
361 
362 int restartRandomNodeGraceful(F_ARGS){
363 
364  myRandom48Init((long)NdbTick_CurrentMillisecond());
365  int randomId = myRandom48(_restarter.getNumDbNodes());
366  int nodeId = _restarter.getDbNodeId(randomId);
367 
368  g_info << _restart->m_name << ": node = "<<nodeId << endl;
369 
370  CHECK(_restarter.restartOneDbNode(nodeId) == 0,
371  "Could not restart node "<<nodeId);
372 
373  return NDBT_OK;
374 }
375 
376 int restartRandomNodeAbort(F_ARGS){
377 
378  myRandom48Init((long)NdbTick_CurrentMillisecond());
379  int randomId = myRandom48(_restarter.getNumDbNodes());
380  int nodeId = _restarter.getDbNodeId(randomId);
381 
382  g_info << _restart->m_name << ": node = "<<nodeId << endl;
383 
384  CHECK(_restarter.restartOneDbNode(nodeId, false, false, true) == 0,
385  "Could not restart node "<<nodeId);
386 
387  return NDBT_OK;
388 }
389 
390 int restartRandomNodeError(F_ARGS){
391 
392  myRandom48Init((long)NdbTick_CurrentMillisecond());
393  int randomId = myRandom48(_restarter.getNumDbNodes());
394  int nodeId = _restarter.getDbNodeId(randomId);
395 
396  ndbout << _restart->m_name << ": node = "<<nodeId << endl;
397 
398  CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0,
399  "Could not restart node "<<nodeId);
400 
401  return NDBT_OK;
402 }
403 
404 int restartMasterNodeError(F_ARGS){
405 
406  int nodeId = _restarter.getDbNodeId(0);
407 
408  g_info << _restart->m_name << ": node = "<<nodeId << endl;
409 
410  CHECK(_restarter.insertErrorInNode(nodeId, 39999) == 0,
411  "Could not restart node "<<nodeId);
412 
413  return NDBT_OK;
414 }
415 
416 int restartRandomNodeInitial(F_ARGS){
417 
418  myRandom48Init((long)NdbTick_CurrentMillisecond());
419  int randomId = myRandom48(_restarter.getNumDbNodes());
420  int nodeId = _restarter.getDbNodeId(randomId);
421 
422  g_info << _restart->m_name << ": node = "<<nodeId << endl;
423 
424  CHECK(_restarter.restartOneDbNode(nodeId, true) == 0,
425  "Could not restart node "<<nodeId);
426 
427  return NDBT_OK;
428 }
429 
430 int twoNodeFailure(F_ARGS){
431 
432  myRandom48Init((long)NdbTick_CurrentMillisecond());
433  int randomId = myRandom48(_restarter.getNumDbNodes());
434  int n[2];
435  n[0] = _restarter.getDbNodeId(randomId);
436  n[1] = _restarter.getRandomNodeOtherNodeGroup(n[0], rand());
437  g_info << _restart->m_name << ": node = "<< n[0] << endl;
438 
439  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
440  CHECK(_restarter.dumpStateOneNode(n[0], val2, 2) == 0,
441  "Failed to dump");
442  CHECK(_restarter.dumpStateOneNode(n[1], val2, 2) == 0,
443  "Failed to dump");
444 
445  CHECK(_restarter.insertErrorInNode(n[0], 9999) == 0,
446  "Could not restart node "<< n[0]);
447 
448  // Create random value, max 3 secs
449  int max = 3000;
450  int ms = (myRandom48(max)) + 1;
451  g_info << "Waiting for " << ms << "(" << max
452  << ") ms " << endl;
453  NdbSleep_MilliSleep(ms);
454 
455  g_info << _restart->m_name << ": node = "<< n[1] << endl;
456  CHECK(_restarter.insertErrorInNode(n[1], 9999) == 0,
457  "Could not restart node "<< n[1]);
458 
459  CHECK(_restarter.waitNodesNoStart(n, 2) == 0,
460  "Failed to wait nostart");
461 
462  _restarter.startNodes(n, 2);
463 
464  return NDBT_OK;
465 }
466 
467 int twoMasterNodeFailure(F_ARGS){
468 
469  int n[2];
470  n[0] = _restarter.getMasterNodeId();
471  n[1] = n[0];
472  do {
473  n[1] = _restarter.getNextMasterNodeId(n[1]);
474  } while(_restarter.getNodeGroup(n[0]) == _restarter.getNodeGroup(n[1]));
475 
476  g_info << _restart->m_name << ": ";
477  g_info << "node0 = "<< n[0] << "(" << _restarter.getNodeGroup(n[0]) << ") ";
478  g_info << "node1 = "<< n[1] << "(" << _restarter.getNodeGroup(n[1]) << ") ";
479  g_info << endl;
480 
481  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
482  CHECK(_restarter.dumpStateOneNode(n[0], val2, 2) == 0,
483  "Failed to dump");
484  CHECK(_restarter.dumpStateOneNode(n[1], val2, 2) == 0,
485  "Failed to dump");
486 
487  CHECK(_restarter.insertErrorInNode(n[0], 9999) == 0,
488  "Could not restart node "<< n[0]);
489 
490  // Create random value, max 3 secs
491  int max = 3000;
492  int ms = (myRandom48(max)) + 1;
493  g_info << "Waiting for " << ms << "(" << max
494  << ") ms " << endl;
495  NdbSleep_MilliSleep(ms);
496 
497  g_info << _restart->m_name << ": node = "<< n[1] << endl;
498 
499  CHECK(_restarter.insertErrorInNode(n[1], 9999) == 0,
500  "Could not restart node "<< n[1]);
501 
502  CHECK(_restarter.waitNodesNoStart(n, 2) == 0,
503  "Failed to wait nostart");
504 
505  _restarter.startNodes(n, 2);
506 
507  return NDBT_OK;
508 }
509 
510 int get50PercentOfNodes(NdbRestarter& restarter,
511  int * _nodes){
512  // For now simply return all nodes with even node id
513  // TODO Check nodegroup and return one node from each
514 
515  int num50Percent = restarter.getNumDbNodes() / 2;
516  assert(num50Percent <= MAX_NDB_NODES);
517 
518  // Calculate which nodes to stop, select all even nodes
519  for (int i = 0; i < num50Percent; i++){
520  _nodes[i] = restarter.getDbNodeId(i*2);
521  }
522  return num50Percent;
523 }
524 
525 int fiftyPercentFail(F_ARGS){
526 
527 
528  int nodes[MAX_NDB_NODES];
529 
530  int numNodes = get50PercentOfNodes(_restarter, nodes);
531 
532  // Stop the nodes, with nostart and abort
533  for (int i = 0; i < numNodes; i++){
534  g_info << "Stopping node "<< nodes[i] << endl;
535  int res = _restarter.restartOneDbNode(nodes[i], false, true, true);
536  CHECK(res == 0, "Could not stop node: "<< nodes[i]);
537  }
538 
539  CHECK(_restarter.waitNodesNoStart(nodes, numNodes) == 0,
540  "waitNodesNoStart");
541 
542  // Order all nodes to start
543  ndbout << "Starting all nodes" << endl;
544  CHECK(_restarter.startAll() == 0,
545  "Could not start all nodes");
546 
547  return NDBT_OK;
548 }
549 
550 
551 int restartAllNodesGracfeul(F_ARGS){
552 
553  g_info << _restart->m_name << endl;
554 
555  // Restart graceful
556  CHECK(_restarter.restartAll() == 0,
557  "Could not restart all nodes");
558 
559  return NDBT_OK;
560 
561 }
562 
563 int restartAllNodesAbort(F_ARGS){
564 
565  g_info << _restart->m_name << endl;
566 
567  // Restart abort
568  CHECK(_restarter.restartAll(false, false, true) == 0,
569  "Could not restart all nodes");
570 
571  return NDBT_OK;
572 }
573 
574 int restartAllNodesError9999(F_ARGS){
575 
576  g_info << _restart->m_name << endl;
577 
578  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ;
579  CHECK(_restarter.dumpStateAllNodes(val, 2) == 0,
580  "failed to set RestartOnErrorInsert");
581 
582  CHECK(_restarter.insertErrorInAllNodes(932) == 0,
583  "Failed to set error 932 (auto-restart on arbit error)");
584 
585  // Restart with error insert
586  CHECK(_restarter.insertErrorInAllNodes(9999) == 0,
587  "Could not restart all nodes ");
588 
589  CHECK(_restarter.waitClusterNoStart() == 0,
590  "Failed to wait not started");
591 
592  _restarter.startAll();
593 
594  return NDBT_OK;
595 }
596 
597 int fiftyPercentStopAndWait(F_ARGS){
598 
599  int nodes[MAX_NDB_NODES];
600  int numNodes = get50PercentOfNodes(_restarter, nodes);
601 
602  // Stop the nodes, with nostart and abort
603  for (int i = 0; i < numNodes; i++){
604  g_info << "Stopping node "<<nodes[i] << endl;
605  int res = _restarter.restartOneDbNode(nodes[i], false, true, true);
606  CHECK(res == 0, "Could not stop node: "<< nodes[i]);
607  }
608 
609  CHECK(_restarter.waitNodesNoStart(nodes, numNodes) == 0,
610  "waitNodesNoStart");
611 
612  // Create random value, max 120 secs
613  int max = 120;
614  int seconds = (myRandom48(max)) + 1;
615  g_info << "Waiting for " << seconds << "(" << max
616  << ") secs " << endl;
617  NdbSleep_SecSleep(seconds);
618 
619 
620  // Restart graceful
621  CHECK(_restarter.restartAll() == 0,
622  "Could not restart all nodes");
623 
624  g_info << _restart->m_name << endl;
625 
626  return NDBT_OK;
627 }
628 
/**
 * Error insert codes that restartNFDuringNR() applies, one per iteration
 * and in this order, to a node that is about to be started.
 */
int NFDuringNR_codes[] = {
  7121, 5027, 7172, 6000, 6001,
  7171, 7130, 7133, 7138, 7154,
  7144, 5026, 7139, 7132, 5045,

  7195, 7196, 7197, 7198, 7199,

  // LCP
  8000, 8001, 5010, 7022,
  7024, 7016, 7017, 5002
};
660 
661 int restartNFDuringNR(F_ARGS){
662 
663  myRandom48Init((long)NdbTick_CurrentMillisecond());
664  int i;
665  const int sz = sizeof(NFDuringNR_codes)/sizeof(NFDuringNR_codes[0]);
666  for(i = 0; i<sz; i++){
667  int randomId = myRandom48(_restarter.getNumDbNodes());
668  int nodeId = _restarter.getDbNodeId(randomId);
669  int error = NFDuringNR_codes[i];
670 
671  g_err << _restart->m_name << ": node = " << nodeId
672  << " error code = " << error << endl;
673 
674  CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
675  "Could not restart node "<< nodeId);
676 
677  CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
678  "waitNodesNoStart failed");
679 
680  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ;
681  CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
682  "failed to set RestartOnErrorInsert");
683 
684  CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
685  "failed to set error insert");
686 
687  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
688  "failed to start node");
689 
690  NdbSleep_SecSleep(3);
691 
692  CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
693  "waitNodesNoStart failed");
694 
695  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
696  "failed to start node");
697 
698  CHECK(_restarter.waitNodesStarted(&nodeId, 1) == 0,
699  "waitNodesStarted failed");
700  }
701 
702  return NDBT_OK;
703 
704  if(_restarter.getNumDbNodes() < 4)
705  return NDBT_OK;
706 
707  char buf[256];
708  if(NdbEnv_GetEnv("USER", buf, 256) == 0 || strcmp(buf, "ejonore") != 0)
709  return NDBT_OK;
710 
711  for(i = 0; i<sz && !ctx->isTestStopped(); i++){
712  const int randomId = myRandom48(_restarter.getNumDbNodes());
713  int nodeId = _restarter.getDbNodeId(randomId);
714  const int error = NFDuringNR_codes[i];
715 
716  const int masterNodeId = _restarter.getMasterNodeId();
717  CHECK(masterNodeId > 0, "getMasterNodeId failed");
718  int crashNodeId = 0;
719  do {
720  int rand = myRandom48(1000);
721  crashNodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand);
722  } while(crashNodeId == masterNodeId);
723 
724  CHECK(crashNodeId > 0, "getMasterNodeId failed");
725 
726  g_info << _restart->m_name << " restarting node = " << nodeId
727  << " error code = " << error
728  << " crash node = " << crashNodeId << endl;
729 
730  CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
731  "Could not restart node "<< nodeId);
732 
733  CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
734  "waitNodesNoStart failed");
735 
736  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
737  CHECK(_restarter.dumpStateOneNode(crashNodeId, val, 2) == 0,
738  "failed to set RestartOnErrorInsert");
739 
740  CHECK(_restarter.insertErrorInNode(crashNodeId, error) == 0,
741  "failed to set error insert");
742 
743  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
744  "failed to start node");
745 
746  CHECK(_restarter.waitClusterStarted() == 0,
747  "waitClusterStarted failed");
748  }
749 
750  return NDBT_OK;
751 }
752 
/**
 * Error insert codes that restartNodeDuringLCP() applies, in this order,
 * to the master node.
 */
int NRDuringLCP_Master_codes[] = {
  // System error in master when local checkpoint is idle.
  7009,
  // System error in master when local checkpoint is in the state
  // clcpStatus = CALCULATE_KEEP_GCI.
  7010,
  // System error in master when local checkpoint is in the state
  // clcpStatus = COPY_GCI before sending COPY_GCIREQ.
  7013,
  // System error in master when local checkpoint is in the state
  // clcpStatus = TC_CLOPSIZE before sending TC_CLOPSIZEREQ.
  7014,
  // System error in master when local checkpoint is in the state
  // clcpStatus = START_LCP_ROUND before sending START_LCP_ROUND.
  7015,
  // System error in master when local checkpoint is in the state
  // clcpStatus = IDLE before sending CONTINUEB(ZCHECK_TC_COUNTER).
  7019,
  // Master. Don't send any LCP_FRAG_ORD(last=true)
  // and crash when all have "not" been sent.
  7075,
  // Crash in master when receiving START_LCP_REQ
  7021,
  // Crash in master when sending START_LCP_CONF
  7023,
  // Crash in master when receiving LCP_FRAG_REP
  7025,
  // Crash in master when changing state to LCP_TAB_COMPLETED
  7026,
  // Crash in master when changing state to LCP_TAB_SAVED
  7027
};
774 
/**
 * Error insert codes that restartNodeDuringLCP() applies, in this order,
 * to a randomly chosen non-master node.
 */
int NRDuringLCP_NonMaster_codes[] = {
  // System error in local checkpoint participant at reception of
  // COPY_GCIREQ.
  7020,
  // Crash participant when receiving TCGETOPSIZEREQ
  8000,
  // Crash participant when receiving TC_CLOPSIZEREQ
  8001,
  // Crash any when receiving LCP_FRAGORD
  5010,
  // Crash in !master when receiving START_LCP_REQ
  7022,
  // Crash in !master when sending START_LCP_CONF
  7024,
  // Crash in !master when receiving LCP_FRAG_REP
  7016,
  // Crash in !master when changing state to LCP_TAB_COMPLETED
  7017,
  // Crash in !master when changing state to LCP_TAB_SAVED
  7018
};
788 
789 int restartNodeDuringLCP(F_ARGS) {
790  int i;
791  // Master
792  int val = DumpStateOrd::DihMinTimeBetweenLCP;
793  CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0,
794  "Failed to set LCP to min value"); // Set LCP to min val
795  int sz = sizeof(NRDuringLCP_Master_codes)/
796  sizeof(NRDuringLCP_Master_codes[0]);
797  for(i = 0; i<sz; i++) {
798 
799  int error = NRDuringLCP_Master_codes[i];
800  int masterNodeId = _restarter.getMasterNodeId();
801 
802  CHECK(masterNodeId > 0, "getMasterNodeId failed");
803 
804  ndbout << _restart->m_name << " restarting master node = " << masterNodeId
805  << " error code = " << error << endl;
806 
807  {
808  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
809  CHECK(_restarter.dumpStateAllNodes(val, 2) == 0,
810  "failed to set RestartOnErrorInsert");
811  }
812 
813  CHECK(_restarter.insertErrorInNode(masterNodeId, error) == 0,
814  "failed to set error insert");
815 
816  CHECK(_restarter.waitNodesNoStart(&masterNodeId, 1, 300) == 0,
817  "failed to wait no start");
818 
819  CHECK(_restarter.startNodes(&masterNodeId, 1) == 0,
820  "failed to start node");
821 
822  CHECK(_restarter.waitClusterStarted(300) == 0,
823  "waitClusterStarted failed");
824 
825  {
826  int val = DumpStateOrd::DihMinTimeBetweenLCP;
827  CHECK(_restarter.dumpStateOneNode(masterNodeId, &val, 1) == 0,
828  "failed to set error insert");
829  }
830  }
831 
832  // NON-Master
833  sz = sizeof(NRDuringLCP_NonMaster_codes)/
834  sizeof(NRDuringLCP_NonMaster_codes[0]);
835  for(i = 0; i<sz; i++) {
836 
837  int error = NRDuringLCP_NonMaster_codes[i];
838  int nodeId = getRandomNodeId(_restarter);
839  int masterNodeId = _restarter.getMasterNodeId();
840  CHECK(masterNodeId > 0, "getMasterNodeId failed");
841 
842  while (nodeId == masterNodeId) {
843  nodeId = getRandomNodeId(_restarter);
844  }
845 
846  ndbout << _restart->m_name << " restarting non-master node = " << nodeId
847  << " error code = " << error << endl;
848 
849  int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
850  CHECK(_restarter.dumpStateAllNodes(val, 2) == 0,
851  "failed to set RestartOnErrorInsert");
852 
853  CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
854  "failed to set error insert");
855 
856  CHECK(_restarter.waitNodesNoStart(&nodeId, 1, 300) == 0,
857  "failed to wait no start");
858 
859  CHECK(_restarter.startNodes(&nodeId, 1) == 0,
860  "failed to start node");
861 
862  CHECK(_restarter.waitClusterStarted(300) == 0,
863  "waitClusterStarted failed");
864 
865  {
866  int val = DumpStateOrd::DihMinTimeBetweenLCP;
867  CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0,
868  "failed to set error insert");
869  }
870  }
871 
872  return NDBT_OK;
873 }
874 
875 int stopOnError(F_ARGS){
876 
877  myRandom48Init((long)NdbTick_CurrentMillisecond());
878 
879  int randomId = myRandom48(_restarter.getNumDbNodes());
880  int nodeId = _restarter.getDbNodeId(randomId);
881 
882  do {
883  g_info << _restart->m_name << ": node = " << nodeId
884  << endl;
885 
886  CHECK(_restarter.waitClusterStarted(300) == 0,
887  "waitClusterStarted failed");
888 
889  int val = DumpStateOrd::NdbcntrTestStopOnError;
890  CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0,
891  "failed to set NdbcntrTestStopOnError");
892 
893  NdbSleep_SecSleep(3);
894 
895  CHECK(_restarter.waitClusterStarted(300) == 0,
896  "waitClusterStarted failed");
897  } while (false);
898 
899  return NDBT_OK;
900 }
901 
902 int getRandomNodeId(NdbRestarter& _restarter) {
903  myRandom48Init((long)NdbTick_CurrentMillisecond());
904  int randomId = myRandom48(_restarter.getNumDbNodes());
905  int nodeId = _restarter.getDbNodeId(randomId);
906 
907  return nodeId;
908 }