MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QmgrMain.cpp
1 /*
2  Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 
19 #define QMGR_C
20 #include "Qmgr.hpp"
21 #include <pc.hpp>
22 #include <NdbTick.h>
23 #include <signaldata/EventReport.hpp>
24 #include <signaldata/StartOrd.hpp>
25 #include <signaldata/CloseComReqConf.hpp>
26 #include <signaldata/PrepFailReqRef.hpp>
27 #include <signaldata/NodeFailRep.hpp>
28 #include <signaldata/ReadNodesConf.hpp>
29 #include <signaldata/NFCompleteRep.hpp>
30 #include <signaldata/CheckNodeGroups.hpp>
31 #include <signaldata/ArbitSignalData.hpp>
32 #include <signaldata/ApiRegSignalData.hpp>
33 #include <signaldata/ApiVersion.hpp>
34 #include <signaldata/BlockCommitOrd.hpp>
35 #include <signaldata/FailRep.hpp>
36 #include <signaldata/DisconnectRep.hpp>
37 #include <signaldata/ApiBroadcast.hpp>
38 #include <signaldata/Upgrade.hpp>
39 #include <signaldata/EnableCom.hpp>
40 #include <signaldata/RouteOrd.hpp>
41 #include <signaldata/NodePing.hpp>
42 #include <signaldata/DihRestart.hpp>
43 #include <ndb_version.h>
44 
45 #include <EventLogger.hpp>
46 extern EventLogger * g_eventLogger;
47 
// Optional tracing for the QMGR start protocol.
// Uncomment the define below to enable it; when DEBUG_QMGR_START is not
// defined all four macros expand to nothing.
48 //#define DEBUG_QMGR_START
49 #ifdef DEBUG_QMGR_START
50 #include <DebuggerNames.hpp>
// DEBUG(x): stream "QMGR <source line>: <x>" to ndbout.
51 #define DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
// DEBUG_START(gsn, node, msg): trace a signal (named via getSignalName) sent to one node.
52 #define DEBUG_START(gsn, node, msg) DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
// DEBUG_START2(gsn, rg, msg): as DEBUG_START, but prints rg.m_nodes rendered
// by getText() into a local 255-byte buffer.
53 #define DEBUG_START2(gsn, rg, msg) { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
// DEBUG_START3(signal, msg): trace a received signal and the node id of its sender.
// NOTE: this expansion already ends with ';'.
54 #define DEBUG_START3(signal, msg) DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
55 #else
56 #define DEBUG(x)
57 #define DEBUG_START(gsn, node, msg)
58 #define DEBUG_START2(gsn, rg, msg)
59 #define DEBUG_START3(signal, msg)
60 #endif
61 
89 // Signal entries and statement blocks
90 /* 4 P R O G R A M */
91 /*******************************/
92 /* CMHEART_BEAT */
93 /*******************************/
94 void Qmgr::execCM_HEARTBEAT(Signal* signal)
95 {
96  NodeRecPtr hbNodePtr;
97  jamEntry();
98  hbNodePtr.i = signal->theData[0];
99  ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
100  setNodeInfo(hbNodePtr.i).m_heartbeat_cnt= 0;
101  return;
102 }//Qmgr::execCM_HEARTBEAT()
103 
104 /*******************************/
105 /* CM_NODEINFOREF */
106 /*******************************/
107 void Qmgr::execCM_NODEINFOREF(Signal* signal)
108 {
109  jamEntry();
110  systemErrorLab(signal, __LINE__);
111  return;
112 }//Qmgr::execCM_NODEINFOREF()
113 
114 /*******************************/
115 /* CONTINUEB */
116 /*******************************/
117 void Qmgr::execCONTINUEB(Signal* signal)
118 {
119  jamEntry();
120  const Uint32 tcontinuebType = signal->theData[0];
121  const Uint32 tdata0 = signal->theData[1];
122  const Uint32 tdata1 = signal->theData[2];
123  switch (tcontinuebType) {
124  case ZREGREQ_TIMELIMIT:
125  jam();
126  if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
127  jam();
128  return;
129  }//if
130  regreqTimeLimitLab(signal);
131  break;
132  case ZREGREQ_MASTER_TIMELIMIT:
133  jam();
134  if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
135  jam();
136  return;
137  }//if
138  //regreqMasterTimeLimitLab(signal);
139  failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
140  return;
141  break;
142  case ZTIMER_HANDLING:
143  jam();
144  timerHandlingLab(signal);
145  return;
146  break;
147  case ZARBIT_HANDLING:
148  jam();
149  runArbitThread(signal);
150  return;
151  break;
152  case ZSTART_FAILURE_LIMIT:{
153  if (cpresident != ZNIL)
154  {
155  jam();
156  return;
157  }
158  Uint64 now = NdbTick_CurrentMillisecond();
159 
160  if (now > (c_start_election_time + c_restartFailureTimeout))
161  {
162  jam();
163  BaseString tmp;
164  tmp.append("Shutting down node as total restart time exceeds "
165  " StartFailureTimeout as set in config file ");
166  if(c_restartFailureTimeout == (Uint32) ~0)
167  tmp.append(" 0 (inifinite)");
168  else
169  tmp.appfmt(" %d", c_restartFailureTimeout);
170 
171  progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
172  }
173  signal->theData[0] = ZSTART_FAILURE_LIMIT;
174  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
175  return;
176  }
177  default:
178  jam();
179  // ZCOULD_NOT_OCCUR_ERROR;
180  systemErrorLab(signal, __LINE__);
181  return;
182  break;
183  }//switch
184  return;
185 }//Qmgr::execCONTINUEB()
186 
187 
188 void Qmgr::execDEBUG_SIG(Signal* signal)
189 {
190  NodeRecPtr debugNodePtr;
191  jamEntry();
192  debugNodePtr.i = signal->theData[0];
193  ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
194  return;
195 }//Qmgr::execDEBUG_SIG()
196 
197 /*******************************/
198 /* FAIL_REP */
199 /*******************************/
200 void Qmgr::execFAIL_REP(Signal* signal)
201 {
202  const FailRep * const failRep = (FailRep *)&signal->theData[0];
203  const NodeId failNodeId = failRep->failNodeId;
204  const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
205  Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
206  if (!failSource)
207  {
208  /* Failure source not included, use sender of signal as 'source' */
209  failSource = refToNode(signal->getSendersBlockRef());
210  }
211 
212  jamEntry();
213  failReportLab(signal, failNodeId, failCause, failSource);
214  return;
215 }//Qmgr::execFAIL_REP()
216 
217 /*******************************/
218 /* PRES_TOREQ */
219 /*******************************/
220 void Qmgr::execPRES_TOREQ(Signal* signal)
221 {
222  jamEntry();
223  BlockReference Tblockref = signal->theData[0];
224  signal->theData[0] = getOwnNodeId();
225  signal->theData[1] = ccommitFailureNr;
226  sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
227  return;
228 }//Qmgr::execPRES_TOREQ()
229 
230 void
231 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
232 {
233  jamEntry();
234 
235  const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
236 
237  Uint32 ref = req->senderRef;
238  Uint32 senderData = req->senderData;
239 
240  const ndb_mgm_configuration_iterator * p =
241  m_ctx.m_config.getOwnConfigIterator();
242  ndbrequire(p != 0);
243 
244  ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
245  conf->senderRef = reference();
246  conf->senderData = senderData;
247  sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
248  ReadConfigConf::SignalLength, JBB);
249 }
250 
251 void
252 Qmgr::execSTART_ORD(Signal* signal)
253 {
257  Uint64 now = NdbTick_CurrentMillisecond();
258  signal->theData[0] = ZTIMER_HANDLING;
259  signal->theData[1] = Uint32(now >> 32);
260  signal->theData[2] = Uint32(now);
261  sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
262 
263  NodeRecPtr nodePtr;
264  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
265  {
266  ptrAss(nodePtr, nodeRec);
267  nodePtr.p->ndynamicId = 0;
268  nodePtr.p->hbOrder = 0;
269  Uint32 cnt = 0;
270  Uint32 type = getNodeInfo(nodePtr.i).m_type;
271  switch(type){
272  case NodeInfo::DB:
273  jam();
274  nodePtr.p->phase = ZINIT;
275  c_definedNodes.set(nodePtr.i);
276  break;
277  case NodeInfo::API:
278  jam();
279  nodePtr.p->phase = ZAPI_INACTIVE;
280  break;
281  case NodeInfo::MGM:
282  jam();
286  nodePtr.p->phase = ZAPI_INACTIVE;
287  break;
288  default:
289  jam();
290  nodePtr.p->phase = ZAPI_INACTIVE;
291  }
292 
293  setNodeInfo(nodePtr.i).m_heartbeat_cnt = cnt;
294  nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
295  nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
296  nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
297  nodePtr.p->failState = NORMAL;
298  }//for
299 }
300 
301 /*
302 4.2 ADD NODE MODULE*/
303 /*##########################################################################*/
304 /*
305 4.2.1 STTOR */
311 /*******************************/
312 /* STTOR */
313 /*******************************/
314 void Qmgr::execSTTOR(Signal* signal)
315 {
316  jamEntry();
317 
318  switch(signal->theData[1]){
319  case 1:
320  initData(signal);
321  startphase1(signal);
322  recompute_version_info(NodeInfo::DB);
323  recompute_version_info(NodeInfo::API);
324  recompute_version_info(NodeInfo::MGM);
325  return;
326  case 7:
327  cactivateApiCheck = 1;
328  if (cpresident == getOwnNodeId())
329  {
330  switch(arbitRec.method){
331  case ArbitRec::DISABLED:
332  break;
333 
334  case ArbitRec::METHOD_EXTERNAL:
335  case ArbitRec::METHOD_DEFAULT:
340  jam();
341  handleArbitStart(signal);
342  break;
343  }
344  }
345  break;
346  case 8:{
351  c_allow_api_connect = 1;
352  NodeRecPtr nodePtr;
353  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
354  {
355  jam();
356  Uint32 type = getNodeInfo(nodePtr.i).m_type;
357  if (type != NodeInfo::API)
358  continue;
359 
360  ptrAss(nodePtr, nodeRec);
361  if (nodePtr.p->phase == ZAPI_INACTIVE)
362  {
363  jam();
364  setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
365  nodePtr.p->phase = ZFAIL_CLOSING;
366  nodePtr.p->failState = NORMAL;
367  }
368  }
369  }
370  }
371 
372  sendSttorryLab(signal);
373  return;
374 }//Qmgr::execSTTOR()
375 
376 void Qmgr::sendSttorryLab(Signal* signal)
377 {
378 /****************************<*/
379 /*< STTORRY <*/
380 /****************************<*/
381  signal->theData[3] = 7;
382  signal->theData[4] = 8;
383  signal->theData[5] = 255;
384  sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
385  return;
386 }//Qmgr::sendSttorryLab()
387 
388 void Qmgr::startphase1(Signal* signal)
389 {
390  jamEntry();
391 
392  NodeRecPtr nodePtr;
393  nodePtr.i = getOwnNodeId();
394  ptrAss(nodePtr, nodeRec);
395  nodePtr.p->phase = ZSTARTING;
396 
397  DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
398  req->senderRef = reference();
399  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
400  DihRestartReq::SignalLength, JBB);
401  return;
402 }
403 
404 void
405 Qmgr::execDIH_RESTARTREF(Signal*signal)
406 {
407  jamEntry();
408 
409  const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
410  signal->getDataPtr());
411  c_start.m_latest_gci = 0;
412  c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
413  ref->no_nodegroup_mask);
414  execCM_INFOCONF(signal);
415 }
416 
417 void
418 Qmgr::execDIH_RESTARTCONF(Signal*signal)
419 {
420  jamEntry();
421 
422  const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
423  signal->getDataPtr());
424 
425  c_start.m_latest_gci = conf->latest_gci;
426  c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
427  conf->no_nodegroup_mask);
428  execCM_INFOCONF(signal);
429 }
430 
431 void Qmgr::setHbDelay(UintR aHbDelay)
432 {
433  NDB_TICKS now = NdbTick_CurrentMillisecond();
434  hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
435  hb_send_timer.reset(now);
436  hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
437  hb_check_timer.reset(now);
438 }
439 
440 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
441 {
442  NDB_TICKS now = NdbTick_CurrentMillisecond();
443  chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
444  hb_api_timer.setDelay(chbApiDelay);
445  hb_api_timer.reset(now);
446 }
447 
448 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
449 {
450  arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
451 }
452 
453 void Qmgr::setCCDelay(UintR aCCDelay)
454 {
455  NDB_TICKS now = NdbTick_CurrentMillisecond();
456  if (aCCDelay == 0)
457  {
458  /* Connectivity check disabled */
459  m_connectivity_check.m_enabled = false;
460  m_connectivity_check.m_timer.setDelay(0);
461  }
462  else
463  {
464  m_connectivity_check.m_enabled = true;
465  m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
466  m_connectivity_check.m_timer.reset(now);
467  }
468 }
469 
470 void Qmgr::execCONNECT_REP(Signal* signal)
471 {
472  jamEntry();
473  const Uint32 nodeId = signal->theData[0];
474 
475  if (ERROR_INSERTED(931))
476  {
477  jam();
478  ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
479  infoEvent("Discarding CONNECT_REP(%d)", nodeId);
480  return;
481  }
482 
483  c_connectedNodes.set(nodeId);
484 
485  NodeRecPtr nodePtr;
486  nodePtr.i = nodeId;
487  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
488  nodePtr.p->m_secret = 0;
489 
490  nodePtr.i = getOwnNodeId();
491  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
492  NodeInfo nodeInfo = getNodeInfo(nodeId);
493  switch(nodePtr.p->phase){
494  case ZRUNNING:
495  if (nodeInfo.getType() == NodeInfo::DB)
496  {
497  ndbrequire(!c_clusterNodes.get(nodeId));
498  }
499  case ZSTARTING:
500  jam();
501  break;
502  case ZPREPARE_FAIL:
503  case ZFAIL_CLOSING:
504  jam();
505  return;
506  case ZAPI_ACTIVE:
507  case ZAPI_INACTIVE:
508  return;
509  case ZINIT:
510  ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM);
511  break;
512  default:
513  ndbrequire(false);
514  }
515 
516  if (nodeInfo.getType() != NodeInfo::DB)
517  {
518  jam();
519  return;
520  }
521 
522  switch(c_start.m_gsn){
523  case GSN_CM_REGREQ:
524  jam();
525  sendCmRegReq(signal, nodeId);
526 
531  ndbrequire(nodePtr.p->phase == ZSTARTING);
532  ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
533  return;
534  case GSN_CM_NODEINFOREQ:
535  jam();
536 
537  if (c_start.m_nodes.isWaitingFor(nodeId))
538  {
539  jam();
540  ndbrequire(getOwnNodeId() != cpresident);
541  ndbrequire(nodePtr.p->phase == ZSTARTING);
542  sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
543  return;
544  }
545  return;
546  case GSN_CM_NODEINFOCONF:{
547  jam();
548 
549  ndbrequire(getOwnNodeId() != cpresident);
550  ndbrequire(nodePtr.p->phase == ZRUNNING);
551  if (c_start.m_nodes.isWaitingFor(nodeId))
552  {
553  jam();
554  c_start.m_nodes.clearWaitingFor(nodeId);
555  c_start.m_gsn = RNIL;
556 
557  NodeRecPtr addNodePtr;
558  addNodePtr.i = nodeId;
559  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
560  cmAddPrepare(signal, addNodePtr, nodePtr.p);
561  return;
562  }
563  }
564  default:
565  (void)1;
566  }
567 
568  ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
569  ndbrequire(!c_readnodes_nodes.get(nodeId));
570  c_readnodes_nodes.set(nodeId);
571  signal->theData[0] = reference();
572  sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
573  return;
574 }//Qmgr::execCONNECT_REP()
575 
576 void
577 Qmgr::execREAD_NODESCONF(Signal* signal)
578 {
579  jamEntry();
580  check_readnodes_reply(signal,
581  refToNode(signal->getSendersBlockRef()),
582  GSN_READ_NODESCONF);
583 }
584 
585 void
586 Qmgr::execREAD_NODESREF(Signal* signal)
587 {
588  jamEntry();
589  check_readnodes_reply(signal,
590  refToNode(signal->getSendersBlockRef()),
591  GSN_READ_NODESREF);
592 }
593 
594 /*******************************/
595 /* CM_INFOCONF */
596 /*******************************/
597 void Qmgr::execCM_INFOCONF(Signal* signal)
598 {
602  signal->theData[0] = 0; // no answer
603  signal->theData[1] = 0; // no id
604  signal->theData[2] = NodeInfo::DB;
605  sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
606 
607  cpresident = ZNIL;
608  cpresidentAlive = ZFALSE;
609  c_start_election_time = NdbTick_CurrentMillisecond();
610 
611  signal->theData[0] = ZSTART_FAILURE_LIMIT;
612  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
613 
614  cmInfoconf010Lab(signal);
615 
616  return;
617 }//Qmgr::execCM_INFOCONF()
618 
// Start type copied into c_start.m_start_type by cmInfoconf010Lab().
619 Uint32 g_start_type = 0;
// Nodes copied into c_start.m_skip_nodes (masked with c_definedNodes) by
// cmInfoconf010Lab(). Set from outside this file - presumably by a command
// line option ("clo"); confirm against the code that assigns it.
620 NdbNodeBitmask g_nowait_nodes; // Set by clo
621 
622 void Qmgr::cmInfoconf010Lab(Signal* signal)
623 {
624  c_start.m_startKey = 0;
625  c_start.m_startNode = getOwnNodeId();
626  c_start.m_nodes.clearWaitingFor();
627  c_start.m_gsn = GSN_CM_REGREQ;
628  c_start.m_starting_nodes.clear();
629  c_start.m_starting_nodes_w_log.clear();
630  c_start.m_regReqReqSent = 0;
631  c_start.m_regReqReqRecv = 0;
632  c_start.m_skip_nodes = g_nowait_nodes;
633  c_start.m_skip_nodes.bitAND(c_definedNodes);
634  c_start.m_start_type = g_start_type;
635 
636  NodeRecPtr nodePtr;
637  cnoOfNodes = 0;
638  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
639  jam();
640  ptrAss(nodePtr, nodeRec);
641 
642  if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
643  continue;
644 
645  c_start.m_nodes.setWaitingFor(nodePtr.i);
646  cnoOfNodes++;
647 
648  if(!c_connectedNodes.get(nodePtr.i))
649  continue;
650 
651  sendCmRegReq(signal, nodePtr.i);
652  }
653 
654  //----------------------------------------
655  /* Wait for a while. When it returns */
656  /* we will check if we got any CM_REGREF*/
657  /* or CM_REGREQ (lower nodeid than our */
658  /* own). */
659  //----------------------------------------
660  signal->theData[0] = ZREGREQ_TIMELIMIT;
661  signal->theData[1] = c_start.m_startKey;
662  signal->theData[2] = c_start.m_startNode;
663  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
664 
665  creadyDistCom = ZTRUE;
666  return;
667 }//Qmgr::cmInfoconf010Lab()
668 
669 void
670 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
671  CmRegReq * req = (CmRegReq *)&signal->theData[0];
672  req->blockRef = reference();
673  req->nodeId = getOwnNodeId();
674  req->version = NDB_VERSION;
675  req->mysql_version = NDB_MYSQL_VERSION_D;
676  req->latest_gci = c_start.m_latest_gci;
677  req->start_type = c_start.m_start_type;
678  c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
679  const Uint32 ref = calcQmgrBlockRef(nodeId);
680  sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
681  DEBUG_START(GSN_CM_REGREQ, nodeId, "");
682 
683  c_start.m_regReqReqSent++;
684 }
685 
686 /*
687 4.4.11 CM_REGREQ */
720 /*******************************/
721 /* CM_REGREQ */
722 /*******************************/
723 static
724 int
725 check_start_type(Uint32 starting, Uint32 own)
726 {
727  if (starting == (1 << NodeState::ST_INITIAL_START) &&
728  ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
729  {
730  return 1;
731  }
732  return 0;
733 }
734 
735 void Qmgr::execCM_REGREQ(Signal* signal)
736 {
737  DEBUG_START3(signal, "");
738 
739  NodeRecPtr addNodePtr;
740  jamEntry();
741 
742  CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
743  const BlockReference Tblockref = cmRegReq->blockRef;
744  const Uint32 startingVersion = cmRegReq->version;
745  Uint32 startingMysqlVersion = cmRegReq->mysql_version;
746  addNodePtr.i = cmRegReq->nodeId;
747  Uint32 gci = 1;
748  Uint32 start_type = ~0;
749  NdbNodeBitmask skip_nodes;
750 
751  if (!c_connectedNodes.get(cmRegReq->nodeId))
752  {
753  jam();
754 
763  g_eventLogger->info("discarding CM_REGREQ from %u "
764  "as we're not yet connected (isNdbMt: %u)",
765  cmRegReq->nodeId,
766  (unsigned)isNdbMt());
767 
768  ndbrequire(isNdbMt());
769  return;
770  }
771 
772  if (signal->getLength() == CmRegReq::SignalLength)
773  {
774  jam();
775  gci = cmRegReq->latest_gci;
776  start_type = cmRegReq->start_type;
777  skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
778  }
779 
780  if (startingVersion < NDBD_SPLIT_VERSION)
781  {
782  startingMysqlVersion = 0;
783  }
784 
785  if (creadyDistCom == ZFALSE) {
786  jam();
787  /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
788  return;
789  }//if
790 
791  if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
792  jam();
793  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
794  return;
795  }
796 
797  if (!ndb_check_micro_gcp(startingVersion))
798  {
799  jam();
800  infoEvent("Connection from node %u refused as it's not micro GCP enabled",
801  addNodePtr.i);
802  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
803  return;
804  }
805 
806  if (!ndb_pnr(startingVersion))
807  {
808  jam();
809  infoEvent("Connection from node %u refused as it's not does not support "
810  "parallel node recovery",
811  addNodePtr.i);
812  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
813  return;
814  }
815 
816  if (!ndb_check_hb_order_version(startingVersion) &&
817  m_hb_order_config_used)
818  {
819  jam();
820  infoEvent("Connection from node %u refused as it does not support "
821  "user-defined HeartbeatOrder",
822  addNodePtr.i);
823  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
824  return;
825  }
826 
827  if (m_connectivity_check.m_enabled &&
828  !ndbd_connectivity_check(startingVersion))
829  {
830  jam();
831  infoEvent("Connection from node %u refused as it does not support "
832  "ConnectCheckIntervalDelay",
833  addNodePtr.i);
834  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
835  return;
836  }
837 
838  if (check_start_type(start_type, c_start.m_start_type))
839  {
840  jam();
841  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
842  return;
843  }
844 
845  if (cpresident != getOwnNodeId())
846  {
847  jam();
848 
849  if (cpresident == ZNIL)
850  {
851  /***
852  * We don't know the president.
853  * If the node to be added has lower node id
854  * than our president cancidate. Set it as
855  * candidate
856  */
857  jam();
858  if (gci > c_start.m_president_candidate_gci ||
859  (gci == c_start.m_president_candidate_gci &&
860  addNodePtr.i < c_start.m_president_candidate))
861  {
862  jam();
863  c_start.m_president_candidate = addNodePtr.i;
864  c_start.m_president_candidate_gci = gci;
865  }
866  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
867  return;
868  }
869 
875  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
876  return;
877  }//if
878 
879  if (c_start.m_startNode != 0)
880  {
881  jam();
885  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
886  return;
887  }//if
888 
889  if (ctoStatus == Q_ACTIVE)
890  {
891  jam();
895  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
896  return;
897  }//if
898 
899  if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
900  {
901  jam();
905  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
906  return;
907  }
908 
909  if (getNodeState().getSingleUserMode())
910  {
916  // handle rolling upgrade
917  {
918  unsigned int get_major = getMajor(startingVersion);
919  unsigned int get_minor = getMinor(startingVersion);
920  unsigned int get_build = getBuild(startingVersion);
921 
922  if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
923  jam();
924 
925  infoEvent("QMGR: detect upgrade: new node %u old version %u.%u.%u",
926  (unsigned int)addNodePtr.i, get_major, get_minor, get_build);
931  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
932  } else {
933  jam();
934 
935  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
936  }//if
937  }
938 
939  return;
940  }//if
941 
942  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
943  Phase phase = addNodePtr.p->phase;
944  if (phase != ZINIT)
945  {
946  jam();
947  DEBUG("phase = " << phase);
948  sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
949  return;
950  }
951 
952  jam();
966  c_start.m_startKey++;
967  c_start.m_startNode = addNodePtr.i;
968 
972  UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
973  TdynId |= (addNodePtr.p->hbOrder << 16);
974  setNodeInfo(addNodePtr.i).m_version = startingVersion;
975  setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
976  recompute_version_info(NodeInfo::DB, startingVersion);
977  addNodePtr.p->ndynamicId = TdynId;
978 
982  CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
983  cmRegConf->presidentBlockRef = reference();
984  cmRegConf->presidentNodeId = getOwnNodeId();
985  cmRegConf->presidentVersion = getNodeInfo(getOwnNodeId()).m_version;
986  cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
987  cmRegConf->dynamicId = TdynId;
988  c_clusterNodes.copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
989  sendSignal(Tblockref, GSN_CM_REGCONF, signal,
990  CmRegConf::SignalLength, JBA);
991  DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
992 
996  c_start.m_nodes = c_clusterNodes;
997  c_start.m_nodes.setWaitingFor(addNodePtr.i);
998  c_start.m_gsn = GSN_CM_ADD;
999 
1000  NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1001  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1002  cmAdd->requestType = CmAdd::Prepare;
1003  cmAdd->startingNodeId = addNodePtr.i;
1004  cmAdd->startingVersion = startingVersion;
1005  cmAdd->startingMysqlVersion = startingMysqlVersion;
1006  sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1007  DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1008 
1012  return;
1013  signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1014  signal->theData[1] = c_start.m_startKey;
1015  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1016 
1017  return;
1018 }//Qmgr::execCM_REGREQ()
1019 
1020 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1021  CmRegRef::ErrorCode Terror)
1022 {
1023  CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1024  ref->blockRef = reference();
1025  ref->nodeId = getOwnNodeId();
1026  ref->errorCode = Terror;
1027  ref->presidentCandidate =
1028  (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1029  ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1030  ref->latest_gci = c_start.m_latest_gci;
1031  ref->start_type = c_start.m_start_type;
1032  c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1033  sendSignal(TBRef, GSN_CM_REGREF, signal,
1034  CmRegRef::SignalLength, JBB);
1035  DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1036  return;
1037 }//Qmgr::sendCmRegrefLab()
1038 
1039 /*
1040 4.4.11 CM_REGCONF */
1048 /*******************************/
1049 /* CM_REGCONF */
1050 /*******************************/
1051 void Qmgr::execCM_REGCONF(Signal* signal)
1052 {
1053  DEBUG_START3(signal, "");
1054 
1055  NodeRecPtr myNodePtr;
1056  NodeRecPtr nodePtr;
1057  jamEntry();
1058 
1059  const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1060 
1061  if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1062  jam();
1063  char buf[128];
1064  BaseString::snprintf(buf,sizeof(buf),
1065  "incompatible version own=0x%x other=0x%x, "
1066  " shutting down",
1067  NDB_VERSION, cmRegConf->presidentVersion);
1068  progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1069  return;
1070  }
1071 
1072  if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1073  m_hb_order_config_used) {
1074  jam();
1075  char buf[128];
1076  BaseString::snprintf(buf,sizeof(buf),
1077  "incompatible version own=0x%x other=0x%x, "
1078  "due to user-defined HeartbeatOrder, shutting down",
1079  NDB_VERSION, cmRegConf->presidentVersion);
1080  progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1081  return;
1082  }
1083 
1084  if (m_connectivity_check.m_enabled &&
1085  !ndbd_connectivity_check(cmRegConf->presidentVersion))
1086  {
1087  jam();
1088  m_connectivity_check.m_enabled = false;
1089  ndbout_c("Disabling ConnectCheckIntervalDelay as president "
1090  " does not support it");
1091  infoEvent("Disabling ConnectCheckIntervalDelay as president "
1092  " does not support it");
1093  }
1094 
1095  myNodePtr.i = getOwnNodeId();
1096  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1097 
1098  ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1099  ndbrequire(myNodePtr.p->phase == ZSTARTING);
1100 
1101  cpdistref = cmRegConf->presidentBlockRef;
1102  cpresident = cmRegConf->presidentNodeId;
1103  UintR TdynamicId = cmRegConf->dynamicId;
1104  c_maxDynamicId = TdynamicId & 0xFFFF;
1105  c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1106 
1107  myNodePtr.p->ndynamicId = TdynamicId;
1108 
1109  // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
1110  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1111 
1112 /*--------------------------------------------------------------*/
1113 // Send this as an EVENT REPORT to inform about hearing about
1114 // other NDB node proclaiming to be president.
1115 /*--------------------------------------------------------------*/
1116  signal->theData[0] = NDB_LE_CM_REGCONF;
1117  signal->theData[1] = getOwnNodeId();
1118  signal->theData[2] = cpresident;
1119  signal->theData[3] = TdynamicId;
1120  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1121 
1122  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1123  jam();
1124  if (c_clusterNodes.get(nodePtr.i)){
1125  jam();
1126  ptrAss(nodePtr, nodeRec);
1127 
1128  ndbrequire(nodePtr.p->phase == ZINIT);
1129  nodePtr.p->phase = ZRUNNING;
1130 
1131  if(c_connectedNodes.get(nodePtr.i)){
1132  jam();
1133  sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
1134  }
1135  }
1136  }
1137 
1138  c_start.m_gsn = GSN_CM_NODEINFOREQ;
1139  c_start.m_nodes = c_clusterNodes;
1140 
1141  if (ERROR_INSERTED(937))
1142  {
1143  CLEAR_ERROR_INSERT_VALUE;
1144  signal->theData[0] = 9999;
1145  sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
1146  }
1147 
1148  return;
1149 }//Qmgr::execCM_REGCONF()
1150 
1151 void
1152 Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
1153 {
1154  NodeRecPtr myNodePtr;
1155  myNodePtr.i = getOwnNodeId();
1156  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1157 
1158  NodeRecPtr nodePtr;
1159  nodePtr.i = nodeId;
1160  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1161 
1162  ndbrequire(c_readnodes_nodes.get(nodeId));
1163  ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
1164  if (gsn == GSN_READ_NODESREF)
1165  {
1166  jam();
1167 retry:
1168  signal->theData[0] = reference();
1169  sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1170  return;
1171  }
1172 
1173  if (conf->masterNodeId == ZNIL)
1174  {
1175  jam();
1176  goto retry;
1177  }
1178 
1179  Uint32 president = conf->masterNodeId;
1180  if (president == cpresident)
1181  {
1182  jam();
1183  c_readnodes_nodes.clear(nodeId);
1184  return;
1185  }
1186 
1187  char buf[255];
1188  BaseString::snprintf(buf, sizeof(buf),
1189  "check StartPartialTimeout, "
1190  "node %d thinks %d is president, "
1191  "I think president is: %d",
1192  nodeId, president, cpresident);
1193 
1194  ndbout_c("%s", buf);
1195  CRASH_INSERTION(933);
1196 
1197  if (getNodeState().startLevel == NodeState::SL_STARTED)
1198  {
1199  jam();
1200  NdbNodeBitmask part;
1201  part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
1202  FailRep* rep = (FailRep*)signal->getDataPtrSend();
1203  rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1204  rep->partitioned.president = cpresident;
1205  c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1206  rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1207  Uint32 ref = calcQmgrBlockRef(nodeId);
1208  Uint32 i = 0;
1209  /* Send source of event info if a node supports it */
1210  Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
1211  while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
1212  {
1213  if (i == nodeId)
1214  continue;
1215  rep->failNodeId = i;
1216  bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(i)).m_version);
1217  sendSignal(ref, GSN_FAIL_REP, signal,
1218  length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1219  JBA);
1220  }
1221  rep->failNodeId = nodeId;
1222  bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(nodeId)).m_version);
1223 
1224  sendSignal(ref, GSN_FAIL_REP, signal,
1225  length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1226  JBB);
1227  return;
1228  }
1229 
1230  CRASH_INSERTION(932);
1231  CRASH_INSERTION(938);
1232 
1233  progError(__LINE__,
1234  NDBD_EXIT_PARTITIONED_SHUTDOWN,
1235  buf);
1236 
1237  ndbrequire(false);
1238 }
1239 
1240 void
1241 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1242  CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1243  req->nodeId = getOwnNodeId();
1244  req->dynamicId = self->ndynamicId;
1245  req->version = getNodeInfo(getOwnNodeId()).m_version;
1246  req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1247  req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1248  const Uint32 ref = calcQmgrBlockRef(nodeId);
1249  sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1250  DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1251 }
1252 
1253 /*
1254 4.4.11 CM_REGREF */
1268 /*******************************/
1269 /* CM_REGREF */
1270 /*******************************/
1271 static
1272 const char *
1273 get_start_type_string(Uint32 st)
1274 {
1275  static char buf[256];
1276 
1277  if (st == 0)
1278  {
1279  return "<ANY>";
1280  }
1281  else
1282  {
1283  buf[0] = 0;
1284  for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1285  {
1286  if (st & (1 << i))
1287  {
1288  if (buf[0])
1289  strcat(buf, "/");
1290  switch(i){
1291  case NodeState::ST_INITIAL_START:
1292  strcat(buf, "inital start");
1293  break;
1294  case NodeState::ST_SYSTEM_RESTART:
1295  strcat(buf, "system restart");
1296  break;
1297  case NodeState::ST_NODE_RESTART:
1298  strcat(buf, "node restart");
1299  break;
1300  case NodeState::ST_INITIAL_NODE_RESTART:
1301  strcat(buf, "initial node restart");
1302  break;
1303  }
1304  }
1305  }
1306  return buf;
1307  }
1308 }
1309 
1310 void Qmgr::execCM_REGREF(Signal* signal)
1311 {
1312  jamEntry();
1313 
1314  CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
1315  UintR TaddNodeno = ref->nodeId;
1316  UintR TrefuseReason = ref->errorCode;
1317  Uint32 candidate = ref->presidentCandidate;
1318  Uint32 node_gci = 1;
1319  Uint32 candidate_gci = 1;
1320  Uint32 start_type = ~0;
1321  NdbNodeBitmask skip_nodes;
1322  DEBUG_START3(signal, TrefuseReason);
1323 
1324  if (signal->getLength() == CmRegRef::SignalLength)
1325  {
1326  jam();
1327  node_gci = ref->latest_gci;
1328  candidate_gci = ref->candidate_latest_gci;
1329  start_type = ref->start_type;
1330  skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
1331  }
1332 
1333  c_start.m_regReqReqRecv++;
1334 
1335  // Ignore block reference in data[0]
1336 
1337  if(candidate != c_start.m_president_candidate)
1338  {
1339  jam();
1340  c_start.m_regReqReqRecv = ~0;
1341  }
1342 
1343  c_start.m_starting_nodes.set(TaddNodeno);
1344  if (node_gci)
1345  {
1346  jam();
1347  c_start.m_starting_nodes_w_log.set(TaddNodeno);
1348  }
1349  c_start.m_node_gci[TaddNodeno] = node_gci;
1350 
1351  skip_nodes.bitAND(c_definedNodes);
1352  c_start.m_skip_nodes.bitOR(skip_nodes);
1353 
1354  // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
1355  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1356 
1357  char buf[100];
1358  switch (TrefuseReason) {
1359  case CmRegRef::ZINCOMPATIBLE_VERSION:
1360  jam();
1361  progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1362  "incompatible version, "
1363  "connection refused by running ndb node");
1364  case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1365  jam();
1366  BaseString::snprintf(buf, sizeof(buf),
1367  "incompatible start type detected: node %d"
1368  " reports %s(%d) my start type: %s(%d)",
1369  TaddNodeno,
1370  get_start_type_string(start_type), start_type,
1371  get_start_type_string(c_start.m_start_type),
1372  c_start.m_start_type);
1373  progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1374  break;
1375  case CmRegRef::ZBUSY:
1376  case CmRegRef::ZBUSY_TO_PRES:
1377  case CmRegRef::ZBUSY_PRESIDENT:
1378  jam();
1379  cpresidentAlive = ZTRUE;
1380  signal->theData[3] = 0;
1381  break;
1382  case CmRegRef::ZNOT_IN_CFG:
1383  jam();
1384  progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1385  break;
1386  case CmRegRef::ZNOT_DEAD:
1387  jam();
1388  progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1389  break;
1390  case CmRegRef::ZSINGLE_USER_MODE:
1391  jam();
1392  progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1393  break;
1399  case CmRegRef::ZGENERIC:
1400  jam();
1401  progError(__LINE__, NDBD_EXIT_GENERIC);
1402  break;
1403  case CmRegRef::ZELECTION:
1404  jam();
1405  if (candidate_gci > c_start.m_president_candidate_gci ||
1406  (candidate_gci == c_start.m_president_candidate_gci &&
1407  candidate < c_start.m_president_candidate))
1408  {
1409  jam();
1410  //----------------------------------------
1411  /* We may already have a candidate */
1412  /* choose the lowest nodeno */
1413  //----------------------------------------
1414  signal->theData[3] = 2;
1415  c_start.m_president_candidate = candidate;
1416  c_start.m_president_candidate_gci = candidate_gci;
1417  } else {
1418  signal->theData[3] = 4;
1419  }//if
1420  break;
1421  case CmRegRef::ZNOT_PRESIDENT:
1422  jam();
1423  cpresidentAlive = ZTRUE;
1424  signal->theData[3] = 3;
1425  break;
1426  default:
1427  jam();
1428  signal->theData[3] = 5;
1429  /*empty*/;
1430  break;
1431  }//switch
1432 /*--------------------------------------------------------------*/
1433 // Send this as an EVENT REPORT to inform about hearing about
1434 // other NDB node proclaiming not to be president.
1435 /*--------------------------------------------------------------*/
1436  signal->theData[0] = NDB_LE_CM_REGREF;
1437  signal->theData[1] = getOwnNodeId();
1438  signal->theData[2] = TaddNodeno;
1439 //-----------------------------------------
1440 // signal->theData[3] filled in above
1441 //-----------------------------------------
1442  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1443 
1444  if(cpresidentAlive == ZTRUE)
1445  {
1446  jam();
1447  DEBUG("cpresidentAlive");
1448  return;
1449  }
1450 
1451  if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1452  {
1453  jam();
1454  DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
1455  return;
1456  }
1457 
1458  if(c_start.m_president_candidate != getOwnNodeId())
1459  {
1460  jam();
1461  DEBUG("i'm not the candidate");
1462  return;
1463  }
1464 
1468  if(check_startup(signal))
1469  {
1470  jam();
1471  electionWon(signal);
1472  }
1473 
1474  return;
1475 }//Qmgr::execCM_REGREF()
1476 
// Decide whether the start may proceed with the nodes that have registered
// so far, and report the decision as an NDB_LE_StartReport event.
//
// Returns 0 when the caller has to keep waiting and a non-zero value
// (1 or 2) when the start may go ahead. The two fatal paths
// (missing_nodegroup, incomplete_log) call progError(); the `return 0`
// that follows each of them is marked as dead code by the original author.
//
// Side effects: overwrites signal->theData, executes CHECKNODEGROUPSREQ and
// DIH_RESTARTREQ directly on DBDIH, and sends GSN_EVENT_REP to CMVMI on
// every path that reaches start_report.
1477 Uint32
1478 Qmgr::check_startup(Signal* signal)
1479 {
1480  Uint64 now = NdbTick_CurrentMillisecond();
// Deadlines, all measured from c_start_election_time; the partitioned
// timeout is added on top of the partial timeout.
1481  Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
1482  Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
1483  Uint64 no_nodegroup_timeout = c_start_election_time +
1484  c_restartNoNodegroupTimeout;
1485 
// The no-nodegroup wait applies only when its timeout is not ~0 and at
// least one node is flagged in m_no_nodegroup_nodes.
1486  const bool no_nodegroup_active =
1487  (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1488  (! c_start.m_no_nodegroup_nodes.isclear());
1489 
// tmp  = nodes accounted for (skipped or starting)
// wait = defined nodes that are not yet accounted for
1493  NdbNodeBitmask tmp;
1494  tmp.bitOR(c_start.m_skip_nodes);
1495  tmp.bitOR(c_start.m_starting_nodes);
1496 
1497  NdbNodeBitmask wait;
1498  wait.assign(c_definedNodes);
1499  wait.bitANDC(tmp);
1500 
1501  Uint32 retVal = 0;
1502  Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
1503  NdbNodeBitmask report_mask;
1504 
// Case 1: no GCI known, or the start type is exactly "initial start".
// Every path in this branch jumps to start_report.
1505  if ((c_start.m_latest_gci == 0) ||
1506  (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1507  {
1508  if (tmp.equal(c_definedNodes))
1509  {
// Every defined node is either starting or skipped: go ahead.
1510  jam();
1511  signal->theData[1] = 0x8000;
1512  report_mask.assign(c_definedNodes);
1513  report_mask.bitANDC(c_start.m_starting_nodes);
1514  retVal = 1;
1515  goto start_report;
1516  }
1517  else if (no_nodegroup_active)
1518  {
1519  if (now < no_nodegroup_timeout)
1520  {
// Still inside the no-nodegroup wait: theData[2] is the remaining time,
// converted from milliseconds to seconds and rounded to nearest.
1521  signal->theData[1] = 6;
1522  signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1523  report_mask.assign(wait);
1524  retVal = 0;
1525  goto start_report;
1526  }
// Timeout passed: count the no-nodegroup nodes as accounted for, then
// re-test for completeness.
1527  tmp.bitOR(c_start.m_no_nodegroup_nodes);
1528  if (tmp.equal(c_definedNodes))
1529  {
1530  signal->theData[1] = 0x8000;
1531  report_mask.assign(c_definedNodes);
1532  report_mask.bitANDC(c_start.m_starting_nodes);
1533  retVal = 1;
1534  goto start_report;
1535  }
1536  else
1537  {
1538  jam();
1539  signal->theData[1] = 1;
1540  signal->theData[2] = ~0;
1541  report_mask.assign(wait);
1542  retVal = 0;
1543  goto start_report;
1544  }
1545  }
1546  else
1547  {
// Nodes are missing and there is no no-nodegroup wait: keep waiting and
// report ~0 as the remaining time.
1548  jam();
1549  signal->theData[1] = 1;
1550  signal->theData[2] = ~0;
1551  report_mask.assign(wait);
1552  retVal = 0;
1553  goto start_report;
1554  }
1555  }
1556 
// Case 2: all remaining start types. Note that this test does not consult
// no_nodegroup_active, unlike the one in case 1.
1557  if (now >= no_nodegroup_timeout)
1558  {
1559  tmp.bitOR(c_start.m_no_nodegroup_nodes);
1560  }
1561 
1562  {
1563  const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
// sd aliases the start of the signal buffer; it is used both as the
// request and, after EXECUTE_DIRECT, to read the answer in sd->output.
1564  CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
1565 
1566  {
// First check: nodes that are not connected plus connected nodes that have
// a log. If DBDIH answers Lose even for this mask, the start is aborted
// via missing_nodegroup.
1570  NdbNodeBitmask check;
1571  check.assign(c_definedNodes);
1572  check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
1573  check.bitOR(c_start.m_starting_nodes_w_log);
1574 
1575  sd->blockRef = reference();
1576  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1577  sd->mask = check;
1578  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1579  CheckNodeGroups::SignalLength);
1580 
1581  if (sd->output == CheckNodeGroups::Lose)
1582  {
1583  jam();
1584  goto missing_nodegroup;
1585  }
1586  }
1587 
// result: node-group check for all starting nodes.
1588  sd->blockRef = reference();
1589  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1590  sd->mask = c_start.m_starting_nodes;
1591  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1592  CheckNodeGroups::SignalLength);
1593 
1594  const Uint32 result = sd->output;
1595 
// result_w_log: the same check restricted to starting nodes that have a log.
1596  sd->blockRef = reference();
1597  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1598  sd->mask = c_start.m_starting_nodes_w_log;
1599  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1600  CheckNodeGroups::SignalLength);
1601 
1602  const Uint32 result_w_log = sd->output;
1603 
1604  if (tmp.equal(c_definedNodes))
1605  {
// Every defined node is accounted for: no need to wait for any timeout.
1610  jam();
1611  switch(result_w_log){
1612  case CheckNodeGroups::Lose:
1613  {
1614  jam();
1615  goto missing_nodegroup;
1616  }
1617  case CheckNodeGroups::Win:
1618  signal->theData[1] = all ? 0x8001 : 0x8002;
1619  report_mask.assign(c_definedNodes);
1620  report_mask.bitANDC(c_start.m_starting_nodes);
1621  retVal = 1;
1622  goto check_log;
1623  case CheckNodeGroups::Partitioning:
1624  ndbrequire(result != CheckNodeGroups::Lose);
1625  signal->theData[1] =
1626  all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1627  report_mask.assign(c_definedNodes);
1628  report_mask.bitANDC(c_start.m_starting_nodes);
1629  retVal = 1;
1630  goto check_log;
1631  }
1632  }
1633 
1634  if (now < partial_timeout)
1635  {
// Nodes are still missing and the partial timeout has not expired: wait.
1636  jam();
1637 
1638  signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1639  signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
1640  report_mask.assign(wait);
1641  retVal = 0;
1642 
// While the no-nodegroup wait is running, report that timeout instead;
// once it has expired, stop listing the no-nodegroup nodes as waited for.
1643  if (no_nodegroup_active && now < no_nodegroup_timeout)
1644  {
1645  signal->theData[1] = 7;
1646  signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1647  }
1648  else if (no_nodegroup_active && now >= no_nodegroup_timeout)
1649  {
1650  report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
1651  }
1652 
1653  goto start_report;
1654  }
1655 
// Partial timeout has expired: decide from the with-log check alone.
1659  switch(result_w_log){
1660  case CheckNodeGroups::Lose:
1661  jam();
1662  goto missing_nodegroup;
1663  case CheckNodeGroups::Partitioning:
// A partitioned start is held back until the partitioned timeout, unless
// the check over all starting nodes already answered Win.
1664  if (now < partitioned_timeout && result != CheckNodeGroups::Win)
1665  {
1666  goto missinglog;
1667  }
1668  // Fall through...
1669  case CheckNodeGroups::Win:
1670  signal->theData[1] =
1671  all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1672  report_mask.assign(c_definedNodes);
1673  report_mask.bitANDC(c_start.m_starting_nodes);
1674  retVal = 2;
1675  goto check_log;
1676  }
1677  }
// Reached only if result_w_log matched none of the cases above.
1678  ndbrequire(false);
1679 
1680 check_log:
1681  jam();
1682  {
// The direct call to DBDIH reuses the signal buffer, so the report words
// prepared above are saved here and restored after the call.
1683  Uint32 save[4+4*NdbNodeBitmask::Size];
1684  memcpy(save, signal->theData, sizeof(save));
1685 
1686  DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
1687  req->senderRef = 0;
1688  c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1689  memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1690  EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1691  DihRestartReq::CheckLength);
1692 
// DBDIH's answer is read from theData[0]; MAX_NDB_NODES (the initial
// "illegal value") means there is no incomplete node group to report.
1693  incompleteng = signal->theData[0];
1694  memcpy(signal->theData, save, sizeof(save));
1695 
1696  if (incompleteng != MAX_NDB_NODES)
1697  {
1698  jam();
1699  if (retVal == 1)
1700  {
// All nodes were accounted for, so waiting cannot help: fatal.
1701  jam();
1702  goto incomplete_log;
1703  }
1704  else if (retVal == 2)
1705  {
// Timeout-driven start: keep waiting until the partitioned timeout has
// passed, then give up.
1706  if (now <= partitioned_timeout)
1707  {
1708  jam();
1709  goto missinglog;
1710  }
1711  else
1712  {
1713  goto incomplete_log;
1714  }
1715  }
1716  ndbrequire(false);
1717  }
1718  }
1719  goto start_report;
1720 
// Keep waiting because of missing log: report the time left until the
// partitioned timeout.
1721 missinglog:
1722  signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1723  signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
1724  report_mask.assign(c_definedNodes);
1725  report_mask.bitANDC(c_start.m_starting_nodes);
1726  retVal = 0;
1727  goto start_report;
1728 
// Event layout: [0] event type, [1] report code, [2] seconds (if any),
// [3] bitmask size in words, followed by five bitmasks: defined nodes,
// starting nodes, skipped nodes, report_mask, no-nodegroup nodes.
1729 start_report:
1730  jam();
1731  {
1732  Uint32 sz = NdbNodeBitmask::Size;
1733  signal->theData[0] = NDB_LE_StartReport;
1734  signal->theData[3] = sz;
1735  Uint32* ptr = signal->theData+4;
1736  c_definedNodes.copyto(sz, ptr); ptr += sz;
1737  c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1738  c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1739  report_mask.copyto(sz, ptr); ptr+= sz;
1740  c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1741  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1742  4+5*NdbNodeBitmask::Size, JBB);
1743  }
1744  return retVal;
1745 
// Fatal: a node group cannot be started. mask2 lists the starting nodes
// that are not in m_starting_nodes_w_log.
1746 missing_nodegroup:
1747  jam();
1748  {
1749  char buf[100], mask1[100], mask2[100];
1750  c_start.m_starting_nodes.getText(mask1);
1751  tmp.assign(c_start.m_starting_nodes);
1752  tmp.bitANDC(c_start.m_starting_nodes_w_log);
1753  tmp.getText(mask2);
1754  BaseString::snprintf(buf, sizeof(buf),
1755  "Unable to start missing node group! "
1756  " starting: %s (missing fs for: %s)",
1757  mask1, mask2);
1758  progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1759  return 0; // Deadcode
1760  }
1761 
// Fatal: DBDIH reported a node group (incompleteng) with an incomplete log.
1762 incomplete_log:
1763  jam();
1764  {
1765  char buf[100], mask1[100];
1766  c_start.m_starting_nodes.getText(mask1);
1767  BaseString::snprintf(buf, sizeof(buf),
1768  "Incomplete log for node group: %d! "
1769  " starting nodes: %s",
1770  incompleteng, mask1);
1771  progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1772  return 0; // Deadcode
1773  }
1774 }
1775 
1776 void
1777 Qmgr::electionWon(Signal* signal){
1778  NodeRecPtr myNodePtr;
1779  cpresident = getOwnNodeId(); /* This node becomes president. */
1780  myNodePtr.i = getOwnNodeId();
1781  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1782 
1783  myNodePtr.p->phase = ZRUNNING;
1784 
1785  cpdistref = reference();
1786  cneighbourl = ZNIL;
1787  cneighbourh = ZNIL;
1788  myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1789  c_maxDynamicId = 1;
1790  c_clusterNodes.clear();
1791  c_clusterNodes.set(getOwnNodeId());
1792 
1793  cpresidentAlive = ZTRUE;
1794  c_start_election_time = ~0;
1795  c_start.reset();
1796 
1797  signal->theData[0] = NDB_LE_CM_REGCONF;
1798  signal->theData[1] = getOwnNodeId();
1799  signal->theData[2] = cpresident;
1800  signal->theData[3] = myNodePtr.p->ndynamicId;
1801  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1802 
1803  c_start.m_starting_nodes.clear(getOwnNodeId());
1804  if (c_start.m_starting_nodes.isclear())
1805  {
1806  jam();
1807  sendSttorryLab(signal);
1808  }
1809 }
1810 
1811 /*
1812 4.4.11 CONTINUEB */
1813 /*--------------------------------------------------------------------------*/
1814 /* */
1815 /*--------------------------------------------------------------------------*/
1816 /****************************>---------------------------------------------*/
1817 /* CONTINUEB > SENDER: Own block, Own node */
1818 /****************************>-------+INPUT : TCONTINUEB_TYPE */
1819 /*--------------------------------------------------------------*/
1820 void Qmgr::regreqTimeLimitLab(Signal* signal)
1821 {
1822  if(cpresident == ZNIL)
1823  {
1824  if (c_start.m_president_candidate == ZNIL)
1825  {
1826  jam();
1827  c_start.m_president_candidate = getOwnNodeId();
1828  }
1829 
1830  cmInfoconf010Lab(signal);
1831  }
1832 }//Qmgr::regreqTimelimitLab()
1833 
1842 /*******************************/
1843 /* CM_NODEINFOCONF */
1844 /*******************************/
1845 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
1846 {
1847  DEBUG_START3(signal, "");
1848 
1849  jamEntry();
1850 
1851  CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
1852 
1853  const Uint32 nodeId = conf->nodeId;
1854  const Uint32 dynamicId = conf->dynamicId;
1855  const Uint32 version = conf->version;
1856  Uint32 mysql_version = conf->mysql_version;
1857  Uint32 lqh_workers = conf->lqh_workers;
1858  if (version < NDBD_SPLIT_VERSION)
1859  {
1860  jam();
1861  mysql_version = 0;
1862  }
1863  if (version < NDBD_MT_LQH_VERSION)
1864  {
1865  jam();
1866  lqh_workers = 0;
1867  }
1868 
1869  NodeRecPtr nodePtr;
1870  nodePtr.i = getOwnNodeId();
1871  ptrAss(nodePtr, nodeRec);
1872  ndbrequire(nodePtr.p->phase == ZSTARTING);
1873  ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1874  c_start.m_nodes.clearWaitingFor(nodeId);
1875 
1879  NodeRecPtr replyNodePtr;
1880  replyNodePtr.i = nodeId;
1881  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
1882  replyNodePtr.p->ndynamicId = dynamicId;
1883  replyNodePtr.p->blockRef = signal->getSendersBlockRef();
1884  setNodeInfo(replyNodePtr.i).m_version = version;
1885  setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
1886  setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
1887 
1888  recompute_version_info(NodeInfo::DB, version);
1889 
1890  if(!c_start.m_nodes.done()){
1891  jam();
1892  return;
1893  }
1894 
1895  /**********************************************<*/
1896  /* Send an ack. back to the president. */
1897  /* CM_ACKADD */
1898  /* The new node has been registered by all */
1899  /* running nodes and has stored nodeinfo about */
1900  /* all running nodes. The new node has to wait */
1901  /* for CM_ADD (commit) from president to become */
1902  /* a running node in the cluster. */
1903  /**********************************************<*/
1904  sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
1905  return;
1906 }//Qmgr::execCM_NODEINFOCONF()
1907 
1912 /*******************************/
1913 /* CM_NODEINFOREQ */
1914 /*******************************/
1915 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
1916 {
1917  jamEntry();
1918 
1919  const Uint32 Tblockref = signal->getSendersBlockRef();
1920 
1921  NodeRecPtr nodePtr;
1922  nodePtr.i = getOwnNodeId();
1923  ptrAss(nodePtr, nodeRec);
1924  if(nodePtr.p->phase != ZRUNNING){
1925  jam();
1926  signal->theData[0] = reference();
1927  signal->theData[1] = getOwnNodeId();
1928  signal->theData[2] = ZNOT_RUNNING;
1929  sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
1930  return;
1931  }
1932 
1933  NodeRecPtr addNodePtr;
1934  CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
1935  addNodePtr.i = req->nodeId;
1936  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1937  addNodePtr.p->ndynamicId = req->dynamicId;
1938  addNodePtr.p->blockRef = signal->getSendersBlockRef();
1939  setNodeInfo(addNodePtr.i).m_version = req->version;
1940 
1941  Uint32 mysql_version = req->mysql_version;
1942  if (req->version < NDBD_SPLIT_VERSION)
1943  mysql_version = 0;
1944  setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
1945 
1946  Uint32 lqh_workers = req->lqh_workers;
1947  if (req->version < NDBD_MT_LQH_VERSION)
1948  lqh_workers = 0;
1949  setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
1950 
1951  c_maxDynamicId = req->dynamicId & 0xFFFF;
1952 
1953  cmAddPrepare(signal, addNodePtr, nodePtr.p);
1954 }//Qmgr::execCM_NODEINFOREQ()
1955 
1956 void
1957 Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
1958  jam();
1959 
1960  switch(nodePtr.p->phase){
1961  case ZINIT:
1962  jam();
1963  nodePtr.p->phase = ZSTARTING;
1964  return;
1965  case ZFAIL_CLOSING:
1966  jam();
1967 
1968 #if 1
1969  warningEvent("Recieved request to incorperate node %u, "
1970  "while error handling has not yet completed",
1971  nodePtr.i);
1972 
1973  ndbrequire(getOwnNodeId() != cpresident);
1974  ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
1975  c_start.m_nodes.clearWaitingFor();
1976  c_start.m_nodes.setWaitingFor(nodePtr.i);
1977  c_start.m_gsn = GSN_CM_NODEINFOCONF;
1978 #else
1979  warningEvent("Enabling communication to CM_ADD node %u state=%d",
1980  nodePtr.i,
1981  nodePtr.p->phase);
1982  nodePtr.p->phase = ZSTARTING;
1983  nodePtr.p->failState = NORMAL;
1984  signal->theData[0] = 0;
1985  signal->theData[1] = nodePtr.i;
1986  sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
1987 #endif
1988  return;
1989  case ZSTARTING:
1990  break;
1991  case ZRUNNING:
1992  case ZPREPARE_FAIL:
1993  case ZAPI_ACTIVE:
1994  case ZAPI_INACTIVE:
1995  ndbrequire(false);
1996  }
1997 
1998  sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
1999  sendApiVersionRep(signal, nodePtr);
2000 
2001  /* President have prepared us */
2002  CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
2003  conf->nodeId = getOwnNodeId();
2004  conf->dynamicId = self->ndynamicId;
2005  conf->version = getNodeInfo(getOwnNodeId()).m_version;
2006  conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
2007  conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2008  sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2009  CmNodeInfoConf::SignalLength, JBB);
2010  DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
2011 }
2012 
2013 void
2014 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2015 {
2016  if (getNodeInfo(nodePtr.i).m_version >= NDBD_NODE_VERSION_REP)
2017  {
2018  jam();
2019  Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2020  for(Uint32 i = 1; i<MAX_NODES; i++)
2021  {
2022  jam();
2023  Uint32 version = getNodeInfo(i).m_version;
2024  Uint32 type = getNodeInfo(i).m_type;
2025  if (type != NodeInfo::DB && version)
2026  {
2027  jam();
2028  signal->theData[0] = i;
2029  signal->theData[1] = version;
2030  sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2031  }
2032  }
2033  }
2034 }
2035 
2036 void
2037 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2038 
2039  CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2040  cmAckAdd->requestType = type;
2041  cmAckAdd->startingNodeId = nodeId;
2042  cmAckAdd->senderNodeId = getOwnNodeId();
2043  sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2044  DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2045 
2046  switch(type){
2047  case CmAdd::Prepare:
2048  return;
2049  case CmAdd::AddCommit:
2050  case CmAdd::CommitNew:
2051  break;
2052  }
2053 
2054  signal->theData[0] = nodeId;
2055  EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2056  jamEntry();
2057 }
2058 
2059 /*
2060 4.4.11 CM_ADD */
2070 /*******************************/
2071 /* CM_ADD */
2072 /*******************************/
2073 void Qmgr::execCM_ADD(Signal* signal)
2074 {
2075  NodeRecPtr addNodePtr;
2076  jamEntry();
2077 
2078  NodeRecPtr nodePtr;
2079  nodePtr.i = getOwnNodeId();
2080  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2081 
2082  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
2083  const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2084  addNodePtr.i = cmAdd->startingNodeId;
2085  //const Uint32 startingVersion = cmAdd->startingVersion;
2086  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2087 
2088  DEBUG_START3(signal, type);
2089 
2090  if(nodePtr.p->phase == ZSTARTING){
2091  jam();
2095  ndbrequire(addNodePtr.i == nodePtr.i);
2096  switch(type){
2097  case CmAdd::Prepare:
2098  ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2102  return;
2103  case CmAdd::CommitNew:
2107  joinedCluster(signal, addNodePtr);
2108  return;
2109  case CmAdd::AddCommit:
2110  ndbrequire(false);
2111  }
2112  }
2113 
2114  switch (type) {
2115  case CmAdd::Prepare:
2116  cmAddPrepare(signal, addNodePtr, nodePtr.p);
2117  break;
2118  case CmAdd::AddCommit:{
2119  jam();
2120  ndbrequire(addNodePtr.p->phase == ZSTARTING);
2121  addNodePtr.p->phase = ZRUNNING;
2122  m_connectivity_check.reportNodeConnect(addNodePtr.i);
2123  setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
2124  c_clusterNodes.set(addNodePtr.i);
2125  findNeighbours(signal, __LINE__);
2126 
2131  sendHeartbeat(signal);
2132  hb_send_timer.reset(0);
2133 
2137  EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2138  enableComReq->m_senderRef = reference();
2139  enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2140  NodeBitmask::clear(enableComReq->m_nodeIds);
2141  NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2142  sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2143  EnableComReq::SignalLength, JBA);
2144  break;
2145  }
2146  case CmAdd::CommitNew:
2147  jam();
2148  ndbrequire(false);
2149  }
2150 
2151 }//Qmgr::execCM_ADD()
2152 
2153 void
2154 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2155 {
2156  sendCmAckAdd(signal, node, CmAdd::AddCommit);
2157  if(getOwnNodeId() != cpresident){
2158  jam();
2159  c_start.reset();
2160  }
2161 }
2162 
2163 void
2164 Qmgr::execENABLE_COMCONF(Signal *signal)
2165 {
2166  const EnableComConf *enableComConf =
2167  (const EnableComConf *)signal->getDataPtr();
2168  Uint32 state = enableComConf->m_senderData;
2169  Uint32 node = NodeBitmask::find(enableComConf->m_nodeIds, 0);
2170 
2171  jamEntry();
2172 
2173  switch (state)
2174  {
2175  case ENABLE_COM_CM_ADD_COMMIT:
2176  jam();
2177  /* Only exactly one node possible here. */
2178  ndbrequire(node != NodeBitmask::NotFound);
2179  ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2180  NodeBitmask::NotFound);
2181  handleEnableComAddCommit(signal, node);
2182  break;
2183 
2184  case ENABLE_COM_CM_COMMIT_NEW:
2185  jam();
2186  handleEnableComCommitNew(signal);
2187  break;
2188 
2189  case ENABLE_COM_API_REGREQ:
2190  jam();
2191  /* Only exactly one node possible here. */
2192  ndbrequire(node != NodeBitmask::NotFound);
2193  ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2194  NodeBitmask::NotFound);
2195  handleEnableComApiRegreq(signal, node);
2196  break;
2197 
2198  default:
2199  jam();
2200  ndbrequire(false);
2201  }
2202 }
2203 
2204 void
2205 Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
2211  nodePtr.p->phase = ZRUNNING;
2212  setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2213  findNeighbours(signal, __LINE__);
2214  c_clusterNodes.set(nodePtr.i);
2215  c_start.reset();
2216 
2221  sendHeartbeat(signal);
2222  hb_send_timer.reset(0);
2223 
2228  EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2229  enableComReq->m_senderRef = reference();
2230  enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2231  NodeBitmask::clear(enableComReq->m_nodeIds);
2232  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2233  jam();
2234  ptrAss(nodePtr, nodeRec);
2235  if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2236  /*-------------------------------------------------------------------*/
2237  // Enable full communication to all other nodes. Not really necessary
2238  // to open communication to ourself.
2239  /*-------------------------------------------------------------------*/
2240  jam();
2241  NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2242  }//if
2243  }//for
2244 
2245  if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2246  {
2247  jam();
2248  sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2249  EnableComReq::SignalLength, JBA);
2250  }
2251  else
2252  {
2253  handleEnableComCommitNew(signal);
2254  }
2255 }
2256 
2257 void
2258 Qmgr::handleEnableComCommitNew(Signal *signal)
2259 {
2260  sendSttorryLab(signal);
2261 
2262  sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
2263 }
2264 
2265 /* 4.10.7 CM_ACKADD - PRESIDENT IS RECEIVER - */
2266 /*---------------------------------------------------------------------------*/
2267 /* Entry point for an ack add signal.
2268  * The TTYPE defines if it is a prepare or a commit. */
2269 /*---------------------------------------------------------------------------*/
2270 void Qmgr::execCM_ACKADD(Signal* signal)
2271 {
2272  NodeRecPtr addNodePtr;
2273  NodeRecPtr senderNodePtr;
2274  jamEntry();
2275 
2276  CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
2277  const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2278  addNodePtr.i = cmAckAdd->startingNodeId;
2279  senderNodePtr.i = cmAckAdd->senderNodeId;
2280 
2281  DEBUG_START3(signal, type);
2282 
2283  if (cpresident != getOwnNodeId()) {
2284  jam();
2285  /*-----------------------------------------------------------------------*/
2286  /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE. */
2287  /*------------------------------------------------------------_----------*/
2288  warningEvent("Received CM_ACKADD from %d president=%d",
2289  senderNodePtr.i, cpresident);
2290  return;
2291  }//if
2292 
2293  if (addNodePtr.i != c_start.m_startNode) {
2294  jam();
2295  /*----------------------------------------------------------------------*/
2296  /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
2297  /*----------------------------------------------------------------------*/
2298  warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
2299  senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2300  return;
2301  }//if
2302 
2303  ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2304  c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2305  if(!c_start.m_nodes.done()){
2306  jam();
2307  return;
2308  }
2309 
2310  switch (type) {
2311  case CmAdd::Prepare:{
2312  jam();
2313 
2314  /*----------------------------------------------------------------------*/
2315  /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE. */
2316  /*----------------------------------------------------------------------*/
2317  c_start.m_gsn = GSN_CM_ADD;
2318  c_start.m_nodes = c_clusterNodes;
2319 
2320  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2321  cmAdd->requestType = CmAdd::AddCommit;
2322  cmAdd->startingNodeId = addNodePtr.i;
2323  cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2324  cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2325  NodeReceiverGroup rg(QMGR, c_clusterNodes);
2326  sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2327  DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
2328  return;
2329  }
2330  case CmAdd::AddCommit:{
2331  jam();
2332 
2333  /****************************************/
2334  /* Send commit to the new node so he */
2335  /* will change PHASE into ZRUNNING */
2336  /****************************************/
2337  c_start.m_gsn = GSN_CM_ADD;
2338  c_start.m_nodes.clearWaitingFor();
2339  c_start.m_nodes.setWaitingFor(addNodePtr.i);
2340 
2341  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2342  cmAdd->requestType = CmAdd::CommitNew;
2343  cmAdd->startingNodeId = addNodePtr.i;
2344  cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2345  cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2346  sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2347  CmAdd::SignalLength, JBA);
2348  DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
2349  return;
2350  }
2351  case CmAdd::CommitNew:
2352  jam();
2356  handleArbitNdbAdd(signal, addNodePtr.i);
2357  c_start.reset();
2358 
2359  if (c_start.m_starting_nodes.get(addNodePtr.i))
2360  {
2361  jam();
2362  c_start.m_starting_nodes.clear(addNodePtr.i);
2363  if (c_start.m_starting_nodes.isclear())
2364  {
2365  jam();
2366  sendSttorryLab(signal);
2367  }
2368  }
2369  return;
2370  }//switch
2371  ndbrequire(false);
2372 }//Qmgr::execCM_ACKADD()
2373 
2378 void Qmgr::findNeighbours(Signal* signal, Uint32 from)
2379 {
2380  UintR toldLeftNeighbour;
2381  UintR tfnLeftFound;
2382  UintR tfnMaxFound;
2383  UintR tfnMinFound;
2384  UintR tfnRightFound;
2385  NodeRecPtr fnNodePtr;
2386  NodeRecPtr fnOwnNodePtr;
2387 
2388  Uint32 toldRightNeighbour = cneighbourh;
2389  toldLeftNeighbour = cneighbourl;
2390  tfnLeftFound = 0;
2391  tfnMaxFound = 0;
2392  tfnMinFound = (UintR)-1;
2393  tfnRightFound = (UintR)-1;
2394  fnOwnNodePtr.i = getOwnNodeId();
2395  ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2396  for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2397  jam();
2398  ptrAss(fnNodePtr, nodeRec);
2399  if (fnNodePtr.i != fnOwnNodePtr.i) {
2400  if (fnNodePtr.p->phase == ZRUNNING) {
2401  if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2402  jam();
2403  tfnMinFound = fnNodePtr.p->ndynamicId;
2404  }//if
2405  if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2406  jam();
2407  tfnMaxFound = fnNodePtr.p->ndynamicId;
2408  }//if
2409  if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2410  jam();
2411  if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2412  jam();
2413  tfnLeftFound = fnNodePtr.p->ndynamicId;
2414  }//if
2415  } else {
2416  jam();
2417  if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2418  jam();
2419  tfnRightFound = fnNodePtr.p->ndynamicId;
2420  }//if
2421  }//if
2422  }//if
2423  }//if
2424  }//for
2425  if (tfnLeftFound == 0) {
2426  if (tfnMinFound == (UintR)-1) {
2427  jam();
2428  cneighbourl = ZNIL;
2429  } else {
2430  jam();
2431  cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2432  }//if
2433  } else {
2434  jam();
2435  cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2436  }//if
2437  if (tfnRightFound == (UintR)-1) {
2438  if (tfnMaxFound == 0) {
2439  jam();
2440  cneighbourh = ZNIL;
2441  } else {
2442  jam();
2443  cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2444  }//if
2445  } else {
2446  jam();
2447  cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2448  }//if
2449  if (toldLeftNeighbour != cneighbourl) {
2450  jam();
2451  if (cneighbourl != ZNIL) {
2452  jam();
2454  /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
2455  * EQUAL TO ZERO.
2456  *---------------------------------------------------------------------*/
2457  fnNodePtr.i = cneighbourl;
2458  ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2459  setNodeInfo(fnNodePtr.i).m_heartbeat_cnt= 0;
2460  }//if
2461  }//if
2462 
2463  signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
2464  signal->theData[1] = getOwnNodeId();
2465  signal->theData[2] = cneighbourl;
2466  signal->theData[3] = cneighbourh;
2467  signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2468  UintR Tlen = 5;
2469  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2470  g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
2471  from,
2472  toldLeftNeighbour,
2473  toldRightNeighbour,
2474  cneighbourl,
2475  cneighbourh);
2476 }//Qmgr::findNeighbours()
2477 
2478 /*
2479 4.10.7 INIT_DATA */
2480 /*---------------------------------------------------------------------------*/
2481 /*---------------------------------------------------------------------------*/
2482 void Qmgr::initData(Signal* signal)
2483 {
2484  NDB_TICKS now = NdbTick_CurrentMillisecond();
2485  interface_check_timer.setDelay(1000);
2486  interface_check_timer.reset(now);
2487 
2488  // catch-all for missing initializations
2489  memset(&arbitRec, 0, sizeof(arbitRec));
2490 
2494  const ndb_mgm_configuration_iterator * p =
2495  m_ctx.m_config.getOwnConfigIterator();
2496  ndbrequire(p != 0);
2497 
2498  Uint32 hbDBDB = 1500;
2499  Uint32 arbitTimeout = 1000;
2500  Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2501  Uint32 ccInterval = 0;
2502  c_restartPartialTimeout = 30000;
2503  c_restartPartionedTimeout = 60000;
2504  c_restartFailureTimeout = ~0;
2505  c_restartNoNodegroupTimeout = 15000;
2506  ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2507  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2508  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2509  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2510  &c_restartPartialTimeout);
2511  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2512  &c_restartPartionedTimeout);
2513  ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2514  &c_restartNoNodegroupTimeout);
2515  ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2516  &c_restartFailureTimeout);
2517  ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
2518  &ccInterval);
2519 
2520  if(c_restartPartialTimeout == 0)
2521  {
2522  c_restartPartialTimeout = ~0;
2523  }
2524 
2525  if (c_restartPartionedTimeout ==0)
2526  {
2527  c_restartPartionedTimeout = ~0;
2528  }
2529 
2530  if (c_restartFailureTimeout == 0)
2531  {
2532  c_restartFailureTimeout = ~0;
2533  }
2534 
2535  if (c_restartNoNodegroupTimeout == 0)
2536  {
2537  c_restartNoNodegroupTimeout = ~0;
2538  }
2539 
2540  setHbDelay(hbDBDB);
2541  setCCDelay(ccInterval);
2542  setArbitTimeout(arbitTimeout);
2543 
2544  arbitRec.method = (ArbitRec::Method)arbitMethod;
2545  arbitRec.state = ARBIT_NULL; // start state for all nodes
2546  arbitRec.apiMask[0].clear(); // prepare for ARBIT_CFG
2547 
2548  Uint32 sum = 0;
2549  ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
2550  for (unsigned rank = 1; rank <= 2; rank++) {
2551  sd->sender = getOwnNodeId();
2552  sd->code = rank;
2553  sd->node = 0;
2554  sd->ticket.clear();
2555  sd->mask.clear();
2557  m_ctx.m_config.getClusterConfigIterator();
2558  for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2559  Uint32 tmp = 0;
2560  if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2561  tmp == rank){
2562  Uint32 nodeId = 0;
2563  ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2564  sd->mask.set(nodeId);
2565  }
2566  }
2567  sum += sd->mask.count();
2568  execARBIT_CFG(signal);
2569  }
2570 
2571  if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2572  sum == 0)
2573  {
2574  jam();
2575  infoEvent("Arbitration disabled, all API nodes have rank 0");
2576  arbitRec.method = ArbitRec::DISABLED;
2577  }
2578 
2579  setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;
2580 
2582  m_ctx.m_config.getClusterConfigIterator();
2583  for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2584  {
2585  jam();
2586  Uint32 nodeId = 0;
2587  if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2588  {
2589  jam();
2590  if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
2591  {
2592  Uint32 hbOrder = 0;
2593  ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2594 
2595  NodeRecPtr nodePtr;
2596  nodePtr.i = nodeId;
2597  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2598  nodePtr.p->hbOrder = hbOrder;
2599  }
2600  }
2601  }
2602  int hb_order_error = check_hb_order_config();
2603  if (hb_order_error == -1)
2604  {
2605  char msg[] = "Illegal HeartbeatOrder config, "
2606  "all nodes must have non-zero config value";
2607  progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2608  return;
2609  }
2610  if (hb_order_error == -2)
2611  {
2612  char msg[] = "Illegal HeartbeatOrder config, "
2613  "the nodes must have distinct config values";
2614  progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2615  return;
2616  }
2617  ndbrequire(hb_order_error == 0);
2618 }//Qmgr::initData()
2619 
2620 
2627 void Qmgr::timerHandlingLab(Signal* signal)
2628 {
2629  NDB_TICKS TcurrentTime = NdbTick_CurrentMillisecond();
2630  NodeRecPtr myNodePtr;
2631  myNodePtr.i = getOwnNodeId();
2632  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2633 
2634  Uint32 sentHi = signal->theData[1];
2635  Uint32 sentLo = signal->theData[2];
2636  Uint64 sent = (Uint64(sentHi) << 32) + sentLo;
2637 
2638  if (TcurrentTime >= sent + 1000 || (TcurrentTime < sent))
2639  {
2640  jam();
2641  g_eventLogger->warning("timerHandlingLab now: %llu sent: %llu diff: %d",
2642  TcurrentTime, sent, int(TcurrentTime - sent));
2643  }
2644  else if (TcurrentTime >= sent + 150)
2645  {
2646  g_eventLogger->info("timerHandlingLab now: %llu sent: %llu diff: %d",
2647  TcurrentTime, sent, int(TcurrentTime - sent));
2648  }
2649 
2650  if (myNodePtr.p->phase == ZRUNNING) {
2651  jam();
2655  if (hb_send_timer.check(TcurrentTime)) {
2656  jam();
2657  sendHeartbeat(signal);
2658  hb_send_timer.reset(TcurrentTime);
2659  }
2660  if (likely(! m_connectivity_check.m_active))
2661  {
2662  if (hb_check_timer.check(TcurrentTime)) {
2663  jam();
2664  checkHeartbeat(signal);
2665  hb_check_timer.reset(TcurrentTime);
2666  }
2667  }
2668  else
2669  {
2670  /* Connectivity check */
2671  if (m_connectivity_check.m_timer.check(TcurrentTime)) {
2672  jam();
2673  checkConnectivityTimeSignal(signal);
2674  m_connectivity_check.m_timer.reset(TcurrentTime);
2675  }
2676  }
2677  }
2678 
2679  if (interface_check_timer.check(TcurrentTime)) {
2680  jam();
2681  interface_check_timer.reset(TcurrentTime);
2682  checkStartInterface(signal, TcurrentTime);
2683  }
2684 
2685  if (hb_api_timer.check(TcurrentTime))
2686  {
2687  jam();
2688  hb_api_timer.reset(TcurrentTime);
2689  apiHbHandlingLab(signal, TcurrentTime);
2690  }
2691 
2692  if (cactivateApiCheck != 0) {
2693  jam();
2694  if (clatestTransactionCheck == 0) {
2695  //-------------------------------------------------------------
2696  // Initialise the Transaction check timer.
2697  //-------------------------------------------------------------
2698  clatestTransactionCheck = TcurrentTime;
2699  }//if
2700  int counter = 0;
2701  while (TcurrentTime > ((NDB_TICKS)10 + clatestTransactionCheck)) {
2702  jam();
2703  clatestTransactionCheck += (NDB_TICKS)10;
2704  sendSignal(DBTC_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2705  sendSignal(DBLQH_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2706  counter++;
2707  if (counter > 1) {
2708  jam();
2709  break;
2710  } else {
2711  ;
2712  }//if
2713  }//while
2714  }//if
2715 
2716  //--------------------------------------------------
2717  // Resend this signal with 10 milliseconds delay.
2718  //--------------------------------------------------
2719  signal->theData[0] = ZTIMER_HANDLING;
2720  signal->theData[1] = Uint32(TcurrentTime >> 32);
2721  signal->theData[2] = Uint32(TcurrentTime);
2722  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
2723  return;
2724 }//Qmgr::timerHandlingLab()
2725 
2726 /*---------------------------------------------------------------------------*/
2727 /* THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS. */
2728 /*---------------------------------------------------------------------------*/
2729 void Qmgr::sendHeartbeat(Signal* signal)
2730 {
2731  NodeRecPtr localNodePtr;
2732  localNodePtr.i = cneighbourh;
2733  if (localNodePtr.i == ZNIL) {
2734  jam();
2739  return;
2740  }//if
2741  ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2742  signal->theData[0] = getOwnNodeId();
2743 
2744  sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2745 #ifdef VM_TRACE
2746  signal->theData[0] = NDB_LE_SentHeartbeat;
2747  signal->theData[1] = localNodePtr.i;
2748  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2749 #endif
2750 }//Qmgr::sendHeartbeat()
2751 
2752 void Qmgr::checkHeartbeat(Signal* signal)
2753 {
2754  NodeRecPtr nodePtr;
2755 
2756  nodePtr.i = cneighbourl;
2757  if (nodePtr.i == ZNIL) {
2758  jam();
2763  return;
2764  }//if
2765  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2766 
2767  setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2768  ndbrequire(nodePtr.p->phase == ZRUNNING);
2769  ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
2770 
2771  if(getNodeInfo(nodePtr.i).m_heartbeat_cnt > 2){
2772  signal->theData[0] = NDB_LE_MissedHeartbeat;
2773  signal->theData[1] = nodePtr.i;
2774  signal->theData[2] = getNodeInfo(nodePtr.i).m_heartbeat_cnt - 1;
2775  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2776  }
2777 
2778  if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
2779  jam();
2780  if (m_connectivity_check.getEnabled())
2781  {
2782  jam();
2783  /* Start connectivity check, indicating the cause */
2784  startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2785  return;
2786  }
2787  else
2788  {
2793  signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2794  signal->theData[1] = nodePtr.i;
2795  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2796 
2797  failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
2798  return;
2799  }
2800  }//if
2801 }//Qmgr::checkHeartbeat()
2802 
2803 void Qmgr::apiHbHandlingLab(Signal* signal, Uint64 now)
2804 {
2805  NodeRecPtr TnodePtr;
2806 
2807  for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2808  const Uint32 nodeId = TnodePtr.i;
2809  ptrAss(TnodePtr, nodeRec);
2810 
2811  const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
2812  if(type == NodeInfo::DB)
2813  continue;
2814 
2815  if(type == NodeInfo::INVALID)
2816  continue;
2817 
2818  if (c_connectedNodes.get(nodeId))
2819  {
2820  jam();
2821  setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
2822 
2823  if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
2824  {
2825  signal->theData[0] = NDB_LE_MissedHeartbeat;
2826  signal->theData[1] = nodeId;
2827  signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
2828  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2829  }
2830 
2831  if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4)
2832  {
2833  jam();
2834  /*------------------------------------------------------------------*/
2835  /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
2836  * WE WILL DISCONNECT FROM IT NOW.
2837  *------------------------------------------------------------------*/
2838  /*------------------------------------------------------------------*/
2839  /* We call node_failed to release all connections for this api node */
2840  /*------------------------------------------------------------------*/
2841  signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2842  signal->theData[1] = nodeId;
2843  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2844 
2845  api_failed(signal, nodeId);
2846  }//if
2847  }//if
2848  else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2849  TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
2850  {
2851  jam();
2852  TnodePtr.p->m_secret = 0;
2853  warningEvent("Releasing node id allocation for node %u",
2854  TnodePtr.i);
2855  }
2856  }//for
2857  return;
2858 }//Qmgr::apiHbHandlingLab()
2859 
2860 void Qmgr::checkStartInterface(Signal* signal, Uint64 now)
2861 {
2862  NodeRecPtr nodePtr;
2863  /*------------------------------------------------------------------------*/
2864  // This method is called once per second. After a disconnect we wait at
2865  // least three seconds before allowing new connects. We will also ensure
2866  // that handling of the failure is completed before we allow new connections.
2867  /*------------------------------------------------------------------------*/
2868  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2869  ptrAss(nodePtr, nodeRec);
2870  Uint32 type = getNodeInfo(nodePtr.i).m_type;
2871  if (nodePtr.p->phase == ZFAIL_CLOSING) {
2872  jam();
2873  setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2874  if (c_connectedNodes.get(nodePtr.i)){
2875  jam();
2876  /*-------------------------------------------------------------------*/
2877  // We need to ensure that the connection is not restored until it has
2878  // been disconnected for at least three seconds.
2879  /*-------------------------------------------------------------------*/
2880  setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2881  }//if
2882  if ((getNodeInfo(nodePtr.i).m_heartbeat_cnt > 3)
2883  && (nodePtr.p->failState == NORMAL)) {
2890  nodePtr.p->failState = NORMAL;
2891  nodePtr.p->m_secret = 0;
2892  switch(type){
2893  case NodeInfo::DB:
2894  jam();
2895  nodePtr.p->phase = ZINIT;
2896  break;
2897  case NodeInfo::MGM:
2898  jam();
2899  nodePtr.p->phase = ZAPI_INACTIVE;
2900  break;
2901  case NodeInfo::API:
2902  jam();
2903  if (c_allow_api_connect)
2904  {
2905  jam();
2906  nodePtr.p->phase = ZAPI_INACTIVE;
2907  break;
2908  }
2909  else
2910  {
2914  jam();
2915  setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
2916  continue;
2917  }
2918  }
2919 
2920  setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2921  signal->theData[0] = 0;
2922  signal->theData[1] = nodePtr.i;
2923  sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
2924  }
2925  else
2926  {
2927  jam();
2928  if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0)
2929  {
2930  jam();
2931  char buf[256];
2932  if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
2933  {
2934  jam();
2935  BaseString::snprintf(buf, sizeof(buf),
2936  "Failure handling of node %d has not completed"
2937  " in %d min - state = %d",
2938  nodePtr.i,
2939  (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2940  nodePtr.p->failState);
2941  warningEvent("%s", buf);
2942  if (((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 300) == 0)
2943  {
2944  jam();
2948  signal->theData[0] = 7019;
2949  signal->theData[1] = nodePtr.i;
2950  sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
2951  }
2952  }
2953  else
2954  {
2955  jam();
2956  BaseString::snprintf(buf, sizeof(buf),
2957  "Failure handling of api %u has not completed"
2958  " in %d min - state = %d",
2959  nodePtr.i,
2960  (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2961  nodePtr.p->failState);
2962  warningEvent("%s", buf);
2963  if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
2964  {
2965  jam();
2966  compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
2967  BaseString::snprintf(buf, sizeof(buf),
2968  " Waiting for blocks: %u %u %u %u %u",
2969  nodePtr.p->m_failconf_blocks[0],
2970  nodePtr.p->m_failconf_blocks[1],
2971  nodePtr.p->m_failconf_blocks[2],
2972  nodePtr.p->m_failconf_blocks[3],
2973  nodePtr.p->m_failconf_blocks[4]);
2974  warningEvent("%s", buf);
2975  }
2976  }
2977  }
2978  }
2979  }
2980  else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
2981  nodePtr.p->m_secret != 0 && now > nodePtr.p->m_alloc_timeout)
2982  {
2983  jam();
2984  nodePtr.p->m_secret = 0;
2985  warningEvent("Releasing node id allocation for node %u",
2986  nodePtr.i);
2987  }
2988  }//for
2989  return;
2990 }//Qmgr::checkStartInterface()
2991 
2996 void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
2997 {
2998  jamEntry();
2999  signal->theData[0] = failedNodeNo;
3000  signal->theData[1] = QMGR_REF;
3001 
3002  /* We route the ApiFailReq signals via CMVMI
3003  * This is done to ensure that they are received after
3004  * any pending signals from the failed Api node when
3005  * running ndbmtd, as these signals would be enqueued from
3006  * the thread running CMVMI
3007  */
3008  Uint32 routedSignalSectionI = RNIL;
3009  ndbrequire(appendToSection(routedSignalSectionI,
3010  &signal->theData[0],
3011  2));
3012  SectionHandle handle(this, routedSignalSectionI);
3013 
3014  /* RouteOrd data */
3015  RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
3016  routeOrd->srcRef = reference();
3017  routeOrd->gsn = GSN_API_FAILREQ;
3018 
3019  NodeRecPtr failedNodePtr;
3020  failedNodePtr.i = failedNodeNo;
3021  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3022  failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3023 
3024 
3025  /* Send ROUTE_ORD signals to CMVMI via JBA
3026  * CMVMI will then immediately send the API_FAILREQ
3027  * signals to the destination block(s) using JBB
3028  * These API_FAILREQ signals will be sent *after*
3029  * any JBB signals enqueued from the failed API
3030  * by the CMVMI thread.
3031  */
3032  if (!sumaOnly)
3033  {
3034  jam();
3035  add_failconf_block(failedNodePtr, DBTC);
3036  routeOrd->dstRef = DBTC_REF;
3037  sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3038  RouteOrd::SignalLength,
3039  JBA, &handle);
3040 
3041  add_failconf_block(failedNodePtr, DBDICT);
3042  routeOrd->dstRef = DBDICT_REF;
3043  sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3044  RouteOrd::SignalLength,
3045  JBA, &handle);
3046 
3047  add_failconf_block(failedNodePtr, DBSPJ);
3048  routeOrd->dstRef = DBSPJ_REF;
3049  sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3050  RouteOrd::SignalLength,
3051  JBA, &handle);
3052  }
3053 
3054  /* Suma always notified */
3055  add_failconf_block(failedNodePtr, SUMA);
3056  routeOrd->dstRef = SUMA_REF;
3057  sendSignal(CMVMI_REF, GSN_ROUTE_ORD, signal,
3058  RouteOrd::SignalLength,
3059  JBA, &handle);
3060 }//Qmgr::sendApiFailReq()
3061 
3062 void Qmgr::execAPI_FAILREQ(Signal* signal)
3063 {
3064  jamEntry();
3065  NodeRecPtr failedNodePtr;
3066  failedNodePtr.i = signal->theData[0];
3067  // signal->theData[1] == QMGR_REF
3068  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3069 
3070  ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3071 
3072  api_failed(signal, signal->theData[0]);
3073 }
3074 
3075 void Qmgr::execAPI_FAILCONF(Signal* signal)
3076 {
3077  NodeRecPtr failedNodePtr;
3078 
3079  jamEntry();
3080  failedNodePtr.i = signal->theData[0];
3081  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3082 
3083  Uint32 block = refToMain(signal->theData[1]);
3084  if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3085  !remove_failconf_block(failedNodePtr, block))
3086  {
3087  jam();
3088  ndbout << "execAPI_FAILCONF from " << block
3089  << " failedNodePtr.p->failState = "
3090  << (Uint32)(failedNodePtr.p->failState)
3091  << " blocks: ";
3092  for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3093  {
3094  printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3095  }
3096  ndbout << endl;
3097  systemErrorLab(signal, __LINE__);
3098  }//if
3099 
3100  if (is_empty_failconf_block(failedNodePtr))
3101  {
3102  jam();
3103  failedNodePtr.p->failState = NORMAL;
3104 
3109  }
3110  return;
3111 }//Qmgr::execAPI_FAILCONF()
3112 
3113 void
3114 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3115 {
3116  // Check that it does not already exists!!
3117  Uint32 pos = 0;
3118  for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3119  {
3120  jam();
3121  if (nodePtr.p->m_failconf_blocks[pos] == 0)
3122  {
3123  jam();
3124  break;
3125  }
3126  else if (nodePtr.p->m_failconf_blocks[pos] == block)
3127  {
3128  jam();
3129  break;
3130  }
3131  }
3132 
3133  ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3134  ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3135  if (nodePtr.p->m_failconf_blocks[pos] == block)
3136  {
3137  jam();
3141 #ifdef ERROR_INSERT
3142  ndbrequire(false);
3143 #endif
3144  return;
3145  }
3146  ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3147  nodePtr.p->m_failconf_blocks[pos] = block;
3148 }
3149 
3150 bool
3151 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3152 {
3153  // Check that it does exists!!
3154  Uint32 pos = 0;
3155  for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3156  {
3157  jam();
3158  if (nodePtr.p->m_failconf_blocks[pos] == 0)
3159  {
3160  jam();
3161  break;
3162  }
3163  else if (nodePtr.p->m_failconf_blocks[pos] == block)
3164  {
3165  jam();
3166  break;
3167  }
3168  }
3169 
3170  if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3171  nodePtr.p->m_failconf_blocks[pos] != block)
3172  {
3173  jam();
3177  return false;
3178  }
3179 
3180  nodePtr.p->m_failconf_blocks[pos] = 0;
3181  for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3182  {
3183  jam();
3184  nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3185  }
3186 
3187  return true;
3188 }
3189 
3190 bool
3191 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3192 {
3193  return nodePtr.p->m_failconf_blocks[0] == 0;
3194 }
3195 
3196 void Qmgr::execNDB_FAILCONF(Signal* signal)
3197 {
3198  NodeRecPtr failedNodePtr;
3199  NodeRecPtr nodePtr;
3200 
3201  jamEntry();
3202  failedNodePtr.i = signal->theData[0];
3203 
3204  if (ERROR_INSERTED(930))
3205  {
3206  CLEAR_ERROR_INSERT_VALUE;
3207  infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3208  return;
3209  }
3210 
3211  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3212  if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
3213  failedNodePtr.p->failState = NORMAL;
3214  } else {
3215  jam();
3216 
3217  char buf[100];
3218  BaseString::snprintf(buf, 100,
3219  "Received NDB_FAILCONF for node %u with state: %d %d",
3220  failedNodePtr.i,
3221  failedNodePtr.p->phase,
3222  failedNodePtr.p->failState);
3223  progError(__LINE__, 0, buf);
3224  systemErrorLab(signal, __LINE__);
3225  }//if
3226 
3227  if (cpresident == getOwnNodeId())
3228  {
3229  jam();
3230 
3231  CRASH_INSERTION(936);
3232  }
3233 
3242  NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
3243  nfComp->blockNo = QMGR_REF;
3244  nfComp->nodeId = getOwnNodeId();
3245  nfComp->failedNodeId = failedNodePtr.i;
3246 
3247  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3248  {
3249  jam();
3250  ptrAss(nodePtr, nodeRec);
3251  if (nodePtr.p->phase == ZAPI_ACTIVE){
3252  jam();
3253  sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3254  NFCompleteRep::SignalLength, JBB);
3255  }//if
3256  }//for
3257  return;
3258 }//Qmgr::execNDB_FAILCONF()
3259 
3260 void
3261 Qmgr::execNF_COMPLETEREP(Signal* signal)
3262 {
3263  jamEntry();
3264  NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
3265  if (rep.blockNo != DBTC)
3266  {
3267  jam();
3268  ndbassert(false);
3269  return;
3270  }
3271 
3278  signal->theData[0] = rep.failedNodeId;
3279  NodeRecPtr nodePtr;
3280  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3281  {
3282  jam();
3283  ptrAss(nodePtr, nodeRec);
3284  if (nodePtr.p->phase == ZAPI_ACTIVE &&
3285  ndb_takeovertc(getNodeInfo(nodePtr.i).m_version))
3286  {
3287  jam();
3288  sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3289  NFCompleteRep::SignalLength, JBB);
3290  }//if
3291  }//for
3292  return;
3293 }
3294 
3295 /*******************************/
3296 /* DISCONNECT_REP */
3297 /*******************************/
3298 const char *lookupConnectionError(Uint32 err);
3299 
3300 void Qmgr::execDISCONNECT_REP(Signal* signal)
3301 {
3302  jamEntry();
3303  const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
3304  const Uint32 nodeId = rep->nodeId;
3305  const Uint32 err = rep->err;
3306  const NodeInfo nodeInfo = getNodeInfo(nodeId);
3307  c_connectedNodes.clear(nodeId);
3308 
3309  if (nodeInfo.getType() == NodeInfo::DB)
3310  {
3311  c_readnodes_nodes.clear(nodeId);
3312  }
3313 
3314  NodeRecPtr nodePtr;
3315  nodePtr.i = getOwnNodeId();
3316  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3317 
3318  char buf[100];
3319  if (nodeInfo.getType() == NodeInfo::DB &&
3321  {
3322  jam();
3323  CRASH_INSERTION(932);
3324  CRASH_INSERTION(938);
3325  BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3326  progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3327  ndbrequire(false);
3328  }
3329 
3330  if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
3331  {
3332  jam();
3333  api_failed(signal, nodeId);
3334  return;
3335  }
3336 
3337  switch(nodePtr.p->phase){
3338  case ZRUNNING:
3339  jam();
3340  break;
3341  case ZINIT:
3342  ndbrequire(false);
3343  case ZSTARTING:
3344  progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3345  lookupConnectionError(err));
3346  ndbrequire(false);
3347  case ZPREPARE_FAIL:
3348  ndbrequire(false);
3349  case ZFAIL_CLOSING:
3350  ndbrequire(false);
3351  case ZAPI_ACTIVE:
3352  ndbrequire(false);
3353  case ZAPI_INACTIVE:
3354  {
3355  BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3356  progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3357  ndbrequire(false);
3358  }
3359  }
3360  node_failed(signal, nodeId);
3361 }//DISCONNECT_REP
3362 
3363 void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
3364 {
3365  NodeRecPtr failedNodePtr;
3370  failedNodePtr.i = aFailedNode;
3371  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3372  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3373 
3374  ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
3375 
3380  switch(failedNodePtr.p->phase){
3381  case ZRUNNING:
3382  jam();
3383  failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3384  return;
3385  case ZFAIL_CLOSING:
3386  jam();
3387  return;
3388  case ZSTARTING:
3393  failedNodePtr.p->phase = ZRUNNING;
3394  failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3395  return;
3396  // Fall-through
3397  default:
3398  jam();
3399  /*---------------------------------------------------------------------*/
3400  // The other node is still not in the cluster but disconnected.
3401  // We must restart communication in three seconds.
3402  /*---------------------------------------------------------------------*/
3403  failedNodePtr.p->failState = NORMAL;
3404  failedNodePtr.p->phase = ZFAIL_CLOSING;
3405  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3406 
3407  CloseComReqConf * const closeCom =
3408  (CloseComReqConf *)&signal->theData[0];
3409 
3410  closeCom->xxxBlockRef = reference();
3411  closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3412  closeCom->failNo = 0;
3413  closeCom->noOfNodes = 1;
3414  NodeBitmask::clear(closeCom->theNodes);
3415  NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3416  sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3417  CloseComReqConf::SignalLength, JBA);
3418  }//if
3419  return;
3420 }
3421 
3422 void
3423 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
3424 {
3425  const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
3426  switch(ord->type){
3427  case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3428  jam();
3429  m_micro_gcp_enabled = true;
3430  return;
3431  }
3432 }
3433 
3434 void
3435 Qmgr::api_failed(Signal* signal, Uint32 nodeId)
3436 {
3437  NodeRecPtr failedNodePtr;
3442  failedNodePtr.i = nodeId;
3443  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3444  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3445 
3446  if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3447  {
3451  jam();
3452  return;
3453  }
3454 
3455  ndbrequire(failedNodePtr.p->failState == NORMAL);
3456 
3457  /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
3458  * quickly
3459  * Local application blocks get API_FAILREQ once all pending signals
3460  * from the failed API have been processed.
3461  */
3462  signal->theData[0] = failedNodePtr.i;
3463  signal->theData[1] = QMGR_REF;
3464  NodeReceiverGroup rg(QMGR, c_clusterNodes);
3465  sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
3466 
3467  /* Now ask CMVMI to disconnect the node */
3468  FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3469  WAITING_FOR_CLOSECOMCONF_ACTIVE :
3470  WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3471 
3472  failedNodePtr.p->failState = initialState;
3473  failedNodePtr.p->phase = ZFAIL_CLOSING;
3474  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3475  setNodeInfo(failedNodePtr.i).m_version = 0;
3476  recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3477 
3478  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
3479  closeCom->xxxBlockRef = reference();
3480  closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3481  closeCom->failNo = 0;
3482  closeCom->noOfNodes = 1;
3483  NodeBitmask::clear(closeCom->theNodes);
3484  NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3485  sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3486  CloseComReqConf::SignalLength, JBA);
3487 } // api_failed
3488 
3493 /*******************************/
3494 /* API_REGREQ */
3495 /*******************************/
3496 void Qmgr::execAPI_REGREQ(Signal* signal)
3497 {
3498  jamEntry();
3499 
3500  ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
3501  const Uint32 version = req->version;
3502  const BlockReference ref = req->ref;
3503 
3504  Uint32 mysql_version = req->mysql_version;
3505  if (version < NDBD_SPLIT_VERSION)
3506  mysql_version = 0;
3507 
3508  NodeRecPtr apiNodePtr;
3509  apiNodePtr.i = refToNode(ref);
3510  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3511 
3512  if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3513  {
3514  jam();
3519  return;
3520  }
3521 
3522 #if 0
3523  ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3524 #endif
3525 
3526  bool compatability_check;
3527  const char * extra = 0;
3528  NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
3529  switch(type){
3530  case NodeInfo::API:
3531  if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3532  {
3533  jam();
3534  compatability_check = false;
3535  extra = ": micro gcp enabled";
3536  }
3537  else
3538  {
3539  jam();
3540  compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3541  }
3542  break;
3543  case NodeInfo::MGM:
3544  compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3545  break;
3546  case NodeInfo::DB:
3547  case NodeInfo::INVALID:
3548  default:
3549  sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3550  infoEvent("Invalid connection attempt with type %d", type);
3551  return;
3552  }
3553 
3554  if (!compatability_check) {
3555  jam();
3556  char buf[NDB_VERSION_STRING_BUF_SZ];
3557  infoEvent("Connection attempt from %s id=%d with %s "
3558  "incompatible with %s%s",
3559  type == NodeInfo::API ? "api or mysqld" : "management server",
3560  apiNodePtr.i,
3561  ndbGetVersionString(version, mysql_version, 0,
3562  buf,
3563  sizeof(buf)),
3564  NDB_VERSION_STRING,
3565  extra ? extra : "");
3566  apiNodePtr.p->phase = ZAPI_INACTIVE;
3567  sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3568  return;
3569  }
3570 
3571  setNodeInfo(apiNodePtr.i).m_version = version;
3572  setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
3573  setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
3574 
3575  NodeState state = getNodeState();
3576  if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3577  {
3578  apiNodePtr.p->blockRef = ref;
3579  if ((state.startLevel == NodeState::SL_STARTED ||
3580  state.getSingleUserMode() ||
3581  (state.startLevel == NodeState::SL_STARTING &&
3582  state.starting.startPhase >= 100)))
3583  {
3584  jam();
3589  apiNodePtr.p->phase = ZAPI_ACTIVE;
3590  EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
3591  enableComReq->m_senderRef = reference();
3592  enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3593  NodeBitmask::clear(enableComReq->m_nodeIds);
3594  NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3595  sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
3596  EnableComReq::SignalLength, JBA);
3597  return;
3598  }
3599  }
3600 
3601  sendApiRegConf(signal, apiNodePtr.i);
3602 }//Qmgr::execAPI_REGREQ()
3603 
3604 void
3605 Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
3606 {
3607  NodeInfo::NodeType type = getNodeInfo(node).getType();
3608  Uint32 version = getNodeInfo(node).m_version;
3609  recompute_version_info(type, version);
3610 
3611  signal->theData[0] = node;
3612  signal->theData[1] = version;
3613  NodeReceiverGroup rg(QMGR, c_clusterNodes);
3614  rg.m_nodes.clear(getOwnNodeId());
3615  sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3616  NDBD_NODE_VERSION_REP);
3617 
3618  signal->theData[0] = node;
3619  EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
3620 
3621  sendApiRegConf(signal, node);
3622 }
3623 
3624 void
3625 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
3626 {
3627  NodeRecPtr apiNodePtr;
3628  apiNodePtr.i = node;
3629  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3630  const BlockReference ref = apiNodePtr.p->blockRef;
3631  ndbassert(ref != 0);
3632 
3633  ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
3634  apiRegConf->qmgrRef = reference();
3635  apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3636  apiRegConf->version = NDB_VERSION;
3637  apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3638  apiRegConf->nodeState = getNodeState();
3639  {
3640  NodeRecPtr nodePtr;
3641  nodePtr.i = getOwnNodeId();
3642  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3643  Uint32 dynamicId = nodePtr.p->ndynamicId;
3644 
3645  if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3646  jam();
3647  apiRegConf->nodeState.dynamicId = dynamicId;
3648  } else {
3649  apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3650  }
3651  }
3652  NodeVersionInfo info = getNodeVersionInfo();
3653  apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
3654  apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
3655  sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
3656 }
3657 
3658 void
3659 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
3660  GlobalSignalNumber gsn,
3661  Signal* signal,
3662  Uint32 length,
3663  JobBufferLevel jbuf,
3664  Uint32 minversion)
3665 {
3666  jam();
3667  NodeVersionInfo info = getNodeVersionInfo();
3668  if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
3669  {
3670  jam();
3671  sendSignal(rg, gsn, signal, length, jbuf);
3672  }
3673  else
3674  {
3675  jam();
3676  Uint32 i = 0, cnt = 0;
3677  while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
3678  {
3679  jam();
3680  if (getNodeInfo(i).m_version >= minversion)
3681  {
3682  jam();
3683  cnt++;
3684  sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3685  }
3686  }
3687  ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
3688  (cnt < rg.m_nodes.count()));
3689  }
3690 }
3691 
3692 void
3693 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
3694  jamEntry();
3695  ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
3696 
3697  Uint32 senderRef = req->senderRef;
3698  Uint32 nodeId = req->nodeId;
3699 
3700  ApiVersionConf * conf = (ApiVersionConf *)req;
3701  if(getNodeInfo(nodeId).m_connected)
3702  {
3703  conf->version = getNodeInfo(nodeId).m_version;
3704  conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
3705  struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3706  conf->inet_addr= in.s_addr;
3707  }
3708  else
3709  {
3710  conf->version = 0;
3711  conf->mysql_version = 0;
3712  conf->inet_addr= 0;
3713  }
3714  conf->nodeId = nodeId;
3715 
3716  sendSignal(senderRef,
3717  GSN_API_VERSION_CONF,
3718  signal,
3719  ApiVersionConf::SignalLength, JBB);
3720 }
3721 
3722 void
3723 Qmgr::execNODE_VERSION_REP(Signal* signal)
3724 {
3725  jamEntry();
3726  Uint32 nodeId = signal->theData[0];
3727  Uint32 version = signal->theData[1];
3728 
3729  if (nodeId < MAX_NODES)
3730  {
3731  jam();
3732  Uint32 type = getNodeInfo(nodeId).m_type;
3733  setNodeInfo(nodeId).m_version = version;
3734  recompute_version_info(type, version);
3735  }
3736 }
3737 
3738 void
3739 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
3740 {
3741  NodeVersionInfo& info = setNodeVersionInfo();
3742  switch(type){
3743  case NodeInfo::DB:
3744  case NodeInfo::API:
3745  case NodeInfo::MGM:
3746  break;
3747  default:
3748  return;
3749  }
3750 
3751  if (info.m_type[type].m_min_version == 0 ||
3752  version < info.m_type[type].m_min_version)
3753  info.m_type[type].m_min_version = version;
3754  if (version > info.m_type[type].m_max_version)
3755  info.m_type[type].m_max_version = version;
3756 }
3757 
3758 void
3759 Qmgr::recompute_version_info(Uint32 type)
3760 {
3761  switch(type){
3762  case NodeInfo::DB:
3763  case NodeInfo::API:
3764  case NodeInfo::MGM:
3765  break;
3766  default:
3767  return;
3768  }
3769 
3770  Uint32 min = ~0, max = 0;
3771  Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
3772  for (Uint32 i = 1; i<cnt; i++)
3773  {
3774  if (getNodeInfo(i).m_type == type)
3775  {
3776  Uint32 version = getNodeInfo(i).m_version;
3777 
3778  if (version)
3779  {
3780  if (version < min)
3781  min = version;
3782  if (version > max)
3783  max = version;
3784  }
3785  }
3786  }
3787 
3788  NodeVersionInfo& info = setNodeVersionInfo();
3789  info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
3790  info.m_type[type].m_max_version = max;
3791 }
3792 
/* NOTE: everything between this #if 0 and the matching #endif is compiled
 * out.  checkAPIVersion() returns false for a non-MGM node whose major or
 * minor version is lower than ours (and whose version is at least
 * API_UPGRADE_VERSION), and true in every other case.
 */
3793 #if 0
3794 bool
3795 Qmgr::checkAPIVersion(NodeId nodeId,
3796  Uint32 apiVersion, Uint32 ownVersion) const {
3797  bool ret=true;
3801  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
3802  getMinor(apiVersion) < getMinor(ownVersion)) &&
3803  apiVersion >= API_UPGRADE_VERSION) {
3804  jam();
3805  if ( getNodeInfo(nodeId).getType() != NodeInfo::MGM ) {
3806  jam();
3807  ret = false;
3808  } else {
3809  jam();
3810  /* we have a software upgrade situation, mgmtsrvr should be
3811  * the highest, let him decide what to do
3812  */
3813  ;
3814  }
3815  }
3816  return ret;
3817 }
3818 #endif
3819 
3820 void
3821 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
3822  ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
3823  ref->ref = reference();
3824  ref->version = NDB_VERSION;
3825  ref->mysql_version = NDB_MYSQL_VERSION_D;
3826  ref->errorCode = err;
3827  sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
3828 }
3829 
/**
 * Handle a report that data node aFailedNode has failed.
 *
 * In order:
 *  - returns at once when check_multi_node_shutdown() says so;
 *  - connectivity related reports (ZCONNECT_CHECK_FAILURE, ZLINK_FAILURE)
 *    coming from a node whose connectivity is suspect are handed to
 *    handleFailFromSuspect();
 *  - when the failed node is this node, a descriptive message is built
 *    and passed to progError();
 *  - this node must be in phase ZRUNNING (else systemErrorLab()) and must
 *    have reached SL_STARTED (else progError());
 *  - otherwise the failure is recorded with failReport() and, when this
 *    node is president, no take-over is active and the number of failed
 *    nodes changed, a new failure number is allocated and PREP_FAILREQ is
 *    sent to every node in phase ZRUNNING.
 *
 * @param signal       Incoming signal; also reused as send buffer.
 * @param aFailedNode  Node id reported as failed.
 * @param aFailCause   Reason given for the failure.
 * @param sourceNode   Node that reported the failure.
 */
3835 void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
3836  FailRep::FailCause aFailCause,
3837  Uint16 sourceNode)
3838 {
3839  NodeRecPtr nodePtr;
3840  NodeRecPtr failedNodePtr;
3841  NodeRecPtr myNodePtr;
3842  UintR TnoFailedNodes;
3843 
3844  failedNodePtr.i = aFailedNode;
3845  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
 /* rep is only dereferenced in the ZPARTITIONED_CLUSTER case below */
3846  FailRep* rep = (FailRep*)signal->getDataPtr();
3847 
3848  if (check_multi_node_shutdown(signal))
3849  {
3850  jam();
3851  return;
3852  }
3853 
3854  if (isNodeConnectivitySuspect(sourceNode) &&
3855  // (! isNodeConnectivitySuspect(aFailedNode)) && // TODO : Required?
3856  ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
3857  (aFailCause == FailRep::ZLINK_FAILURE)))
3858  {
3859  jam();
3860  /* Connectivity related failure report from a node with suspect
3861  * connectivity, handle differently
3862  */
3863  ndbrequire(sourceNode != getOwnNodeId());
3864 
3865  handleFailFromSuspect(signal,
3866  aFailCause,
3867  aFailedNode,
3868  sourceNode);
3869  return;
3870  }
3871 
 /* We are the node being declared dead: build a reason text and stop */
3872  if (failedNodePtr.i == getOwnNodeId()) {
3873  jam();
3874 
3875  Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
3876  const char * msg = 0;
3877  char extra[100];
3878  switch(aFailCause){
3879  case FailRep::ZOWN_FAILURE:
3880  msg = "Own failure";
3881  break;
3882  case FailRep::ZOTHER_NODE_WHEN_WE_START:
3883  case FailRep::ZOTHERNODE_FAILED_DURING_START:
3884  msg = "Other node died during start";
3885  break;
3886  case FailRep::ZIN_PREP_FAIL_REQ:
3887  msg = "Prep fail";
3888  break;
3889  case FailRep::ZSTART_IN_REGREQ:
3890  msg = "Start timeout";
3891  break;
3892  case FailRep::ZHEARTBEAT_FAILURE:
3893  msg = "Heartbeat failure";
3894  break;
3895  case FailRep::ZLINK_FAILURE:
3896  msg = "Connection failure";
3897  break;
3898  case FailRep::ZPARTITIONED_CLUSTER:
3899  {
3900  code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
3901  char buf1[100], buf2[100];
3902  c_clusterNodes.getText(buf1);
 /* The other partition's node set is only read when the signal is a
  * FAIL_REP whose length includes the partitioned extra words
  */
3903  if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
3904  (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
3905  signal->header.theVerId_signalNumber == GSN_FAIL_REP)
3906  {
3907  jam();
3908  NdbNodeBitmask part;
3909  part.assign(NdbNodeBitmask::Size, rep->partitioned.partition);
3910  part.getText(buf2);
3911  BaseString::snprintf(extra, sizeof(extra),
3912  "Our cluster: %s other cluster: %s",
3913  buf1, buf2);
3914  }
3915  else
3916  {
3917  jam();
3918  BaseString::snprintf(extra, sizeof(extra),
3919  "Our cluster: %s", buf1);
3920  }
3921  msg = extra;
3922  break;
3923  }
3924  case FailRep::ZMULTI_NODE_SHUTDOWN:
3925  msg = "Multi node shutdown";
3926  break;
3927  case FailRep::ZCONNECT_CHECK_FAILURE:
3928  msg = "Connectivity check failure";
3929  break;
3930  default:
3931  msg = "<UNKNOWN>";
3932  }
3933 
3934  CRASH_INSERTION(932);
3935  CRASH_INSERTION(938);
3936 
3937  char buf[255];
3938  BaseString::snprintf(buf, sizeof(buf),
3939  "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
3940  getOwnNodeId(),
3941  sourceNode,
3942  refToNode(signal->getSendersBlockRef()),
3943  msg ? msg : "<Unknown>",
3944  aFailCause);
3945 
3946  progError(__LINE__, code, buf);
3947  return;
3948  }//if
3949 
3950  myNodePtr.i = getOwnNodeId();
3951  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
3952  if (myNodePtr.p->phase != ZRUNNING) {
3953  jam();
3954  systemErrorLab(signal, __LINE__);
3955  return;
3956  }//if
3957 
 /* Another node failing before we have reached SL_STARTED stops this node */
3958  if (getNodeState().startLevel < NodeState::SL_STARTED)
3959  {
3960  jam();
3961  CRASH_INSERTION(932);
3962  CRASH_INSERTION(938);
3963  char buf[100];
3964  BaseString::snprintf(buf, 100, "Node failure during restart");
3965  progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3966  ndbrequire(false);
3967  }
3968 
 /* Remember the count so we can tell whether failReport() added a node */
3969  TnoFailedNodes = cnoFailedNodes;
3970  failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
3971  if (cpresident == getOwnNodeId()) {
3972  jam();
3973  if (ctoStatus == Q_NOT_ACTIVE) {
3974  jam();
3985  if (TnoFailedNodes != cnoFailedNodes) {
3986  jam();
 /* New failure number, then PREP_FAILREQ to every running node */
3987  cfailureNr = cfailureNr + 1;
3988  for (nodePtr.i = 1;
3989  nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
3990  jam();
3991  ptrAss(nodePtr, nodeRec);
3992  if (nodePtr.p->phase == ZRUNNING) {
3993  jam();
3994  sendPrepFailReq(signal, nodePtr.i);
3995  }//if
3996  }//for
3997  }//if
3998  }//if
3999  }//if
4000  return;
4001 }//Qmgr::failReportLab()
4002 
4008 /*******************************/
4009 /* PREP_FAILREQ */
4010 /*******************************/
4011 void Qmgr::execPREP_FAILREQ(Signal* signal)
4012 {
4013  NodeRecPtr myNodePtr;
4014  jamEntry();
4015 
4016  c_start.reset();
4017 
4018  if (check_multi_node_shutdown(signal))
4019  {
4020  jam();
4021  return;
4022  }
4023 
4024  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4025 
4026  BlockReference Tblockref = prepFail->xxxBlockRef;
4027  Uint16 TfailureNr = prepFail->failNo;
4028  cnoPrepFailedNodes = prepFail->noOfNodes;
4029  UintR arrayIndex = 0;
4030  Uint32 Tindex;
4031  for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4032  if (NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4033  cprepFailedNodes[arrayIndex] = Tindex;
4034  arrayIndex++;
4035  }//if
4036  }//for
4037  UintR guard0;
4038 
4044  BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
4045  block->failNo = TfailureNr;
4046  EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
4047  BlockCommitOrd::SignalLength);
4048 
4049  myNodePtr.i = getOwnNodeId();
4050  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4051  if (myNodePtr.p->phase != ZRUNNING) {
4052  jam();
4053  systemErrorLab(signal, __LINE__);
4054  return;
4055  }//if
4056 
4057  if (getNodeState().startLevel < NodeState::SL_STARTED)
4058  {
4059  jam();
4060  CRASH_INSERTION(932);
4061  CRASH_INSERTION(938);
4062  char buf[100];
4063  BaseString::snprintf(buf, 100, "Node failure during restart");
4064  progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4065  ndbrequire(false);
4066  }
4067 
4068  guard0 = cnoPrepFailedNodes - 1;
4069  arrGuard(guard0, MAX_NDB_NODES);
4070  for (Tindex = 0; Tindex <= guard0; Tindex++) {
4071  jam();
4072  failReport(signal,
4073  cprepFailedNodes[Tindex],
4074  (UintR)ZFALSE,
4075  FailRep::ZIN_PREP_FAIL_REQ,
4076  0); /* Source node not required (or known) here */
4077  }//for
4078  sendCloseComReq(signal, Tblockref, TfailureNr);
4079  cnoCommitFailedNodes = 0;
4080  cprepareFailureNr = TfailureNr;
4081  return;
4082 }//Qmgr::execPREP_FAILREQ()
4083 
4084 
4085 void Qmgr::handleApiCloseComConf(Signal* signal)
4086 {
4087  jam();
4088  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4089 
4090  /* Api failure special case */
4091  for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
4092  {
4093  if (NodeBitmask::get(closeCom->theNodes, nodeId))
4094  {
4095  jam();
4096  /* Check that *only* 1 *API* node is included in
4097  * this CLOSE_COM_CONF
4098  */
4099  ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
4100  ndbrequire(closeCom->noOfNodes == 1);
4101  NodeBitmask::clear(closeCom->theNodes, nodeId);
4102  ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4103 
4104  /* Now that we know communication from the failed Api has
4105  * ceased, we can send the required API_FAILREQ signals
4106  * and continue API failure handling
4107  */
4108  NodeRecPtr failedNodePtr;
4109  failedNodePtr.i = nodeId;
4110  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4111 
4112  ndbrequire((failedNodePtr.p->failState ==
4113  WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4114  (failedNodePtr.p->failState ==
4115  WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4116 
4117  if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4118  {
4122  jam();
4123  sendApiFailReq(signal, nodeId, false); // !sumaOnly
4124  arbitRec.code = ArbitCode::ApiFail;
4125  handleArbitApiFail(signal, nodeId);
4126  }
4127  else
4128  {
4132  jam();
4133  sendApiFailReq(signal, nodeId, true); // sumaOnly
4134  }
4135 
4136  if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
4137  {
4141  jam();
4142  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
4143  }
4144 
4145  /* Handled the single API node failure */
4146  return;
4147  }
4148  }
4149  /* Never get here */
4150  ndbrequire(false);
4151 }
4152 
4159 /*******************************/
4160 /* CLOSE_COMCONF */
4161 /*******************************/
/**
 * CLOSE_COMCONF
 *
 * Confirmation that communication with a set of nodes has been closed.
 * API failure confirmations (RT_API_FAILURE) are delegated to
 * handleApiCloseComConf().  Otherwise the closed set is compared with the
 * locally known failed nodes (cfailedNodes): when every known failed node
 * is in the closed set PREP_FAILCONF is sent to the requester, else the
 * missing nodes are appended and PREP_FAILREF is sent with the enlarged
 * set.
 */
4162 void Qmgr::execCLOSE_COMCONF(Signal* signal)
4163 {
4164  jamEntry();
4165 
4166  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4167 
4168  Uint32 requestType = closeCom->requestType;
4169 
4170  if (requestType == CloseComReqConf::RT_API_FAILURE)
4171  {
4172  jam();
4173  handleApiCloseComConf(signal);
4174  return;
4175  }
4176 
4177  /* Normal node failure preparation path */
4178  ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4179  BlockReference Tblockref = closeCom->xxxBlockRef;
4180  Uint16 TfailureNr = closeCom->failNo;
4181 
 /* Unpack the node bitmask into the cprepFailedNodes array */
4182  cnoPrepFailedNodes = closeCom->noOfNodes;
4183  UintR arrayIndex = 0;
4184  UintR Tindex = 0;
4185  for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++){
4186  if(NdbNodeBitmask::get(closeCom->theNodes, Tindex)){
4187  cprepFailedNodes[arrayIndex] = Tindex;
4188  arrayIndex++;
4189  }
4190  }
4191  ndbassert(arrayIndex == cnoPrepFailedNodes);
4192  UintR tprepFailConf;
4193  UintR Tindex2;
4194  UintR guard0;
4195  UintR guard1;
4196  UintR Tfound;
4197  Uint16 TfailedNodeNo;
4198 
4199  tprepFailConf = ZTRUE;
4200  if (cnoFailedNodes > 0) {
4201  jam();
4202  /* Check whether the set of nodes which have had communications
4203  * closed is the same as the set of failed nodes.
4204  * If it is, we can confirm the PREP_FAIL phase for this set
4205  * of nodes to the President.
4206  * If it is not, we Refuse the PREP_FAIL phase for this set
4207  * of nodes, the President will start a new PREP_FAIL phase
4208  * for the new set.
4209  */
4210  guard0 = cnoFailedNodes - 1;
4211  arrGuard(guard0, MAX_NDB_NODES);
4212  for (Tindex = 0; Tindex <= guard0; Tindex++) {
4213  jam();
4214  TfailedNodeNo = cfailedNodes[Tindex];
4215  Tfound = ZFALSE;
 /* guard1 is recomputed on every pass because cnoPrepFailedNodes
  * grows when a missing node is appended below
  */
4216  guard1 = cnoPrepFailedNodes - 1;
4217  arrGuard(guard1, MAX_NDB_NODES);
4218  for (Tindex2 = 0; Tindex2 <= guard1; Tindex2++) {
4219  jam();
4220  if (TfailedNodeNo == cprepFailedNodes[Tindex2]) {
4221  jam();
4222  Tfound = ZTRUE;
4223  }//if
4224  }//for
4225  if (Tfound == ZFALSE) {
4226  jam();
4227  /* A failed node is missing from the set, we will not
4228  * confirm this Prepare_Fail phase.
4229  * Store the node id in the array for later.
4230  */
4231  tprepFailConf = ZFALSE;
4232  arrGuard(cnoPrepFailedNodes, MAX_NDB_NODES);
4233  cprepFailedNodes[cnoPrepFailedNodes] = TfailedNodeNo;
4234  cnoPrepFailedNodes = cnoPrepFailedNodes + 1;
4235  }//if
4236  }//for
4237  }//if
4238  if (tprepFailConf == ZFALSE) {
4239  jam();
4240  /* Inform President that we cannot confirm the PREP_FAIL
4241  * phase as we are aware of at least one other node
4242  * failure
4243  */
 /* The enlarged prepare set becomes our failed-node set */
4244  for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4245  cfailedNodes[Tindex] = cprepFailedNodes[Tindex];
4246  }//for
4247  cnoFailedNodes = cnoPrepFailedNodes;
4248  sendPrepFailReqRef(signal,
4249  Tblockref,
4250  GSN_PREP_FAILREF,
4251  reference(),
4252  cfailureNr,
4253  cnoPrepFailedNodes,
4254  cprepFailedNodes);
4255  } else {
4256  /* We have prepared the failure of the requested nodes
4257  * send confirmation to the president
4258  */
4259  jam();
 /* The prepared set is copied to the commit set used by COMMIT_FAILREQ */
4260  cnoCommitFailedNodes = cnoPrepFailedNodes;
4261  guard0 = cnoPrepFailedNodes - 1;
4262  arrGuard(guard0, MAX_NDB_NODES);
4263  for (Tindex = 0; Tindex <= guard0; Tindex++) {
4264  jam();
4265  arrGuard(Tindex, MAX_NDB_NODES);
4266  ccommitFailedNodes[Tindex] = cprepFailedNodes[Tindex];
4267  }//for
4268  signal->theData[0] = getOwnNodeId();
4269  signal->theData[1] = TfailureNr;
4270  sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
4271  }//if
4272  return;
4273 }//Qmgr::execCLOSE_COMCONF()
4274 
4275 /*---------------------------------------------------------------------------*/
4276 /* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS PREPARED THE FAILURE. */
4277 /*---------------------------------------------------------------------------*/
4278 /*******************************/
4279 /* PREP_FAILCONF */
4280 /*******************************/
4281 void Qmgr::execPREP_FAILCONF(Signal* signal)
4282 {
4283  NodeRecPtr nodePtr;
4284  NodeRecPtr replyNodePtr;
4285  jamEntry();
4286  replyNodePtr.i = signal->theData[0];
4287  Uint16 TfailureNr = signal->theData[1];
4288  if (TfailureNr != cfailureNr) {
4289  jam();
4294  return;
4295  }//if
4296  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4297  replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4298  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4299  jam();
4300  ptrAss(nodePtr, nodeRec);
4301  if (nodePtr.p->phase == ZRUNNING) {
4302  if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4303  jam();
4304  return;
4305  }//if
4306  }//if
4307  }//for
4312  arbitRec.failureNr = cfailureNr;
4313  const NodeState & s = getNodeState();
4314  if(s.startLevel == NodeState::SL_STOPPING_3 && s.stopping.systemShutdown){
4315  jam();
4320  return;
4321  }
4322 
4323  switch(arbitRec.method){
4324  case ArbitRec::DISABLED:
4325  jam();
4326  // No arbitration -> immediately commit the failed nodes
4327  sendCommitFailReq(signal);
4328  break;
4329 
4330  case ArbitRec::METHOD_EXTERNAL:
4331  case ArbitRec::METHOD_DEFAULT:
4332  jam();
4333  handleArbitCheck(signal);
4334  break;
4335 
4336  }
4337  return;
4338 }//Qmgr::execPREP_FAILCONF()
4339 
4340 void
4341 Qmgr::sendCommitFailReq(Signal* signal)
4342 {
4343  NodeRecPtr nodePtr;
4344  jam();
4345  if (arbitRec.failureNr != cfailureNr) {
4346  jam();
4351  return;
4352  }//if
4357  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4358  jam();
4359  ptrAss(nodePtr, nodeRec);
4360 
4361 #ifdef ERROR_INSERT
4362  if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4363  {
4364  ndbout_c("skipping node %d", c_error_insert_extra);
4365  CLEAR_ERROR_INSERT_VALUE;
4366  signal->theData[0] = 9999;
4367  sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4368  continue;
4369  }
4370 #endif
4371 
4372  if (nodePtr.p->phase == ZRUNNING) {
4373  jam();
4374  nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4375  signal->theData[0] = cpdistref;
4376  signal->theData[1] = cfailureNr;
4377  sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4378  }//if
4379  }//for
4380  ctoStatus = Q_ACTIVE;
4381  cnoFailedNodes = 0;
4382  return;
4383 }//sendCommitFailReq()
4384 
4385 /*---------------------------------------------------------------------------*/
4386 /* SOME NODE HAS DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED. */
4387 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES. */
4388 /*---------------------------------------------------------------------------*/
4389 /*******************************/
4390 /* PREP_FAILREF */
4391 /*******************************/
4392 void Qmgr::execPREP_FAILREF(Signal* signal)
4393 {
4394  NodeRecPtr nodePtr;
4395  jamEntry();
4396 
4397  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4398 
4399  Uint16 TfailureNr = prepFail->failNo;
4400  cnoPrepFailedNodes = prepFail->noOfNodes;
4401 
4402  UintR arrayIndex = 0;
4403  UintR Tindex = 0;
4404  for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4405  jam();
4406  if(NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4407  jam();
4408  cprepFailedNodes[arrayIndex] = Tindex;
4409  arrayIndex++;
4410  }//if
4411  }//for
4412  if (TfailureNr != cfailureNr) {
4413  jam();
4418  return;
4419  }//if
4420  UintR guard0;
4421  UintR Ti;
4422 
4423  cnoFailedNodes = cnoPrepFailedNodes;
4424  guard0 = cnoPrepFailedNodes - 1;
4425  arrGuard(guard0, MAX_NDB_NODES);
4426  for (Ti = 0; Ti <= guard0; Ti++) {
4427  jam();
4428  cfailedNodes[Ti] = cprepFailedNodes[Ti];
4429  }//for
4430  cfailureNr = cfailureNr + 1;
4431  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4432  jam();
4433  ptrAss(nodePtr, nodeRec);
4434  if (nodePtr.p->phase == ZRUNNING) {
4435  jam();
4436  sendPrepFailReq(signal, nodePtr.i);
4437  }//if
4438  }//for
4439  return;
4440 }//Qmgr::execPREP_FAILREF()
4441 
/**
 * Remove from dst[0..dstcnt) every node id that also occurs in
 * src[0..srccnt), compacting the remaining entries to the front of dst
 * in their original order.
 *
 * The membership test must look in src; comparing against dst instead
 * never consults src and drops the wrong entries (the first entry always
 * matches itself).
 *
 * @param dstcnt  Number of valid entries in dst.
 * @param dst     Array to filter in place.
 * @param srccnt  Number of valid entries in src.
 * @param src     Node ids to remove from dst.
 * @return        Number of entries left in dst.
 */
static
Uint32
clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
{
  if (srccnt == 0)
    return dstcnt;

  Uint32 pos = 0;
  for (Uint32 i = 0; i < dstcnt; i++)
  {
    const Uint16 node = dst[i];
    bool remove = false;
    for (Uint32 j = 0; j < srccnt; j++)
    {
      if (node == src[j])
      {
        remove = true;
        break;
      }
    }
    if (!remove)
    {
      /* pos <= i always holds, so this never clobbers an unread entry */
      dst[pos++] = node;
    }
  }
  return pos;
}
4468 
4469 /*---------------------------------------------------------------------------*/
4470 /* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
4471 /*---------------------------------------------------------------------------*/
4472 /***********************/
4473 /* COMMIT_FAILREQ */
4474 /***********************/
/**
 * COMMIT_FAILREQ
 *
 * Commit of a previously prepared set of node failures
 * (theData[0] = sender reference, theData[1] = failure number).
 * Requests whose sender reference differs from cpdistref are dropped.
 *
 * UNBLOCK_COMMIT_ORD is executed directly in DBDIH.  When the failure
 * number is new and there are prepared nodes to commit, NODE_FAILREP is
 * sent to NDBCNTR, each committed node is moved to ZFAIL_CLOSING /
 * WAITING_FOR_NDB_FAILCONF and removed from c_clusterNodes, NODE_FAILREP
 * is sent to every API in phase ZAPI_ACTIVE, and the committed nodes are
 * removed from the failed and prepare lists.  COMMIT_FAILCONF is always
 * returned to the sender.
 */
4475 void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
4476 {
4477  NodeRecPtr nodePtr;
4478  jamEntry();
4479 
4480  CRASH_INSERTION(935);
4481 
 /* Read the request before the signal buffer is reused below */
4482  BlockReference Tblockref = signal->theData[0];
4483  UintR TfailureNr = signal->theData[1];
4484  if (Tblockref != cpdistref) {
4485  jam();
4486  return;
4487  }//if
4488  UintR guard0;
4489  UintR Tj;
4490 
4496  UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
4497  unblock->failNo = TfailureNr;
4498  EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
4499  UnblockCommitOrd::SignalLength);
4500 
 /* Only act once per failure number, and only if something was prepared */
4501  if ((ccommitFailureNr != TfailureNr) &&
4502  (cnoCommitFailedNodes > 0)) {
4503  jam();
4508  ccommitFailureNr = TfailureNr;
4509  NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4510 
4511  nodeFail->failNo = ccommitFailureNr;
4512  nodeFail->noOfNodes = cnoCommitFailedNodes;
4513  nodeFail->masterNodeId = cpresident;
4514  NdbNodeBitmask::clear(nodeFail->theNodes);
4515  for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4516  jam();
4517  NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4518  }//for
4519 
 /* Error insert 936 sends the report to NDBCNTR with a delay of 200 */
4520  if (ERROR_INSERTED(936))
4521  {
4522  sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4523  200, NodeFailRep::SignalLength);
4524  }
4525  else
4526  {
4527  sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4528  NodeFailRep::SignalLength, JBB);
4529  }
4530 
4531  guard0 = cnoCommitFailedNodes - 1;
4532  arrGuard(guard0, MAX_NDB_NODES);
 /* Mark every committed node as failed and drop it from the cluster set */
4537  for (Tj = 0; Tj <= guard0; Tj++) {
4538  jam();
4539  nodePtr.i = ccommitFailedNodes[Tj];
4540  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4541  nodePtr.p->phase = ZFAIL_CLOSING;
4542  nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
4543  setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
4544  setNodeInfo(nodePtr.i).m_version = 0;
4545  c_clusterNodes.clear(nodePtr.i);
4546  }//for
 /* Versions were zeroed above, so rebuild the DB min/max from scratch */
4547  recompute_version_info(NodeInfo::DB);
4548  /*----------------------------------------------------------------------*/
4549  /* WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES. */
4550  /*----------------------------------------------------------------------*/
4551  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
4552  jam();
4553  ptrAss(nodePtr, nodeRec);
4554  if (nodePtr.p->phase == ZAPI_ACTIVE) {
4555  jam();
4556 
4557  NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4558 
4559  nodeFail->failNo = ccommitFailureNr;
4560  nodeFail->noOfNodes = cnoCommitFailedNodes;
4561  NdbNodeBitmask::clear(nodeFail->theNodes);
4562  for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4563  jam();
4564  NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4565  }//for
4566  sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
4567  NodeFailRep::SignalLength, JBB);
4568  }//if
4569  }//for
4570 
 /* Take the committed nodes out of the failed and prepare lists */
4574  cnoFailedNodes = clear_nodes(cnoFailedNodes,
4575  cfailedNodes,
4576  cnoCommitFailedNodes,
4577  ccommitFailedNodes);
4578  cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
4579  cprepFailedNodes,
4580  cnoCommitFailedNodes,
4581  ccommitFailedNodes);
4582  cnoCommitFailedNodes = 0;
4583  }//if
 /* Always confirm to the sender, whether or not anything was committed */
4588  signal->theData[0] = getOwnNodeId();
4589  sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4590  return;
4591 }//Qmgr::execCOMMIT_FAILREQ()
4592 
4593 /*--------------------------------------------------------------------------*/
4594 /* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS COMMITTED THE FAILURES. */
4595 /*--------------------------------------------------------------------------*/
4596 /*******************************/
4597 /* COMMIT_FAILCONF */
4598 /*******************************/
4599 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
4600 {
4601  NodeRecPtr nodePtr;
4602  NodeRecPtr replyNodePtr;
4603  jamEntry();
4604  replyNodePtr.i = signal->theData[0];
4605 
4606  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4607  replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4608  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4609  jam();
4610  ptrAss(nodePtr, nodeRec);
4611  if (nodePtr.p->phase == ZRUNNING) {
4612  if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4613  jam();
4614  return;
4615  }//if
4616  }//if
4617  }//for
4618  /*-----------------------------------------------------------------------*/
4619  /* WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES. */
4620  /*-----------------------------------------------------------------------*/
4621  ctoStatus = Q_NOT_ACTIVE;
4622  if (cnoFailedNodes != 0) {
4623  jam();
4628  cfailureNr = cfailureNr + 1;
4629  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4630  jam();
4631  ptrAss(nodePtr, nodeRec);
4632  if (nodePtr.p->phase == ZRUNNING) {
4633  jam();
4634  sendPrepFailReq(signal, nodePtr.i);
4635  }//if
4636  }//for
4637  }//if
4638  return;
4639 }//Qmgr::execCOMMIT_FAILCONF()
4640 
4645 /*******************************/
4646 /* PRES_TOCONF */
4647 /*******************************/
4648 void Qmgr::execPRES_TOCONF(Signal* signal)
4649 {
4650  NodeRecPtr nodePtr;
4651  NodeRecPtr replyNodePtr;
4652  jamEntry();
4653  replyNodePtr.i = signal->theData[0];
4654  UintR TfailureNr = signal->theData[1];
4655  if (ctoFailureNr < TfailureNr) {
4656  jam();
4657  ctoFailureNr = TfailureNr;
4658  }//if
4659  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4660  replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4661  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4662  jam();
4663  ptrAss(nodePtr, nodeRec);
4664  if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4665  jam();
4666  return;
4667  }//if
4668  }//for
4669  /*-------------------------------------------------------------------------*/
4670  /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT. */
4671  /*-------------------------------------------------------------------------*/
4672  if (ctoFailureNr > ccommitFailureNr) {
4673  jam();
4674  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4675  jam();
4676  ptrAss(nodePtr, nodeRec);
4677  if (nodePtr.p->phase == ZRUNNING) {
4678  jam();
4679  nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4680  signal->theData[0] = cpdistref;
4681  signal->theData[1] = ctoFailureNr;
4682  sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4683  }//if
4684  }//for
4685  return;
4686  }//if
4687  /*-------------------------------------------------------------------------*/
4688  /* WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS. */
4689  /*-------------------------------------------------------------------------*/
4690  ctoStatus = Q_NOT_ACTIVE;
4691  cfailureNr = cfailureNr + 1;
4692  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4693  jam();
4694  ptrAss(nodePtr, nodeRec);
4695  if (nodePtr.p->phase == ZRUNNING) {
4696  jam();
4697  sendPrepFailReq(signal, nodePtr.i);
4698  }//if
4699  }//for
4700  return;
4701 }//Qmgr::execPRES_TOCONF()
4702 
4703 /*--------------------------------------------------------------------------*/
4704 // Provide information about the configured NDB nodes in the system.
4705 /*--------------------------------------------------------------------------*/
4706 void Qmgr::execREAD_NODESREQ(Signal* signal)
4707 {
4708  jamEntry();
4709 
4710  BlockReference TBref = signal->theData[0];
4711 
4712  ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
4713 
4714  NodeRecPtr nodePtr;
4715  nodePtr.i = getOwnNodeId();
4716  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4717 
4718  NdbNodeBitmask tmp = c_definedNodes;
4719  tmp.bitANDC(c_clusterNodes);
4720 
4721  readNodes->noOfNodes = c_definedNodes.count();
4722  readNodes->masterNodeId = cpresident;
4723  readNodes->ndynamicId = nodePtr.p->ndynamicId;
4724  c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4725  c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4726  tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4727  NdbNodeBitmask::clear(readNodes->startingNodes);
4728  NdbNodeBitmask::clear(readNodes->startedNodes);
4729 
4730  sendSignal(TBref, GSN_READ_NODESCONF, signal,
4731  ReadNodesConf::SignalLength, JBB);
4732 }//Qmgr::execREAD_NODESREQ()
4733 
4734 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
4735  NodeId failedNodeId) {
4736  jam();
4737 
4738  // Broadcast that this node is failing to other nodes
4739  failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4740 
4741  char buf[100];
4742  BaseString::snprintf(buf, 100,
4743  "Node was shutdown during startup because node %d failed",
4744  failedNodeId);
4745 
4746  progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4747 }
4748 
4749 
4750 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
4751 {
4752  jam();
4753  // Broadcast that this node is failing to other nodes
4754  failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4755 
4756  // If it's known why shutdown occured
4757  // an error message has been passed to this function
4758  progError(line, NDBD_EXIT_NDBREQUIRE, message);
4759 
4760  return;
4761 }//Qmgr::systemErrorLab()
4762 
4763 
/**
 * Record the failure of node aFailedNode and start the local handling of it.
 *
 * Nothing is done unless the failed node is currently in phase ZRUNNING.
 * Otherwise, in order:
 *  - if this node is president, any COMMIT_FAILCONF / PRES_TOCONF still
 *    expected from the failed node is sent to QMGR itself on its behalf;
 *  - the node record is moved to ZPREPARE_FAIL and its send statuses and
 *    heartbeat counter are reset;
 *  - if aSendFailRep == ZTRUE a FAIL_REP is sent to the failed node (unless
 *    it is this node) and to every node still in ZRUNNING;
 *  - if the failed node is this node, the function returns here;
 *  - the connectivity check is informed and neighbours are recomputed;
 *  - if the failed node was president, a new president is elected and, if
 *    that is this node, the president take-over signals are sent;
 *  - the node is appended to cfailedNodes.
 *
 * @param signal        signal buffer, reused for every signal sent here
 * @param aFailedNode   node id of the failed node
 * @param aSendFailRep  ZTRUE to send FAIL_REP as described above
 * @param aFailCause    cause copied into FailRep::failCause
 * @param sourceNode    copied into FailRep::failSourceNodeId; asserted to be
 *                      non-zero whenever aSendFailRep is non-zero
 */
4768 void Qmgr::failReport(Signal* signal,
4769  Uint16 aFailedNode,
4770  UintR aSendFailRep,
4771  FailRep::FailCause aFailCause,
4772  Uint16 sourceNode)
4773 {
4774  UintR tfrMinDynamicId;
4775  NodeRecPtr failedNodePtr;
4776  NodeRecPtr nodePtr;
4777  NodeRecPtr presidentNodePtr;
4778 
4779 
4780  ndbassert((! aSendFailRep) || (sourceNode != 0));
4781 
4782  failedNodePtr.i = aFailedNode;
4783  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4784  if (failedNodePtr.p->phase == ZRUNNING) {
4785  jam();
4786 
4787 #ifdef ERROR_INSERT
  // Test-only path for error insert 938: count failures and, once more than
  // a quarter of the DB nodes have failed, send DUMP_STATE_ORD 9991 to CMVMI
  // and switch to error insert 932. The signal is saved and restored around
  // the send.
4788  if (ERROR_INSERTED(938))
4789  {
4790  nodeFailCount++;
4791  ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
4792  /* Count DB nodes */
4793  Uint32 nodeCount = 0;
4794  for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
4795  {
4796  if (getNodeInfo(i).getType() == NODE_TYPE_DB)
4797  nodeCount++;
4798  }
4799 
4800  /* When > 25% of cluster has failed, resume communications */
4801  if (nodeFailCount > (nodeCount / 4))
4802  {
4803  ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
4804  Signal save = *signal;
4805  signal->theData[0] = 9991;
4806  sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
4807  *signal = save;
4808  nodeFailCount = 0;
4809  SET_ERROR_INSERT_VALUE(932);
4810  }
4811  }
4812 #endif
4813 
4814 /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
4815  if (cpresident == getOwnNodeId()) {
4816  jam();
  // As president, answer on behalf of the failed node for any reply that is
  // still outstanding, so the waiting protocol rounds can complete.
4817  if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4818  jam();
4819  signal->theData[0] = failedNodePtr.i;
4820  sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4821  }//if
4822  if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
4823  jam();
4824  signal->theData[0] = failedNodePtr.i;
4825  signal->theData[1] = ccommitFailureNr;
4826  sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
4827  }//if
4828  }//if
  // From here on the node is no longer ZRUNNING, so the loops below that
  // select ZRUNNING nodes skip it.
4829  failedNodePtr.p->phase = ZPREPARE_FAIL;
4830  failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4831  failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4832  failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4833  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
4834  if (aSendFailRep == ZTRUE) {
4835  jam();
4836  if (failedNodePtr.i != getOwnNodeId()) {
4837  jam();
4838  FailRep * const failRep = (FailRep *)&signal->theData[0];
4839  failRep->failNodeId = failedNodePtr.i;
4840  failRep->failCause = aFailCause;
4841  failRep->failSourceNodeId = sourceNode;
4842  sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
4843  FailRep::SignalLength, JBA);
4844  }//if
4845  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4846  jam();
4847  ptrAss(nodePtr, nodeRec);
4848  if (nodePtr.p->phase == ZRUNNING) {
4849  jam();
  // The FailRep is refilled before every send.
4850  FailRep * const failRep = (FailRep *)&signal->theData[0];
4851  failRep->failNodeId = failedNodePtr.i;
4852  failRep->failCause = aFailCause;
4853  failRep->failSourceNodeId = sourceNode;
4854  sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
4855  FailRep::SignalLength, JBA);
4856  }//if
4857  }//for
4858  }//if
  // When the failed node is this node, nothing below applies.
4859  if (failedNodePtr.i == getOwnNodeId()) {
4860  jam();
4861  return;
4862  }//if
4863 
4864  if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
4865  {
4866  jam();
4867  connectivityCheckCompleted(signal);
4868  }
4869 
4870  failedNodePtr.p->ndynamicId = 0;
4871  findNeighbours(signal, __LINE__);
4872  if (failedNodePtr.i == cpresident) {
4873  jam();
  // The president failed: the new president is the ZRUNNING node with the
  // smallest low 16 bits of ndynamicId.
4878  tfrMinDynamicId = (UintR)-1;
4879  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4880  jam();
4881  ptrAss(nodePtr, nodeRec);
4882  if (nodePtr.p->phase == ZRUNNING) {
4883  if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
4884  jam();
4885  tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
4886  cpresident = nodePtr.i;
4887  }//if
4888  }//if
4889  }//for
4890  presidentNodePtr.i = cpresident;
4891  ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
4892  cpdistref = presidentNodePtr.p->blockRef;
4893  if (cpresident == getOwnNodeId()) {
  // This node is the new president: take over the failure handling state.
4894  CRASH_INSERTION(920);
4895  cfailureNr = cprepareFailureNr;
4896  ctoFailureNr = 0;
4897  ctoStatus = Q_ACTIVE;
4898  c_start.reset(); // Don't take over nodes being started
4899  if (cnoCommitFailedNodes > 0) {
4900  jam();
  // Commit-failed nodes are recorded locally: send PRES_TOREQ to every
  // running node and expect a PRES_TOCONF from each.
  // NOTE(review): theData[1] is written but the signal is sent with
  // length 1 - confirm whether the receiver is meant to see it.
4906  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4907  nodePtr.i++) {
4908  jam();
4909  ptrAss(nodePtr, nodeRec);
4910  if (nodePtr.p->phase == ZRUNNING) {
4911  jam();
4912  nodePtr.p->sendPresToStatus = Q_ACTIVE;
4913  signal->theData[0] = cpdistref;
4914  signal->theData[1] = cprepareFailureNr;
4915  sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
4916  signal, 1, JBA);
4917  }//if
4918  }//for
4919  } else {
4920  jam();
4921  /*-----------------------------------------------------------------*/
4922  // In this case it could be that a commit process is still ongoing.
4923  // If so we must conclude it as the new master.
4924  /*-----------------------------------------------------------------*/
4925  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4926  nodePtr.i++) {
4927  jam();
4928  ptrAss(nodePtr, nodeRec);
4929  if (nodePtr.p->phase == ZRUNNING) {
4930  jam();
4931  nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4932  signal->theData[0] = cpdistref;
4933  signal->theData[1] = ccommitFailureNr;
4934  sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
4935  2, JBA);
4936  }//if
4937  }//for
4938  }//if
4939  }//if
4940  }//if
  // Remember the failed node; cfailedNodes is what sendPrepFailReq() sends.
4941  arrGuard(cnoFailedNodes, MAX_NDB_NODES);
4942  cfailedNodes[cnoFailedNodes] = failedNodePtr.i;
4943  cnoFailedNodes = cnoFailedNodes + 1;
4944  }//if
4945 }//Qmgr::failReport()
4946 
4947 /*---------------------------------------------------------------------------*/
4948 /* INPUT: TTDI_DYN_ID */
4949 /* OUTPUT: TTDI_NODE_ID */
4950 /*---------------------------------------------------------------------------*/
4951 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
4952 {
4953  NodeRecPtr tdiNodePtr;
4954  Uint16 TtdiNodeId = ZNIL;
4955 
4956  for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
4957  jam();
4958  ptrAss(tdiNodePtr, nodeRec);
4959  if (tdiNodePtr.p->ndynamicId == TdynamicId) {
4960  jam();
4961  TtdiNodeId = tdiNodePtr.i;
4962  break;
4963  }//if
4964  }//for
4965  if (TtdiNodeId == ZNIL) {
4966  jam();
4967  systemErrorLab(signal, __LINE__);
4968  }//if
4969  return TtdiNodeId;
4970 }//Qmgr::translateDynamicIdToNodeId()
4971 
4976 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
4977 {
4978  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4979 
4980  closeCom->xxxBlockRef = TBRef;
4981  closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
4982  closeCom->failNo = aFailNo;
4983  closeCom->noOfNodes = cnoPrepFailedNodes;
4984 
4985  NodeBitmask::clear(closeCom->theNodes);
4986 
4987  for(int i = 0; i < cnoPrepFailedNodes; i++) {
4988  const NodeId nodeId = cprepFailedNodes[i];
4989  jam();
4990  NodeBitmask::set(closeCom->theNodes, nodeId);
4991  }
4992 
4993  sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
4994  CloseComReqConf::SignalLength, JBA);
4995 
4996 }//Qmgr::sendCloseComReq()
4997 
4998 void
4999 Qmgr::sendPrepFailReqRef(Signal* signal,
5000  Uint32 dstBlockRef,
5001  GlobalSignalNumber gsn,
5002  Uint32 blockRef,
5003  Uint32 failNo,
5004  Uint32 noOfNodes,
5005  const NodeId theNodes[]){
5006 
5007  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
5008  prepFail->xxxBlockRef = blockRef;
5009  prepFail->failNo = failNo;
5010  prepFail->noOfNodes = noOfNodes;
5011 
5012  NdbNodeBitmask::clear(prepFail->theNodes);
5013 
5014  for(Uint32 i = 0; i<noOfNodes; i++){
5015  const NodeId nodeId = theNodes[i];
5016  NdbNodeBitmask::set(prepFail->theNodes, nodeId);
5017  }
5018 
5019  sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
5020 }
5021 
5022 
5026 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
5027 {
5028  NodeRecPtr sendNodePtr;
5029  sendNodePtr.i = aNode;
5030  ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5031  sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5032 
5033  sendPrepFailReqRef(signal,
5034  sendNodePtr.p->blockRef,
5035  GSN_PREP_FAILREQ,
5036  reference(),
5037  cfailureNr,
5038  cnoFailedNodes,
5039  cfailedNodes);
5040 }//Qmgr::sendPrepFailReq()
5041 
// Compile-time switch for the node-count comparisons in
// Qmgr::handleArbitCheck(). Both uses are of the form
// "g_ndb_arbit_one_half_rule && ...", so while this is false the
// comparisons against cnoOfNodes are never evaluated.
5051 static const bool g_ndb_arbit_one_half_rule = false;
5052 
5056 void
5057 Qmgr::execARBIT_CFG(Signal* signal)
5058 {
5059  jamEntry();
5060  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5061  unsigned rank = sd->code;
5062  ndbrequire(1 <= rank && rank <= 2);
5063  arbitRec.apiMask[0].bitOR(sd->mask);
5064  arbitRec.apiMask[rank].assign(sd->mask);
5065 }
5066 
// Return the delay to use before the next arbitration thread step, chosen
// from arbitRec.state. runArbitThread() passes the value to
// sendSignalWithDelay() (presumably milliseconds - TODO confirm).
// ARBIT_NULL has no delay: it breaks out of the switch and hits
// ndbrequire(false).
5070 Uint32 Qmgr::getArbitDelay()
5071 {
5072  switch (arbitRec.state) {
5073  case ARBIT_NULL:
5074  jam();
5075  break;
  // INIT, FIND, PREP1, PREP2 and START deliberately fall through to the
  // shared "return 100", each adding its own jam() trace entry on the way.
5076  case ARBIT_INIT:
5077  jam();
5078  case ARBIT_FIND:
5079  jam();
5080  case ARBIT_PREP1:
5081  jam();
5082  case ARBIT_PREP2:
5083  jam();
5084  case ARBIT_START:
5085  jam();
5086  return 100;
5087  case ARBIT_RUN:
5088  jam();
5089  return 1000;
5090  case ARBIT_CHOOSE:
5091  jam();
5092  return 10;
5093  case ARBIT_CRASH: // if we could wait
5094  jam();
5095  return 100;
5096  }
  // Reached for ARBIT_NULL or a state value not listed above.
5097  ndbrequire(false);
5098  return (Uint32)-1;
5099 }
5100 
// Return the timeout for the current arbitration state. Callers compare it
// against arbitRec.getTimediff() (presumably milliseconds - TODO confirm).
// ARBIT_NULL has no timeout: it breaks out of the switch and hits
// ndbrequire(false).
5105 Uint32 Qmgr::getArbitTimeout()
5106 {
5107  switch (arbitRec.state) {
5108  case ARBIT_NULL:
5109  jam();
5110  break;
  // INIT falls through to FIND on purpose.
5111  case ARBIT_INIT: // not used
5112  jam();
5113  case ARBIT_FIND: // not used
5114  jam();
5115  return 1000;
  // PREP1 falls through to PREP2: the timeout grows with the node count
  // and the heartbeat send interval.
5116  case ARBIT_PREP1:
5117  jam();
5118  case ARBIT_PREP2:
5119  jam();
5120  return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
5121  case ARBIT_START:
5122  jam();
5123  return 1000 + arbitRec.timeout;
5124  case ARBIT_RUN: // not used (yet)
5125  jam();
5126  return 1000;
5127  case ARBIT_CHOOSE:
5128  jam();
5129  return arbitRec.timeout;
5130  case ARBIT_CRASH: // if we could wait
5131  jam();
5132  return 100;
5133  }
  // Reached for ARBIT_NULL or a state value not listed above.
5134  ndbrequire(false);
5135  return (Uint32)-1;
5136 }
5137 
5145 void
5146 Qmgr::handleArbitStart(Signal* signal)
5147 {
5148  jam();
5149  ndbrequire(cpresident == getOwnNodeId());
5150  ndbrequire(arbitRec.state == ARBIT_NULL);
5151  arbitRec.state = ARBIT_INIT;
5152  arbitRec.newstate = true;
5153  startArbitThread(signal);
5154 }
5155 
// Handle the failure of API node nodeId. Only acts when that node is the
// current arbitrator (arbitRec.node): reports the event, clears
// arbitRec.node, and then reacts according to the arbitration state.
5161 void
5162 Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
5163 {
5164  if (arbitRec.node != nodeId) {
5165  jam();
5166  return;
5167  }
5168  reportArbitEvent(signal, NDB_LE_ArbitState);
5169  arbitRec.node = 0;
5170  switch (arbitRec.state) {
  // NULL, INIT and FIND fall through to a plain break: nothing more to do.
5171  case ARBIT_NULL: // should not happen
5172  jam();
5173  case ARBIT_INIT:
5174  jam();
5175  case ARBIT_FIND:
5176  jam();
5177  break;
  // PREP1, PREP2, START and RUN fall through to a common restart: the
  // president goes back to ARBIT_INIT and restarts the thread, any other
  // node drops to ARBIT_NULL.
5178  case ARBIT_PREP1: // start from beginning
5179  jam();
5180  case ARBIT_PREP2:
5181  jam();
5182  case ARBIT_START:
5183  jam();
5184  case ARBIT_RUN:
5185  if (cpresident == getOwnNodeId()) {
5186  jam();
5187  arbitRec.state = ARBIT_INIT;
5188  arbitRec.newstate = true;
5189  startArbitThread(signal);
5190  } else {
5191  jam();
5192  arbitRec.state = ARBIT_NULL;
5193  }
5194  break;
  // CHOOSE and CRASH: state is left unchanged.
5195  case ARBIT_CHOOSE: // XXX too late
5196  jam();
5197  case ARBIT_CRASH:
5198  jam();
5199  break;
5200  default:
5201  ndbrequire(false);
5202  break;
5203  }
5204 }
5205 
// Handle the addition of NDB node nodeId to the arbitration process.
// Must be called on the president (enforced by ndbrequire).
5212 void
5213 Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
5214 {
5215  jam();
5216  ndbrequire(cpresident == getOwnNodeId());
5217  switch (arbitRec.state) {
5218  case ARBIT_NULL: // before db opened
5219  jam();
5220  break;
  // INIT, FIND, PREP1 and PREP2 fall through to a common restart from
  // ARBIT_INIT.
5221  case ARBIT_INIT: // start from beginning
5222  jam();
5223  case ARBIT_FIND:
5224  jam();
5225  case ARBIT_PREP1:
5226  jam();
5227  case ARBIT_PREP2:
5228  jam();
5229  arbitRec.state = ARBIT_INIT;
5230  arbitRec.newstate = true;
5231  startArbitThread(signal);
5232  break;
  // START falls through to RUN: the node is only recorded in newMask here;
  // stateArbitRun() later sends ARBIT_PREPREQ to the nodes in that mask.
5233  case ARBIT_START: // process in RUN state
5234  jam();
5235  case ARBIT_RUN:
5236  jam();
5237  arbitRec.newMask.set(nodeId);
5238  break;
  // CHOOSE and CRASH: the new node is ignored.
5239  case ARBIT_CHOOSE: // XXX too late
5240  jam();
5241  case ARBIT_CRASH:
5242  jam();
5243  break;
5244  default:
5245  ndbrequire(false);
5246  break;
5247  }
5248 }
5249 
// Decide, on the president, how to proceed with the current set of NDB
// nodes. Three steps:
//  1. compute arbitRec.code from the node mask (optional node-count rule,
//     otherwise a direct CHECKNODEGROUPSREQ to DBDIH);
//  2. map that code to the next arbitration state;
//  3. report the result, start the commit of failed nodes unless the new
//     state is CHOOSE or CRASH, and restart the arbitration thread.
5257 void
5258 Qmgr::handleArbitCheck(Signal* signal)
5259 {
5260  jam();
5261  ndbrequire(cpresident == getOwnNodeId());
5262  NdbNodeBitmask ndbMask;
5263  computeArbitNdbMask(ndbMask);
  // Step 1: with g_ndb_arbit_one_half_rule false this condition is never
  // true and the else branch is always taken.
5264  if (g_ndb_arbit_one_half_rule &&
5265  2 * ndbMask.count() < cnoOfNodes) {
5266  jam();
5267  arbitRec.code = ArbitCode::LoseNodes;
5268  } else {
5269  jam();
  // DBDIH is executed directly; its answer is read back from sd->output
  // in the same signal buffer.
5270  CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
5271  sd->blockRef = reference();
5272  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
5273  sd->mask = ndbMask;
5274  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
5275  CheckNodeGroups::SignalLength);
5276  jamEntry();
5277  switch (sd->output) {
5278  case CheckNodeGroups::Win:
5279  jam();
5280  arbitRec.code = ArbitCode::WinGroups;
5281  break;
5282  case CheckNodeGroups::Lose:
5283  jam();
5284  arbitRec.code = ArbitCode::LoseGroups;
5285  break;
5286  case CheckNodeGroups::Partitioning:
5287  jam();
5288  arbitRec.code = ArbitCode::Partitioning;
5289  if (g_ndb_arbit_one_half_rule &&
5290  2 * ndbMask.count() > cnoOfNodes) {
5291  jam();
5292  arbitRec.code = ArbitCode::WinNodes;
5293  }
5294  break;
5295  default:
5296  ndbrequire(false);
5297  break;
5298  }
5299  }
  // Step 2: translate the code into the next state.
5300  switch (arbitRec.code) {
  // Both "lose" codes fall through to the crash path.
5301  case ArbitCode::LoseNodes:
5302  jam();
5303  case ArbitCode::LoseGroups:
5304  jam();
5305  goto crashme;
  // Both "win" codes: stay in RUN if already there, otherwise restart
  // from INIT.
5306  case ArbitCode::WinNodes:
5307  jam();
5308  case ArbitCode::WinGroups:
5309  jam();
5310  if (arbitRec.state == ARBIT_RUN) {
5311  jam();
5312  break;
5313  }
5314  arbitRec.state = ARBIT_INIT;
5315  arbitRec.newstate = true;
5316  break;
  // Partitioning: from RUN go to CHOOSE. Otherwise crash, with the code
  // telling whether any arbitrator candidates are configured
  // (apiMask[0] non-empty -> LoseNorun, empty -> LoseNocfg).
5317  case ArbitCode::Partitioning:
5318  if (arbitRec.state == ARBIT_RUN) {
5319  jam();
5320  arbitRec.state = ARBIT_CHOOSE;
5321  arbitRec.newstate = true;
5322  break;
5323  }
5324  if (arbitRec.apiMask[0].count() != 0) {
5325  jam();
5326  arbitRec.code = ArbitCode::LoseNorun;
5327  } else {
5328  jam();
5329  arbitRec.code = ArbitCode::LoseNocfg;
5330  }
5331  goto crashme;
  // The crashme label lives inside the default case, so unknown codes and
  // the explicit gotos above share the same crash handling.
5332  default:
5333  crashme:
5334  jam();
5335  arbitRec.state = ARBIT_CRASH;
5336  arbitRec.newstate = true;
5337  break;
5338  }
  // Step 3.
5339  reportArbitEvent(signal, NDB_LE_ArbitResult);
5340  switch (arbitRec.state) {
  // Note the default case comes first: every state except CHOOSE and CRASH
  // prunes the masks and starts the commit of failed nodes.
5341  default:
5342  jam();
5343  arbitRec.newMask.bitAND(ndbMask); // delete failed nodes
5344  arbitRec.recvMask.bitAND(ndbMask);
5345  sendCommitFailReq(signal); // start commit of failed nodes
5346  break;
5347  case ARBIT_CHOOSE:
5348  jam();
5349  case ARBIT_CRASH:
5350  jam();
5351  break;
5352  }
5353  startArbitThread(signal);
5354 }
5355 
5360 void
5361 Qmgr::startArbitThread(Signal* signal)
5362 {
5363  jam();
5364  ndbrequire(cpresident == getOwnNodeId());
5365  arbitRec.code = ArbitCode::ThreadStart;
5366  reportArbitEvent(signal, NDB_LE_ArbitState);
5367  signal->theData[1] = ++arbitRec.thread;
5368  runArbitThread(signal);
5369 }
5370 
// Execute one step of the arbitration thread.
// signal->theData[1] must hold the thread id; a step whose id differs from
// arbitRec.thread belongs to a superseded thread and returns immediately.
// Otherwise the handler for the current state is run and the next step is
// scheduled as CONTINUEB(ZARBIT_HANDLING) to QMGR.
5375 void
5376 Qmgr::runArbitThread(Signal* signal)
5377 {
5378 #ifdef DEBUG_ARBIT
  // Debug build only: dump the complete arbitration record.
5379  char buf[256];
5380  NdbNodeBitmask ndbMask;
5381  computeArbitNdbMask(ndbMask);
5382  ndbout << "arbit thread:";
5383  ndbout << " state=" << arbitRec.state;
5384  ndbout << " newstate=" << arbitRec.newstate;
5385  ndbout << " thread=" << arbitRec.thread;
5386  ndbout << " node=" << arbitRec.node;
5387  arbitRec.ticket.getText(buf, sizeof(buf));
5388  ndbout << " ticket=" << buf;
5389  ndbMask.getText(buf);
5390  ndbout << " ndbmask=" << buf;
5391  ndbout << " sendcount=" << arbitRec.sendCount;
5392  ndbout << " recvcount=" << arbitRec.recvCount;
5393  arbitRec.recvMask.getText(buf);
5394  ndbout << " recvmask=" << buf;
5395  ndbout << " code=" << arbitRec.code;
5396  ndbout << endl;
5397 #endif
5398  if (signal->theData[1] != arbitRec.thread) {
5399  jam();
5400  return; // old thread dies
5401  }
  // Dispatch on the current state. ARBIT_NULL has no handler and ends up
  // in the default case.
5402  switch (arbitRec.state) {
5403  case ARBIT_INIT: // main thread
5404  jam();
5405  stateArbitInit(signal);
5406  break;
5407  case ARBIT_FIND:
5408  jam();
5409  stateArbitFind(signal);
5410  break;
  // PREP1 falls through: both prepare phases share one handler.
5411  case ARBIT_PREP1:
5412  jam();
5413  case ARBIT_PREP2:
5414  jam();
5415  stateArbitPrep(signal);
5416  break;
5417  case ARBIT_START:
5418  jam();
5419  stateArbitStart(signal);
5420  break;
5421  case ARBIT_RUN:
5422  jam();
5423  stateArbitRun(signal);
5424  break;
5425  case ARBIT_CHOOSE: // partition thread
5426  jam();
5427  stateArbitChoose(signal);
5428  break;
5429  case ARBIT_CRASH:
5430  jam();
5431  stateArbitCrash(signal);
5432  break;
5433  default:
5434  ndbrequire(false);
5435  break;
5436  }
  // Schedule the next step. The delay is taken for the state as it is after
  // the handler ran: 0 sends immediately at JBA, 1 immediately at JBB,
  // anything else uses a delayed signal.
5437  signal->theData[0] = ZARBIT_HANDLING;
5438  signal->theData[1] = arbitRec.thread;
5439  signal->theData[2] = arbitRec.state; // just for signal log
5440  Uint32 delay = getArbitDelay();
5441  if (delay == 0) {
5442  jam();
5443  sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5444  } else if (delay == 1) {
5445  jam();
5446  sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5447  } else {
5448  jam();
5449  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5450  }//if
5451 }
5452 
5457 void
5458 Qmgr::stateArbitInit(Signal* signal)
5459 {
5460  if (arbitRec.newstate) {
5461  jam();
5462  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5463 
5464  arbitRec.node = 0;
5465  arbitRec.ticket.update();
5466  arbitRec.newMask.clear();
5467  arbitRec.code = 0;
5468  arbitRec.newstate = false;
5469  }
5470  arbitRec.state = ARBIT_FIND;
5471  arbitRec.newstate = true;
5472  stateArbitFind(signal);
5473 }
5474 
5484 void
5485 Qmgr::stateArbitFind(Signal* signal)
5486 {
5487  if (arbitRec.newstate) {
5488  jam();
5489  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5490 
5491  arbitRec.code = 0;
5492  arbitRec.newstate = false;
5493  }
5494 
5495  switch (arbitRec.method){
5496  case ArbitRec::METHOD_EXTERNAL:
5497  {
5498  // Don't select any API node as arbitrator
5499  arbitRec.node = 0;
5500  arbitRec.state = ARBIT_PREP1;
5501  arbitRec.newstate = true;
5502  stateArbitPrep(signal);
5503  return;
5504  break;
5505  }
5506 
5507  case ArbitRec::METHOD_DEFAULT:
5508  {
5509  NodeRecPtr aPtr;
5510  // Select the best available API node as arbitrator
5511  for (unsigned rank = 1; rank <= 2; rank++) {
5512  jam();
5513  aPtr.i = 0;
5514  const unsigned stop = NodeBitmask::NotFound;
5515  while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
5516  jam();
5517  ptrAss(aPtr, nodeRec);
5518  if (aPtr.p->phase != ZAPI_ACTIVE)
5519  continue;
5520  arbitRec.node = aPtr.i;
5521  arbitRec.state = ARBIT_PREP1;
5522  arbitRec.newstate = true;
5523  stateArbitPrep(signal);
5524  return;
5525  }
5526  }
5527  return;
5528  break;
5529  }
5530 
5531  default:
5532  ndbrequire(false);
5533  }
5534 }
5535 
// ARBIT_PREP1 / ARBIT_PREP2 step, run on the president.
// On first entry the set of nodes to contact is computed (all nodes in the
// arbitration NDB mask except this node). The first call sends
// ARBIT_PREPREQ to each of them; later calls check for an error code, for
// all replies having arrived, and finally for a timeout.
5541 void
5542 Qmgr::stateArbitPrep(Signal* signal)
5543 {
5544  if (arbitRec.newstate) {
5545  jam();
5546  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5547 
5548  arbitRec.sendCount = 0; // send all at once
5549  computeArbitNdbMask(arbitRec.recvMask); // to send and recv
5550  arbitRec.recvMask.clear(getOwnNodeId());
5551  arbitRec.code = 0;
5552  arbitRec.newstate = false;
5553  }
  // Nothing sent yet: send the request to every node in recvMask and
  // return; replies are collected by execARBIT_PREPCONF().
5554  if (! arbitRec.sendCount) {
5555  jam();
5556  NodeRecPtr aPtr;
5557  aPtr.i = 0;
5558  const unsigned stop = NodeBitmask::NotFound;
5559  while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
5560  jam();
5561  ptrAss(aPtr, nodeRec);
  // The signal data is rebuilt for each destination.
5562  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5563  sd->sender = getOwnNodeId();
5564  if (arbitRec.state == ARBIT_PREP1) {
5565  jam();
5566  sd->code = ArbitCode::PrepPart1;
5567  } else {
5568  jam();
5569  sd->code = ArbitCode::PrepPart2;
5570  }
5571  sd->node = arbitRec.node;
5572  sd->ticket = arbitRec.ticket;
5573  sd->mask.clear();
5574  sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5575  ArbitSignalData::SignalLength, JBB);
5576  }
5577  arbitRec.setTimestamp(); // send time
5578  arbitRec.sendCount = 1;
5579  return;
5580  }
  // A reply carried an error code: start over from INIT.
5581  if (arbitRec.code != 0) { // error
5582  jam();
5583  arbitRec.state = ARBIT_INIT;
5584  arbitRec.newstate = true;
5585  return;
5586  }
  // All replies received: PREP1 advances to PREP2 (handled on the next
  // step), PREP2 advances to START and runs it immediately.
5587  if (arbitRec.recvMask.count() == 0) { // recv all
5588  if (arbitRec.state == ARBIT_PREP1) {
5589  jam();
5590  arbitRec.state = ARBIT_PREP2;
5591  arbitRec.newstate = true;
5592  } else {
5593  jam();
5594  arbitRec.state = ARBIT_START;
5595  arbitRec.newstate = true;
5596  stateArbitStart(signal);
5597  }
5598  return;
5599  }
  // Replies still missing: give up and restart from INIT once the
  // state's timeout has passed.
5600  if (arbitRec.getTimediff() > getArbitTimeout()) {
5601  jam();
5602  arbitRec.state = ARBIT_INIT;
5603  arbitRec.newstate = true;
5604  return;
5605  }
5606 }
5607 
// Receive ARBIT_PREPREQ from the president on a non-president node.
// Requests are ignored if this node is itself president or if the sender
// is not the node this node regards as president.
// PrepPart1 clears the local arbitrator and ticket; PrepPart2 and PrepAtrun
// install the arbitrator and ticket from the signal and enter ARBIT_RUN.
// Every accepted request except PrepAtrun is answered with ARBIT_PREPCONF.
5608 void
5609 Qmgr::execARBIT_PREPREQ(Signal* signal)
5610 {
5611  jamEntry();
5612  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5613  if (getOwnNodeId() == cpresident) {
5614  jam();
5615  return; // wrong state
5616  }
5617  if (sd->sender != cpresident) {
5618  jam();
5619  return; // wrong state
5620  }
  // sd->sender equals cpresident here; its node record provides the
  // block reference for the reply.
5621  NodeRecPtr aPtr;
5622  aPtr.i = sd->sender;
5623  ptrAss(aPtr, nodeRec);
5624  switch (sd->code) {
5625  case ArbitCode::PrepPart1: // zero them just to be sure
5626  jam();
5627  arbitRec.node = 0;
5628  arbitRec.ticket.clear();
5629  break;
  // PrepPart2 falls through: it shares all handling with PrepAtrun except
  // for the reply.
5630  case ArbitCode::PrepPart2: // non-president enters RUN state
5631  jam();
5632  case ArbitCode::PrepAtrun:
5633  jam();
5634  arbitRec.node = sd->node;
5635  arbitRec.ticket = sd->ticket;
5636  arbitRec.code = sd->code;
5637  reportArbitEvent(signal, NDB_LE_ArbitState);
5638  arbitRec.state = ARBIT_RUN;
5639  arbitRec.newstate = true;
  // PrepAtrun is not confirmed back to the president.
5640  if (sd->code == ArbitCode::PrepAtrun) {
5641  jam();
5642  return;
5643  }
5644  break;
5645  default:
5646  jam();
5647  ndbrequire(false);
5648  }
  // Confirm with code 0, reusing the received signal buffer.
5649  sd->sender = getOwnNodeId();
5650  sd->code = 0;
5651  sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5652  ArbitSignalData::SignalLength, JBB);
5653 }
5654 
5655 void
5656 Qmgr::execARBIT_PREPCONF(Signal* signal)
5657 {
5658  jamEntry();
5659  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5660  if (! arbitRec.match(sd)) {
5661  jam();
5662  return; // stray signal
5663  }
5664  if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5665  jam();
5666  return; // wrong state
5667  }
5668  if (! arbitRec.recvMask.get(sd->sender)) {
5669  jam();
5670  return; // wrong state
5671  }
5672  arbitRec.recvMask.clear(sd->sender);
5673  if (arbitRec.code == 0 && sd->code != 0) {
5674  jam();
5675  arbitRec.code = sd->code;
5676  }//if
5677 }
5678 
5679 void
5680 Qmgr::execARBIT_PREPREF(Signal* signal)
5681 {
5682  jamEntry();
5683  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5684  if (sd->code == 0) {
5685  jam();
5686  sd->code = ArbitCode::ErrUnknown;
5687  }
5688  execARBIT_PREPCONF(signal);
5689 }
5690 
// ARBIT_START step.
// METHOD_EXTERNAL: no arbitrator is started; the state moves directly to
// ARBIT_RUN.
// METHOD_DEFAULT: the first call sends ARBIT_STARTREQ to the chosen
// arbitrator node; later calls evaluate the reply (recorded by
// execARBIT_STARTCONF) or the timeout. ApiStart leads to ARBIT_RUN, any
// other reply code or a timeout leads back to ARBIT_INIT.
5695 void
5696 Qmgr::stateArbitStart(Signal* signal)
5697 {
5698  if (arbitRec.newstate) {
5699  jam();
5700  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5701 
5702  arbitRec.sendCount = 0;
5703  arbitRec.recvCount = 0;
5704  arbitRec.code = 0;
5705  arbitRec.newstate = false;
5706  }
5707 
5708  switch (arbitRec.method){
5709  case ArbitRec::METHOD_EXTERNAL:
5710  jam();
5711  ndbrequire(arbitRec.node == 0); // No arbitrator selected
5712 
5713  // Don't start arbitrator in API node => ARBIT_RUN
5714  arbitRec.state = ARBIT_RUN;
5715  arbitRec.newstate = true;
5716  return;
5717  break;
5718 
5719  case ArbitRec::METHOD_DEFAULT:
  // Request not sent yet: send it and wait for the next step.
5720  if (! arbitRec.sendCount) {
5721  jam();
5722  BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5723  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5724  sd->sender = getOwnNodeId();
5725  sd->code = 0;
5726  sd->node = arbitRec.node;
5727  sd->ticket = arbitRec.ticket;
5728  sd->mask.clear();
5729  sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5730  ArbitSignalData::SignalLength, JBB);
5731  arbitRec.sendCount = 1;
5732  arbitRec.setTimestamp(); // send time
5733  return;
5734  }
  // Reply received: arbitRec.code holds the arbitrator's answer.
5735  if (arbitRec.recvCount) {
5736  jam();
5737  reportArbitEvent(signal, NDB_LE_ArbitState);
5738  if (arbitRec.code == ArbitCode::ApiStart) {
5739  jam();
5740  arbitRec.state = ARBIT_RUN;
5741  arbitRec.newstate = true;
5742  return;
5743  }
5744  arbitRec.state = ARBIT_INIT;
5745  arbitRec.newstate = true;
5746  return;
5747  }
  // No reply yet: restart from INIT once the timeout has passed.
5748  if (arbitRec.getTimediff() > getArbitTimeout()) {
5749  jam();
5750  arbitRec.code = ArbitCode::ErrTimeout;
5751  reportArbitEvent(signal, NDB_LE_ArbitState);
5752  arbitRec.state = ARBIT_INIT;
5753  arbitRec.newstate = true;
5754  return;
5755  }
5756  break;
5757 
5758  default:
5759  ndbrequire(false);
5760  break;
5761  }
5762 }
5763 
5764 void
5765 Qmgr::execARBIT_STARTCONF(Signal* signal)
5766 {
5767  jamEntry();
5768  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5769  if (! arbitRec.match(sd)) {
5770  jam();
5771  return; // stray signal
5772  }
5773  if (arbitRec.state != ARBIT_START) {
5774  jam();
5775  return; // wrong state
5776  }
5777  if (arbitRec.recvCount) {
5778  jam();
5779  return; // wrong state
5780  }
5781  arbitRec.code = sd->code;
5782  arbitRec.recvCount = 1;
5783 }
5784 
5785 void
5786 Qmgr::execARBIT_STARTREF(Signal* signal)
5787 {
5788  jamEntry();
5789  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5790  if (sd->code == 0) {
5791  jam();
5792  sd->code = ArbitCode::ErrUnknown;
5793  }
5794  execARBIT_STARTCONF(signal);
5795 }
5796 
5801 void
5802 Qmgr::stateArbitRun(Signal* signal)
5803 {
5804  if (arbitRec.newstate) {
5805  jam();
5806  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5807 
5808  arbitRec.code = 0;
5809  arbitRec.newstate = false;
5810  }
5811  NodeRecPtr aPtr;
5812  aPtr.i = 0;
5813  const unsigned stop = NodeBitmask::NotFound;
5814  while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
5815  jam();
5816  arbitRec.newMask.clear(aPtr.i);
5817  ptrAss(aPtr, nodeRec);
5818  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5819  sd->sender = getOwnNodeId();
5820  sd->code = ArbitCode::PrepAtrun;
5821  sd->node = arbitRec.node;
5822  sd->ticket = arbitRec.ticket;
5823  sd->mask.clear();
5824  sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5825  ArbitSignalData::SignalLength, JBB);
5826  }
5827 }
5828 
// ARBIT_CHOOSE step.
// METHOD_EXTERNAL: nothing is sent; the step only waits for the
// arbitration timeout and then treats this side as the winner
// (WinWaitExternal), starts the commit of failed nodes and returns to
// ARBIT_INIT.
// METHOD_DEFAULT: the first call sends ARBIT_CHOOSEREQ, with the current
// NDB node mask, to the arbitrator. A WinChoose reply starts the commit of
// failed nodes and returns to ARBIT_INIT; any other reply, or a timeout,
// goes to ARBIT_CRASH immediately.
5835 void
5836 Qmgr::stateArbitChoose(Signal* signal)
5837 {
5838  if (arbitRec.newstate) {
5839  jam();
5840  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5841 
5842  arbitRec.sendCount = 0;
5843  arbitRec.recvCount = 0;
5844  arbitRec.code = 0;
5845  arbitRec.newstate = false;
5846  }
5847 
5848  switch(arbitRec.method){
5849  case ArbitRec::METHOD_EXTERNAL:
5850  {
  // First call: only start the timer.
5851  if (! arbitRec.sendCount) {
5852  jam();
5853  ndbrequire(arbitRec.node == 0); // No arbitrator selected
5854  // Don't send CHOOSE to anyone, just wait for timeout to expire
5855  arbitRec.sendCount = 1;
5856  arbitRec.setTimestamp();
5857  return;
5858  }
5859 
5860  if (arbitRec.getTimediff() > getArbitTimeout()) {
5861  jam();
5862  // Arbitration timeout has expired
5863  ndbrequire(arbitRec.node == 0); // No arbitrator selected
5864 
5865  NodeBitmask nodes;
5866  computeArbitNdbMask(nodes);
5867  arbitRec.code = ArbitCode::WinWaitExternal;
5868  reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);
5869 
5870  sendCommitFailReq(signal); // start commit of failed nodes
5871  arbitRec.state = ARBIT_INIT;
5872  arbitRec.newstate = true;
5873  return;
5874  }
5875  break;
5876  }
5877 
5878  case ArbitRec::METHOD_DEFAULT:
5879  {
  // First call: ask the arbitrator to choose, sending it our node mask.
5880  if (! arbitRec.sendCount) {
5881  jam();
5882  const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5883  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5884  sd->sender = getOwnNodeId();
5885  sd->code = 0;
5886  sd->node = arbitRec.node;
5887  sd->ticket = arbitRec.ticket;
5888  computeArbitNdbMask(sd->mask);
5889  sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
5890  ArbitSignalData::SignalLength, JBA);
5891  arbitRec.sendCount = 1;
5892  arbitRec.setTimestamp(); // send time
5893  return;
5894  }
5895 
  // Reply received: arbitRec.code holds the arbitrator's decision.
5896  if (arbitRec.recvCount) {
5897  jam();
5898  reportArbitEvent(signal, NDB_LE_ArbitResult);
5899  if (arbitRec.code == ArbitCode::WinChoose) {
5900  jam();
5901  sendCommitFailReq(signal); // start commit of failed nodes
5902  arbitRec.state = ARBIT_INIT;
5903  arbitRec.newstate = true;
5904  return;
5905  }
5906  arbitRec.state = ARBIT_CRASH;
5907  arbitRec.newstate = true;
5908  stateArbitCrash(signal); // do it at once
5909  return;
5910  }
5911 
5912  if (arbitRec.getTimediff() > getArbitTimeout()) {
5913  jam();
5914  // Arbitration timeout has expired
5915  arbitRec.code = ArbitCode::ErrTimeout;
5916  reportArbitEvent(signal, NDB_LE_ArbitState);
5917  arbitRec.state = ARBIT_CRASH;
5918  arbitRec.newstate = true;
5919  stateArbitCrash(signal); // do it at once
5920  return;
5921  }
5922  break;
5923  }
5924 
5925  default:
5926  ndbrequire(false);
5927  break;
5928  }
5929 }
5930 
5931 void
5932 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
5933 {
5934  jamEntry();
5935  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5936  if (!arbitRec.match(sd)) {
5937  jam();
5938  return; // stray signal
5939  }
5940  if (arbitRec.state != ARBIT_CHOOSE) {
5941  jam();
5942  return; // wrong state
5943  }
5944  if (arbitRec.recvCount) {
5945  jam();
5946  return; // wrong state
5947  }
5948  arbitRec.recvCount = 1;
5949  arbitRec.code = sd->code;
5950 }
5951 
5952 void
5953 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
5954 {
5955  jamEntry();
5956  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5957  if (sd->code == 0) {
5958  jam();
5959  sd->code = ArbitCode::ErrUnknown;
5960  }
5961  execARBIT_CHOOSECONF(signal);
5962 }
5963 
5968 void
5969 Qmgr::stateArbitCrash(Signal* signal)
5970 {
5971  jam();
5972  if (arbitRec.newstate) {
5973  jam();
5974  CRASH_INSERTION((Uint32)910 + arbitRec.state);
5975  arbitRec.setTimestamp();
5976  arbitRec.code = 0;
5977  arbitRec.newstate = false;
5978  }
5979 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
5980  if (! (arbitRec.getTimediff() > getArbitTimeout()))
5981  return;
5982 #endif
5983  CRASH_INSERTION(932);
5984  CRASH_INSERTION(938);
5985  progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
5986  "Arbitrator decided to shutdown this node");
5987 }
5988 
5994 void
5995 Qmgr::execARBIT_STOPREP(Signal* signal)
5996 {
5997  jamEntry();
5998  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5999  if (! arbitRec.match(sd)) {
6000  jam();
6001  return; // stray signal
6002  }
6003  arbitRec.code = ArbitCode::ApiExit;
6004  handleArbitApiFail(signal, arbitRec.node);
6005 }
6006 
6007 void
6008 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
6009 {
6010  NodeRecPtr aPtr;
6011  aMask.clear();
6012  for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6013  jam();
6014  ptrAss(aPtr, nodeRec);
6015  if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6016  jam();
6017  aMask.set(aPtr.i);
6018  }
6019  }
6020 }
6021 
6022 void
6023 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
6024 {
6025  NodeRecPtr aPtr;
6026  aMask.clear();
6027  for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6028  jam();
6029  ptrAss(aPtr, nodeRec);
6030  if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6031  jam();
6032  aMask.set(aPtr.i);
6033  }
6034  }
6035 }
6036 
6041 void
6042 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
6043  const NodeBitmask mask)
6044 {
6045  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6046  sd->sender = type;
6047  sd->code = arbitRec.code | (arbitRec.state << 16);
6048  sd->node = arbitRec.node;
6049  sd->ticket = arbitRec.ticket;
6050  sd->mask = mask;
6051  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6052  ArbitSignalData::SignalLength, JBB);
6053 }
6054 
6055 // end of arbitration module
6056 
6057 void
6058 Qmgr::execDUMP_STATE_ORD(Signal* signal)
6059 {
6060  switch (signal->theData[0]) {
6061  case 1:
6062  infoEvent("creadyDistCom = %d, cpresident = %d\n",
6063  creadyDistCom, cpresident);
6064  infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6065  cpresidentAlive,
6066  c_start.m_president_candidate,
6067  c_start.m_president_candidate_gci);
6068  infoEvent("ctoStatus = %d\n", ctoStatus);
6069  for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
6070  NodeRecPtr nodePtr;
6071  nodePtr.i = i;
6072  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6073  char buf[100];
6074  switch(nodePtr.p->phase){
6075  case ZINIT:
6076  sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6077  break;
6078  case ZSTARTING:
6079  sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6080  break;
6081  case ZRUNNING:
6082  sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6083  break;
6084  case ZPREPARE_FAIL:
6085  sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6086  break;
6087  case ZFAIL_CLOSING:
6088  sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6089  break;
6090  case ZAPI_INACTIVE:
6091  sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6092  break;
6093  case ZAPI_ACTIVE:
6094  sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6095  break;
6096  default:
6097  sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
6098  break;
6099  }
6100  infoEvent("%s", buf);
6101  }
6102  }
6103 
6104 #ifdef ERROR_INSERT
6105  if (signal->theData[0] == 935 && signal->getLength() == 2)
6106  {
6107  SET_ERROR_INSERT_VALUE(935);
6108  c_error_insert_extra = signal->theData[1];
6109  }
6110 #endif
6111 
6112  if (signal->theData[0] == 900 && signal->getLength() == 2)
6113  {
6114  ndbout_c("disconnecting %u", signal->theData[1]);
6115  api_failed(signal, signal->theData[1]);
6116  }
6117 
6118  if (signal->theData[0] == 908)
6119  {
6120  int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6121  char buf[8192];
6122  // for easy grepping in *out.log ...
6123  strcpy(buf, "HB:");
6124  if (tag >= 0)
6125  sprintf(buf+strlen(buf), "%d:", tag);
6126  sprintf(buf+strlen(buf), " pres:%u", cpresident);
6127  sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
6128  NodeRecPtr myNodePtr;
6129  myNodePtr.i = getOwnNodeId();
6130  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
6131  sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6132  sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
6133  sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6134  sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
6135  NodeRecPtr nodePtr;
6136  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6137  {
6138  ptrAss(nodePtr, nodeRec);
6139  Uint32 type = getNodeInfo(nodePtr.i).m_type;
6140  if (type == NodeInfo::DB)
6141  {
6142  sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6143  }
6144  }
6145  ndbout << buf << endl;
6146  }
6147 
6148 #ifdef ERROR_INSERT
6149  Uint32 dumpCode = signal->theData[0];
6150  if ((dumpCode == 9992) ||
6151  (dumpCode == 9993))
6152  {
6153  if (signal->getLength() == 2)
6154  {
6155  Uint32 nodeId = signal->theData[1];
6156  Uint32& newNodeId = signal->theData[1];
6157  Uint32 length = 2;
6158  assert(257 > MAX_NODES);
6159  if (nodeId > MAX_NODES)
6160  {
6161  const char* type = "None";
6162  switch (nodeId)
6163  {
6164  case 257:
6165  {
6166  /* Left (lower) neighbour */
6167  newNodeId = cneighbourl;
6168  type = "Left neighbour";
6169  break;
6170  }
6171  case 258:
6172  {
6173  /* Right (higher) neighbour */
6174  newNodeId = cneighbourh;
6175  type = "Right neighbour";
6176  break;
6177  }
6178  case 259:
6179  {
6180  /* President */
6181  newNodeId = cpresident;
6182  type = "President";
6183  break;
6184  }
6185  }
6186  ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
6187  nodeId, newNodeId, type);
6188  if (newNodeId != nodeId)
6189  {
6190  sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
6191  }
6192  }
6193  }
6194  }
6195 
6196  if (dumpCode == 9994)
6197  {
6198  ndbout_c("setCCDelay(%u)", signal->theData[1]);
6199  setCCDelay(signal->theData[1]);
6200  m_connectivity_check.m_enabled = true;
6201  }
6202 #endif
6203 }//Qmgr::execDUMP_STATE_ORD()
6204 
6205 
6206 void
6207 Qmgr::execAPI_BROADCAST_REP(Signal* signal)
6208 {
6209  jamEntry();
6210  ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();
6211 
6212  SectionHandle handle(this, signal);
6213  Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6214  memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
6215  4*len);
6216 
6217  NodeBitmask mask;
6218  NodeRecPtr nodePtr;
6219  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6220  {
6221  jam();
6222  ptrAss(nodePtr, nodeRec);
6223  if (nodePtr.p->phase == ZAPI_ACTIVE &&
6224  getNodeInfo(nodePtr.i).m_version >= api.minVersion)
6225  {
6226  jam();
6227  mask.set(nodePtr.i);
6228  }
6229  }
6230 
6231  if (mask.isclear())
6232  {
6233  jam();
6234  releaseSections(handle);
6235  return;
6236  }
6237 
6238  NodeReceiverGroup rg(API_CLUSTERMGR, mask);
6239  sendSignal(rg, api.gsn, signal, len, JBB,
6240  &handle);
6241 }
6242 
6243 void
6244 Qmgr::execNODE_FAILREP(Signal * signal)
6245 {
6246  jamEntry();
6247  // make sure any distributed signals get acknowledged
6248  // destructive of the signal
6249  c_counterMgr.execNODE_FAILREP(signal);
6250 }
6251 
6252 void
6253 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
6254 {
6255  jamEntry();
6256  AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
6257  Uint32 error = 0;
6258 
6259  NodeRecPtr nodePtr;
6260  nodePtr.i = req.nodeId;
6261  ptrAss(nodePtr, nodeRec);
6262 
6263  if (refToBlock(req.senderRef) != QMGR) // request from management server
6264  {
6265  /* master */
6266 
6267  if (getOwnNodeId() != cpresident)
6268  {
6269  jam();
6270  error = AllocNodeIdRef::NotMaster;
6271  }
6272  else if (!opAllocNodeIdReq.m_tracker.done())
6273  {
6274  jam();
6275  error = AllocNodeIdRef::Busy;
6276  }
6277  else if (c_connectedNodes.get(req.nodeId))
6278  {
6279  jam();
6280  error = AllocNodeIdRef::NodeConnected;
6281  }
6282  else if (nodePtr.p->m_secret != 0)
6283  {
6284  jam();
6285  error = AllocNodeIdRef::NodeReserved;
6286  }
6287 
6288  if (error)
6289  {
6290  jam();
6291  AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6292  ref->senderRef = reference();
6293  ref->errorCode = error;
6294  ref->masterRef = numberToRef(QMGR, cpresident);
6295  ref->senderData = req.senderData;
6296  ref->nodeId = req.nodeId;
6297  sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6298  AllocNodeIdRef::SignalLength, JBB);
6299  return;
6300  }
6301 
6302  if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6303  {
6304  CRASH_INSERTION(934);
6305  }
6306 
6310  Uint64 now = NdbTick_CurrentMillisecond();
6311  Uint32 secret_hi = Uint32(now >> 24);
6312  Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
6313  req.secret_hi = secret_hi;
6314  req.secret_lo = secret_lo;
6315 
6316  if (req.timeout > 60000)
6317  req.timeout = 60000;
6318 
6319  nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6320  nodePtr.p->m_alloc_timeout = now + req.timeout;
6321 
6322  opAllocNodeIdReq.m_req = req;
6323  opAllocNodeIdReq.m_error = 0;
6324  opAllocNodeIdReq.m_connectCount =
6325  getNodeInfo(refToNode(req.senderRef)).m_connectCount;
6326 
6327  jam();
6328  AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
6329  * req2 = req;
6330  req2->senderRef = reference();
6331  NodeReceiverGroup rg(QMGR, c_clusterNodes);
6332  RequestTracker & p = opAllocNodeIdReq.m_tracker;
6333  p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6334 
6335  sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6336  AllocNodeIdReq::SignalLengthQMGR, JBB);
6337  return;
6338  }
6339 
6340  /* participant */
6341  if (c_connectedNodes.get(req.nodeId))
6342  {
6343  jam();
6344  error = AllocNodeIdRef::NodeConnected;
6345  }
6346  else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6347  {
6348  jam();
6349  error = AllocNodeIdRef::NodeTypeMismatch;
6350  }
6351  else if (nodePtr.p->failState != NORMAL)
6352  {
6353  jam();
6354  error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6355  }
6356 #if 0
6357 
6362  else if (nodePtr.p->m_secret != 0)
6363  {
6364  jam();
6365  error = AllocNodeIdRef::NodeReserved;
6366  }
6367 #endif
6368 
6369  if (error)
6370  {
6371  jam();
6372  AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6373  ref->senderRef = reference();
6374  ref->errorCode = error;
6375  ref->senderData = req.senderData;
6376  ref->nodeId = req.nodeId;
6377  ref->masterRef = numberToRef(QMGR, cpresident);
6378  sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6379  AllocNodeIdRef::SignalLength, JBB);
6380  return;
6381  }
6382 
6383  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6384  conf->senderRef = reference();
6385  conf->secret_hi = req.secret_hi;
6386  conf->secret_lo = req.secret_lo;
6387  sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6388  AllocNodeIdConf::SignalLength, JBB);
6389 }
6390 
6391 void
6392 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
6393 {
6394  /* master */
6395 
6396  jamEntry();
6397  const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
6398  opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6399  refToNode(conf->senderRef));
6400 
6401  if (signal->getLength() >= AllocNodeIdConf::SignalLength)
6402  {
6403  jam();
6404  if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6405  opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6406  {
6407  jam();
6408  if (opAllocNodeIdReq.m_error == 0)
6409  {
6410  jam();
6411  opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6412  }
6413  }
6414  }
6415 
6416  completeAllocNodeIdReq(signal);
6417 }
6418 
6419 
6420 void
6421 Qmgr::execALLOC_NODEID_REF(Signal * signal)
6422 {
6423  /* master */
6424 
6425  jamEntry();
6426  const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
6427  if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6428  {
6429  jam();
6430  opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6431  refToNode(ref->senderRef));
6432  }
6433  else
6434  {
6435  jam();
6436  opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6437  refToNode(ref->senderRef));
6438  if (opAllocNodeIdReq.m_error == 0)
6439  {
6440  jam();
6441  opAllocNodeIdReq.m_error = ref->errorCode;
6442  }
6443  }
6444  completeAllocNodeIdReq(signal);
6445 }
6446 
6447 void
6448 Qmgr::completeAllocNodeIdReq(Signal *signal)
6449 {
6450  /* master */
6451 
6452  if (!opAllocNodeIdReq.m_tracker.done())
6453  {
6454  jam();
6455  return;
6456  }
6457 
6458  if (opAllocNodeIdReq.m_connectCount !=
6459  getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
6460  {
6461  // management server not same version as the original requester
6462  jam();
6463  return;
6464  }
6465 
6466  if (opAllocNodeIdReq.m_tracker.hasRef())
6467  {
6468  jam();
6469 
6470  {
6474  NodeRecPtr nodePtr;
6475  nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6476  ptrAss(nodePtr, nodeRec);
6477  nodePtr.p->m_secret = 0;
6478  }
6479 
6480  AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6481  ref->senderRef = reference();
6482  ref->senderData = opAllocNodeIdReq.m_req.senderData;
6483  ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6484  ref->errorCode = opAllocNodeIdReq.m_error;
6485  ref->masterRef = numberToRef(QMGR, cpresident);
6486  ndbassert(AllocNodeIdRef::SignalLength == 5);
6487  sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6488  AllocNodeIdRef::SignalLength, JBB);
6489  return;
6490  }
6491 
6492  jam();
6493 
6494  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6495  conf->senderRef = reference();
6496  conf->senderData = opAllocNodeIdReq.m_req.senderData;
6497  conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6498  conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6499  conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6500  sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6501  AllocNodeIdConf::SignalLength, JBB);
6502 }
6503 
6504 void
6505 Qmgr::execSTOP_REQ(Signal* signal)
6506 {
6507  jamEntry();
6508  c_stopReq = * (StopReq*)signal->getDataPtr();
6509 
6510  if (c_stopReq.senderRef)
6511  {
6512  jam();
6513  ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
6514 
6515  StopConf *conf = (StopConf*)signal->getDataPtrSend();
6516  conf->senderData = c_stopReq.senderData;
6517  conf->nodeState = getOwnNodeId();
6518  sendSignal(c_stopReq.senderRef,
6519  GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
6520  }
6521 }
6522 
6523 bool
6524 Qmgr::check_multi_node_shutdown(Signal* signal)
6525 {
6526  if (c_stopReq.senderRef &&
6527  NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
6528  {
6529  jam();
6530  if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6531  {
6532  jam();
6533  StartOrd * startOrd = (StartOrd *)&signal->theData[0];
6534  startOrd->restartInfo = c_stopReq.requestInfo;
6535  sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
6536  } else {
6537  sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
6538  }
6539  return true;
6540  }
6541  return false;
6542 }
6543 
6544 int
6545 Qmgr::check_hb_order_config()
6546 {
6547  m_hb_order_config_used = false;
6548  Uint32 count = 0;
6549  Uint32 count_zero = 0;
6550  NodeRecPtr nodePtr;
6551  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6552  {
6553  ptrAss(nodePtr, nodeRec);
6554  const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6555  if (nodeInfo.m_type == NodeInfo::DB)
6556  {
6557  count++;
6558  if (nodePtr.p->hbOrder == 0)
6559  count_zero++;
6560  }
6561  }
6562  ndbrequire(count != 0); // must have node info
6563  if (count_zero == count)
6564  {
6565  jam();
6566  return 0; // no hbOrder defined
6567  }
6568  if (count_zero != 0)
6569  {
6570  jam();
6571  return -1; // error: not all zero or all nonzero
6572  }
6573  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6574  {
6575  ptrAss(nodePtr, nodeRec);
6576  const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6577  if (nodeInfo.m_type == NodeInfo::DB)
6578  {
6579  NodeRecPtr nodePtr2;
6580  for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6581  {
6582  ptrAss(nodePtr2, nodeRec);
6583  const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
6584  if (nodeInfo2.m_type == NodeInfo::DB)
6585  {
6586  if (nodePtr.i != nodePtr2.i &&
6587  nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
6588  {
6589  jam();
6590  return -2; // error: duplicate nonzero value
6591  }
6592  }
6593  }
6594  }
6595  }
6596  m_hb_order_config_used = true;
6597  return 0;
6598 }
6599 
6600 static const Uint32 CC_SuspectTicks = 1;
6601 static const Uint32 CC_FailedTicks = 2;
6602 
6603 void
6604 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
6605 {
6606  jam();
6607  ndbrequire(m_connectivity_check.getEnabled());
6608 
6609  if (m_connectivity_check.m_active)
6610  {
6611  jam();
6612  /* Connectivity check underway already
6613  * do nothing
6614  */
6615  return;
6616  }
6617 
6618 
6619  m_connectivity_check.m_nodesPinged.clear();
6620 
6621  /* Send NODE_PINGREQ signal to all other running nodes, and
6622  * initialise connectivity check bitmasks.
6623  * Note that nodes may already be considered suspect due to
6624  * a previous connectivity check round.
6625  */
6626  Uint32 ownId = getOwnNodeId();
6627  NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
6628  pingReq->senderData = ++m_connectivity_check.m_currentRound;
6629  pingReq->senderRef = reference();
6630 
6631  for (Uint32 i=1; i < MAX_NDB_NODES; i++)
6632  {
6633  if (i != ownId)
6634  {
6635  NodeRec& node = nodeRec[i];
6636  if (node.phase == ZRUNNING)
6637  {
6638  /* If connection was considered ok, treat as unknown,
6639  * If it was considered slow, continue to treat
6640  * as slow
6641  */
6642  sendSignal(node.blockRef,
6643  GSN_NODE_PING_REQ,
6644  signal,
6645  NodePingReq::SignalLength,
6646  JBA);
6647 
6648  m_connectivity_check.m_nodesPinged.set(i);
6649  }
6650  }
6651  }
6652 
6653  /* Initialise result bitmasks */
6654  m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
6655  m_connectivity_check.m_nodesFailedDuring.clear();
6656 
6657  /* Ensure only live nodes are considered suspect */
6658  m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
6659 
6660  const char* reasonText = "Unknown";
6661  bool firstTime = true;
6662 
6663  switch(reason)
6664  {
6665  case FailRep::ZHEARTBEAT_FAILURE:
6666  reasonText = "Heartbeat failure";
6667  break;
6668  case FailRep::ZCONNECT_CHECK_FAILURE:
6669  reasonText = "Connectivity check request";
6670  break;
6671  default:
6672  firstTime = false;
6673  ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
6674  break;
6675  }
6676 
6677  if (!m_connectivity_check.m_nodesPinged.isclear())
6678  {
6679  jam();
6680  {
6681  char buff[100];
6682  m_connectivity_check.m_nodesPinged.getText(buff);
6683  if (firstTime)
6684  {
6685  g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
6686  m_connectivity_check.m_nodesPinged.count(),
6687  buff,
6688  reasonText,
6689  causingNode);
6690  }
6691  else
6692  {
6693  char buff2[100];
6694  m_connectivity_check.m_nodesSuspect.getText(buff2);
6695  g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
6696  m_connectivity_check.m_nodesPinged.count(),
6697  buff,
6698  m_connectivity_check.m_nodesSuspect.count(),
6699  buff2);
6700  }
6701  }
6702 
6703  /* Generate cluster log event */
6704  Uint32 bitmaskSz = NdbNodeBitmask::Size;
6705  signal->theData[0] = NDB_LE_ConnectCheckStarted;
6706  signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6707  signal->theData[2] = reason;
6708  signal->theData[3] = causingNode;
6709  signal->theData[4] = bitmaskSz;
6710  Uint32* sigPtr = &signal->theData[5];
6711  m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
6712  m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
6713  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
6714 
6715  m_connectivity_check.m_active = true;
6716  m_connectivity_check.m_tick = 0;
6717  NDB_TICKS now = NdbTick_CurrentMillisecond();
6718  m_connectivity_check.m_timer.reset(now);
6719  }
6720  else
6721  {
6722  g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
6723  reasonText,
6724  causingNode);
6725  }
6726 }
6727 
6728 void
6729 Qmgr::execNODE_PINGREQ(Signal* signal)
6730 {
6731  jamEntry();
6732  Uint32 ownId = getOwnNodeId();
6733  const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
6734  Uint32 sendersRef = signal->getSendersBlockRef();
6735  Uint32 sendersNodeId = refToNode(sendersRef);
6736  Uint32 senderData = pingReq->senderData;
6737 
6738  ndbrequire(sendersNodeId != ownId);
6739 
6740  /* We will start our own connectivity check if necessary
6741  * before responding with PING_CONF to the requestor.
6742  * This means that the sending node will receive our PING_REQ
6743  * before our PING_CONF, which should avoid them starting an
6744  * unnecessary extra connectivity check round in some cases.
6745  */
6746  if (likely(m_connectivity_check.getEnabled()))
6747  {
6748  jam();
6749  /* We have connectivity checking configured */
6750  if (! m_connectivity_check.m_active)
6751  {
6752  jam();
6753 
6754  {
6755  /* Don't start a new connectivity check if the requesting
6756  * node has failed from our point of view
6757  */
6758  NodeRecPtr nodePtr;
6759  nodePtr.i = sendersNodeId;
6760  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6761  if (unlikely(nodePtr.p->phase != ZRUNNING))
6762  {
6763  jam();
6764 
6765  g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
6766  sendersNodeId, nodePtr.p->phase);
6767  return;
6768  }
6769  }
6770 
6771  /* Start our own Connectivity Check now indicating reason and causing node */
6772  startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
6773  }
6774  }
6775  else
6776  {
6777  jam();
6778  g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
6779  "checking not configured on this node. Ensure all "
6780  "nodes have the same configuration for parameter "
6781  "ConnectCheckIntervalMillis.",
6782  sendersNodeId);
6783  }
6784 
6785  /* Now respond with NODE_PINGCONF */
6786  NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
6787 
6788  pingConf->senderData = senderData;
6789  pingConf->senderRef = reference();
6790 
6791  sendSignal(sendersRef,
6792  GSN_NODE_PING_CONF,
6793  signal,
6794  NodePingConf::SignalLength,
6795  JBA);
6796 }
6797 
6798 void
6799 Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
6800 {
6801  /* Clear any suspicion */
6802  m_nodesSuspect.clear(nodeId);
6803 }
6804 
6805 bool
6806 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
6807 {
6808  if (unlikely(m_active))
6809  {
6810  m_nodesFailedDuring.set(nodeId);
6811 
6812  if (m_nodesWaiting.get(nodeId))
6813  {
6814  /* We were waiting for a NODE_PING_CONF from this node,
6815  * remove it from the set
6816  */
6817  m_nodesWaiting.clear(nodeId);
6818 
6819  return m_nodesWaiting.isclear();
6820  }
6821  }
6822  return false;
6823 }
6824 
6825 void
6826 Qmgr::execNODE_PINGCONF(Signal* signal)
6827 {
6828  jamEntry();
6829 
6830  ndbrequire(m_connectivity_check.getEnabled());
6831 
6832  const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
6833  Uint32 sendersBlockRef = signal->getSendersBlockRef();
6834  Uint32 sendersNodeId = refToNode(sendersBlockRef);
6835  Uint32 roundNumber = pingConf->senderData;
6836 
6837  ndbrequire(sendersNodeId != getOwnNodeId());
6838  ndbrequire((m_connectivity_check.m_active) || /* Normal */
6839  (m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || /* We killed last round */
6840  m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId))); /* Someone killed */
6841 
6842  if (unlikely((! m_connectivity_check.m_active) ||
6843  (roundNumber != m_connectivity_check.m_currentRound)))
6844  {
6845  g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
6846  "but we are %sactive on round %u. Discarding.",
6847  sendersNodeId,
6848  roundNumber,
6849  ((m_connectivity_check.m_active)?"":"in"),
6850  m_connectivity_check.m_currentRound);
6851  return;
6852  }
6853 
6854  /* Node must have been pinged, we must be waiting for the response,
6855  * or the node must have already failed
6856  */
6857  ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
6858  ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
6859  m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
6860 
6861  m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
6862 
6863  if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
6864  {
6865  jam();
6866  /* Node responded on time, clear any suspicion about it */
6867  m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
6868  }
6869 
6870  if (m_connectivity_check.m_nodesWaiting.isclear())
6871  {
6872  jam();
6873  /* Connectivity check round is now finished */
6874  connectivityCheckCompleted(signal);
6875  }
6876 }
6877 
6878 void
6879 Qmgr::connectivityCheckCompleted(Signal* signal)
6880 {
6881  jam();
6882 
6883  m_connectivity_check.m_active = false;
6884 
6885  /* Log the following :
6886  * Nodes checked
6887  * Nodes responded ok
6888  * Nodes responded late (now suspect)
6889  * Nodes failed to respond.
6890  * Nodes failed during
6891  */
6892  char pinged[100];
6893  char late[100];
6894  char silent[100];
6895  char failed[100];
6896 
6897  /* Any 'waiting' nodes have been killed
6898  * Surviving suspects do not include them.
6899  */
6900  NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
6901  survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
6902 
6903  /* Nodes that failed during the check are also excluded */
6904  survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
6905 
6906  m_connectivity_check.m_nodesPinged.getText(pinged);
6907  survivingSuspects.getText(late);
6908  m_connectivity_check.m_nodesWaiting.getText(silent);
6909  m_connectivity_check.m_nodesFailedDuring.getText(failed);
6910 
6911  g_eventLogger->info("QMGR : Connectivity check completed, "
6912  "%u other nodes checked (%s), "
6913  "%u responded on time, "
6914  "%u responded late (%s), "
6915  "%u no response will be failed (%s), "
6916  "%u failed during check (%s)\n",
6917  m_connectivity_check.m_nodesPinged.count(),
6918  pinged,
6919  m_connectivity_check.m_nodesPinged.count() -
6920  m_connectivity_check.m_nodesSuspect.count(),
6921  survivingSuspects.count(),
6922  late,
6923  m_connectivity_check.m_nodesWaiting.count(),
6924  silent,
6925  m_connectivity_check.m_nodesFailedDuring.count(),
6926  failed);
6927 
6928  /* Log in Cluster log */
6929  signal->theData[0] = NDB_LE_ConnectCheckCompleted;
6930  signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6931  signal->theData[2] = survivingSuspects.count();
6932  signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
6933  m_connectivity_check.m_nodesFailedDuring.count();
6934 
6935  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
6936 
6937  if (survivingSuspects.count() > 0)
6938  {
6939  jam();
6940  /* Still suspect nodes, start another round */
6941  g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
6942  /* Restart connectivity check, no external reason or cause */
6943  startConnectivityCheck(signal, 0, 0);
6944  }
6945  else
6946  {
6947  jam();
6948  /* No suspect nodes, stop the protocol now */
6949 
6950  g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
6951  m_connectivity_check.m_nodesPinged.count() -
6952  (m_connectivity_check.m_nodesWaiting.count() +
6953  m_connectivity_check.m_nodesFailedDuring.count()));
6954 
6955  /* Send a heartbeat to our right neighbour at this point as a gesture
6956  * of goodwill
6957  */
6958  sendHeartbeat(signal);
6959  hb_send_timer.reset(NdbTick_CurrentMillisecond());
6960  };
6961 }
6962 
6963 void
6964 Qmgr::checkConnectivityTimeSignal(Signal* signal)
6965 {
6966  /* Executed periodically when a connectivity check is
6967  * underway.
6968  * After CC_SuspectTicks have elapsed, any nodes
6969  * which have not responded are considered
6970  * 'Suspect'.
6971  * After CC_FailedTicks have elapsed, any nodes
6972  * which have not responded are considered
6973  * to have failed, and failure handling
6974  * begins.
6975  */
6976  jam();
6977 
6978  /* Preconditions, otherwise we shouldn't have been called */
6979  ndbrequire(m_connectivity_check.getEnabled());
6980  ndbrequire(m_connectivity_check.m_active);
6981  ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
6982 
6983  m_connectivity_check.m_tick++;
6984 
6985  switch (m_connectivity_check.m_tick)
6986  {
6987  case CC_SuspectTicks:
6988  {
6989  jam();
6990  /* Still waiting to hear from some nodes, they are now
6991  * suspect
6992  */
6993  m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
6994  return;
6995  }
6996  case CC_FailedTicks:
6997  {
6998  jam();
6999  /* Still waiting to hear from some nodes, they will now
7000  * be failed
7001  */
7002  m_connectivity_check.m_active = false;
7003  Uint32 nodeId = 0;
7004 
7005  while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
7006  != BitmaskImpl::NotFound)
7007  {
7008  jam();
7009  /* Log failure reason */
7010  /* Todo : Connectivity Check specific failure log? */
7011  signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
7012  signal->theData[1] = nodeId;
7013 
7014  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7015 
7016  /* Fail the node */
7017  /* TODO : Consider real time break here */
7018  failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7019  nodeId++;
7020  }
7021 
7022  /* Now handle the end of the Connectivity Check */
7023  connectivityCheckCompleted(signal);
7024  }
7025  }
7026 }
7027 
7028 bool
7029 Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
7030 {
7031  return m_connectivity_check.m_nodesSuspect.get(nodeId);
7032 }
7033 
7034 void
7035 Qmgr::handleFailFromSuspect(Signal* signal,
7036  Uint32 reason,
7037  Uint16 aFailedNode,
7038  Uint16 sourceNode)
7039 {
7040  jam();
7041 
7042  const char* reasonText = "Unknown";
7043 
7044  /* We have received a failure report about some node X from
7045  * some other node that we consider to have suspect connectivity
7046  * which may have caused the report.
7047  *
7048  * We will 'invert' the sense of this, and handle it as
7049  * a failure report of the sender, with the same cause.
7050  */
7051  switch(reason)
7052  {
7053  case FailRep::ZCONNECT_CHECK_FAILURE:
7054  jam();
7055  /* Suspect says that connectivity check failed for another node.
7056  * As suspect has bad connectivity from our point of view, we
7057  * blame him.
7058  */
7059  reasonText = "ZCONNECT_CHECK_FAILURE";
7060  break;
7061  case FailRep::ZLINK_FAILURE:
7062  jam();
7063  /* Suspect says that link failed for another node.
7064  * As suspect has bad connectivity from our point of view, we
7065  * blame her.
7066  */
7067  reasonText = "ZLINK_FAILURE";
7068  break;
7069  default:
7070  ndbrequire(false);
7071  }
7072 
7073  g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
7074  "%u from suspect node %u with reason %s. "
7075  "Mapping to failure of %u sourced by me.",
7076  aFailedNode, sourceNode, reasonText, sourceNode);
7077 
7078  signal->theData[0] = NDB_LE_NodeFailRejected;
7079  signal->theData[1] = reason;
7080  signal->theData[2] = aFailedNode;
7081  signal->theData[3] = sourceNode;
7082 
7083  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7084 
7085  failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
7086 }