MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Qmgr.hpp
1 /*
2  Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 #ifndef QMGR_H
19 #define QMGR_H
20 
21 
22 #include <pc.hpp>
23 #include <NdbTick.h>
24 #include <SimulatedBlock.hpp>
25 #include <NodeBitmask.hpp>
26 #include <SignalCounter.hpp>
27 
28 #include <signaldata/EventReport.hpp>
29 #include <signaldata/ArbitSignalData.hpp>
30 #include <signaldata/CmRegSignalData.hpp>
31 #include <signaldata/ApiRegSignalData.hpp>
32 #include <signaldata/FailRep.hpp>
33 #include <signaldata/AllocNodeId.hpp>
34 
35 #include <RequestTracker.hpp>
36 #include <signaldata/StopReq.hpp>
37 
38 #include "timer.hpp"
39 
40 #ifdef QMGR_C
/* The definitions from here to the matching #endif are visible only when
 * QMGR_C is defined before this header is included. */
41 
42 /* Delay values, in milliseconds ----------------*/
43 #define ZDELAY_REGREQ 1000
44 
45 /* Type of refuse in CM_NODEINFOREF -------------*/
46 #define ZNOT_RUNNING 0
47 
48 /* Type of continue in CONTINUEB (values 0..6) --*/
49 #define ZREGREQ_TIMELIMIT 0
50 #define ZHB_HANDLING 1
51 #define ZREGREQ_MASTER_TIMELIMIT 2
52 #define ZAPI_HB_HANDLING 3
53 #define ZTIMER_HANDLING 4
54 #define ZARBIT_HANDLING 5
55 #define ZSTART_FAILURE_LIMIT 6
56 
57 /* Error codes, numbered consecutively 1101..1119 */
58 #define ZERRTOOMANY 1101
59 #define ZERRALREADYREG 1102
60 #define ZERRNHMISSING 1103
61 #define ZERRNLMISSING 1104
62 #define ZERRAPPMISSING 1105
63 #define ZERROR_NOT_IN_CFGFILE 1106
64 #define ZERROR_TIMEOUT 1107
65 #define ZERROR_NOT_ZINIT 1108
66 #define ZERROR_NODEINFOREF 1109
67 #define ZERROR_NOTLOCALQMGR 1110
68 #define ZERROR_NOTRUNNING 1111
69 #define ZCOULD_NOT_OCCUR_ERROR 1112
70 #define ZTIME_OUT_ERROR 1113
71 #define ZERROR_NOT_DEAD 1114
72 #define ZDECLARED_FAIL_ERROR 1115
73 #define ZOWN_NODE_ERROR 1116
74 #define ZWRONG_STATE_ERROR 1117
75 #define ZNODE_ZERO_ERROR 1118
76 #define ZWRONG_NODE_ERROR 1119
77 
78 #endif
79 
/* Number of entries in NodeRec::m_failconf_blocks. Defined outside the
 * QMGR_C guard, so every includer of this header sees it. */
80 #define QMGR_MAX_FAIL_STATE_BLOCKS 5
81 
82 class Qmgr : public SimulatedBlock {
83 public:
84  // State values
/* Two-valued activity flag. Within this header it is the type of the
 * NodeRec send*Status fields and of the Qmgr member ctoStatus. */
85  enum QmgrState {
86  Q_NOT_ACTIVE = 0,
87  Q_ACTIVE = 1
88  };
89 
/* Failure-handling state kept per node in NodeRec::failState.
 * The numbering is not contiguous: it goes 0..3 and then 6; the values
 * 4 and 5 are not assigned to any enumerator here. */
90  enum FailState {
91  NORMAL = 0,
92  WAITING_FOR_CLOSECOMCONF_ACTIVE = 1, /* Node had phase ZAPI_ACTIVE */
93  WAITING_FOR_CLOSECOMCONF_NOTACTIVE = 2, /* Node had phase != ZAPI_ACTIVE */
94  WAITING_FOR_API_FAILCONF = 3,
95  WAITING_FOR_NDB_FAILCONF = 6
96  };
97 
/* Phase of a node, stored in NodeRec::phase. Enumerators are numbered
 * 1..7; no enumerator has the value 0. */
98  enum Phase {
99  ZINIT = 1, /* All nodes start in phase INIT */
100  ZSTARTING = 2, /* Node is connecting to cluster */
101  ZRUNNING = 3, /* Node is running in the cluster */
102  ZPREPARE_FAIL = 4, /* Preparation for failure */
103  ZFAIL_CLOSING = 5, /* API/NDB is disconnecting */
104  ZAPI_ACTIVE = 6, /* API is running in node */
105  ZAPI_INACTIVE = 7 /* Inactive API */
106  };
107 
/*
 * Start bookkeeping; the one instance declared here is the Qmgr member
 * c_start.
 * NOTE(review): the constructor is empty, so the scalar members start out
 * uninitialised, and reset() increments m_startKey instead of assigning it.
 * Presumably m_startKey is given a value elsewhere before the first
 * reset() - confirm.
 */
108  struct StartRecord {
109  StartRecord() {}
  /* Increments m_startKey, zeroes m_startNode, sets m_gsn to RNIL and calls
   * m_nodes.clearWaitingFor(). No other member is touched. */
110  void reset(){
111  m_startKey++;
112  m_startNode = 0;
113  m_gsn = RNIL;
114  m_nodes.clearWaitingFor();
115  }
116  Uint32 m_startKey; // incremented by every reset()
117  Uint32 m_startNode; // 0 after reset()
118  Uint64 m_startTimeout;
119 
120  Uint32 m_gsn; // RNIL after reset()
121  SignalCounter m_nodes; // wait set, cleared by reset()
122  Uint32 m_latest_gci;
123 
124  Uint32 m_start_type;
125  NdbNodeBitmask m_skip_nodes;
126  NdbNodeBitmask m_starting_nodes;
127  NdbNodeBitmask m_starting_nodes_w_log;
128  NdbNodeBitmask m_no_nodegroup_nodes;
129 
130  Uint16 m_president_candidate;
131  Uint32 m_president_candidate_gci;
132  Uint16 m_regReqReqSent;
133  Uint16 m_regReqReqRecv;
134  Uint32 m_node_gci[MAX_NDB_NODES]; // one slot per possible NDB node
135  } c_start;
136 
137  NdbNodeBitmask c_definedNodes; // DB nodes in config
138  NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
139  NodeBitmask c_connectedNodes; // All kinds of connected nodes
140 
147 
148  Uint32 c_maxDynamicId;
149 
/* NOTE(review): listing line 150, which should open this aggregate, is
 * absent from this copy. Presumably it reads `struct ConnectCheckRec`, the
 * type of the m_connectivity_check member declared right after the closing
 * brace - confirm against the original header. */
151  {
152  bool m_enabled; // Config set && all node version OK
153  bool m_active; // Connectivity check underway?
154  Timer m_timer; // Check timer object
155  Uint32 m_currentRound; // Last round started
156  Uint32 m_tick; // Periods elapsed in current check
157  NdbNodeBitmask m_nodesPinged; // Nodes sent a NodePingReq in round
158  NdbNodeBitmask m_nodesWaiting; // Nodes which have not sent a response
159  NdbNodeBitmask m_nodesFailedDuring; // Nodes which failed during check
160  NdbNodeBitmask m_nodesSuspect; // Nodes with suspect connectivity
161 
  /* NOTE(review): listing line 162 (the header of the routine whose body
   * follows) is also absent. The body clears both flags, zeroes the round
   * and tick counters and empties the four node bitmasks; m_timer is not
   * touched. Presumably this is a reset-style member - confirm. */
163  {
164  m_enabled = false;
165  m_active = false;
166  m_currentRound = 0;
167  m_tick = 0;
168  m_nodesPinged.clear();
169  m_nodesWaiting.clear();
170  m_nodesFailedDuring.clear();
171  m_nodesSuspect.clear();
172  }
173 
174  void reportNodeConnect(Uint32 nodeId);
175  /* reportNodeFailure.
176  * A return value of true means the connect check is completed.
177  */
178  bool reportNodeFailure(Uint32 nodeId);
179 
  /* Returns m_enabled. When the flag is set, additionally asserts that the
   * check timer has a delay greater than zero. */
180  bool getEnabled() const {
181  if (m_enabled)
182  {
183  assert(m_timer.getDelay() > 0);
184  }
185  return m_enabled;
186  }
187  };
188 
189  ConnectCheckRec m_connectivity_check;
190 
191  // Records
/* Per-node record. Qmgr refers to these through its nodeRec pointer and
 * through the NodeRecPtr typedef. */
192  struct NodeRec {
193  /*
194  * Dynamic id is received from the president. The lower half is the next
195  * c_maxDynamicId and the upper half is hbOrder. The heartbeat circle is
196  * ordered by the full dynamic id. When the president fails, only the
197  * lower half of the dynamic id is used by other nodes to agree on the
198  * next president (the one with the minimum value).
199  */
200  UintR ndynamicId;
201  /*
202  * HeartbeatOrder from config.ini. Takes effect when this node
203  * becomes president and starts handing out dynamic ids to starting
204  * nodes. To define a new order, two rolling restarts are required.
205  */
206  Uint32 hbOrder;
207  Phase phase;
208 
209  QmgrState sendPrepFailReqStatus;
210  QmgrState sendCommitFailReqStatus;
211  QmgrState sendPresToStatus;
212  FailState failState;
213  BlockReference blockRef;
214  Uint64 m_secret;
215  Uint64 m_alloc_timeout;
216  Uint16 m_failconf_blocks[QMGR_MAX_FAIL_STATE_BLOCKS];
217 
  /* Zero-fills m_failconf_blocks only; no other field is initialised by
   * this constructor. */
218  NodeRec() { bzero(m_failconf_blocks, sizeof(m_failconf_blocks)); }
219  }; /* p2c: size = 52 bytes */
220 
221  typedef Ptr<NodeRec> NodeRecPtr;
222 
/* Arbitration states, stored in ArbitRec::state. Enumerators are numbered
 * consecutively 0..8. */
223  enum ArbitState {
224  ARBIT_NULL = 0,
225  ARBIT_INIT = 1, // create new ticket
226  ARBIT_FIND = 2, // find candidate arbitrator node
227  ARBIT_PREP1 = 3, // PREP db nodes with null ticket
228  ARBIT_PREP2 = 4, // PREP db nodes with current ticket
229  ARBIT_START = 5, // START arbitrator API thread
230  ARBIT_RUN = 6, // running with arbitrator
231  ARBIT_CHOOSE = 7, // ask arbitrator after network partition
232  ARBIT_CRASH = 8 // crash ourselves
233  };
234 
/* Arbitration bookkeeping; Qmgr holds one instance in its arbitRec member.
 * The constructor is empty, so it initialises none of the fields. */
235  struct ArbitRec {
236  ArbitRec() {}
237 
  // Arbitration method; each enumerator mirrors an ARBIT_METHOD_* constant.
238  enum Method {
239  DISABLED = ARBIT_METHOD_DISABLED, // Arbitration disabled
240  METHOD_DEFAULT = ARBIT_METHOD_DEFAULT, // Default arbitration
241  // Delay commit to give "external" time to arbitrate
242  METHOD_EXTERNAL = ARBIT_METHOD_WAITEXTERNAL
243  } method;
244 
245  ArbitState state; // state
246  bool newstate; // flag to initialize new state
247  unsigned thread; // identifies a continueB "thread"
248  NodeId node; // current arbitrator candidate
249  ArbitTicket ticket; // ticket
250  NodeBitmask apiMask[1+2]; // arbitrators 0=all 1,2=per rank (3 entries)
251  NdbNodeBitmask newMask; // new nodes to process in RUN state
252  Uint8 sendCount; // control send/recv of signals
253  Uint8 recvCount;
254  NdbNodeBitmask recvMask; // left to recv
255  Uint32 code; // code field from signal
256  Uint32 failureNr; // cfailureNr at arbitration start
257  Uint32 timeout; // timeout for CHOOSE state
258  NDB_TICKS timestamp; // timestamp for checking timeouts
259 
  /* True when sd names the same node as this record and ticket.match()
   * accepts the ticket carried by sd. */
260  inline bool match(ArbitSignalData* sd) {
261  return
262  node == sd->node &&
263  ticket.match(sd->ticket);
264  }
265 
  /* Stores the current NdbTick_CurrentMillisecond() reading in timestamp. */
266  inline void setTimestamp() {
267  timestamp = NdbTick_CurrentMillisecond();
268  }
269 
  /* Current NdbTick_CurrentMillisecond() reading minus the stored
   * timestamp; yields 0 when the current reading is the smaller one. */
270  inline NDB_TICKS getTimediff() {
271  NDB_TICKS now = NdbTick_CurrentMillisecond();
272  return now < timestamp ? 0 : now - timestamp;
273  }
274  };
275 
276  /* State values for handling ENABLE_COMREQ / ENABLE_COMCONF. */
/* Three values numbered 0..2; per the comment preceding this enum they are
 * used when handling ENABLE_COMREQ / ENABLE_COMCONF. */
277  enum EnableComState {
278  ENABLE_COM_CM_ADD_COMMIT = 0,
279  ENABLE_COM_CM_COMMIT_NEW = 1,
280  ENABLE_COM_API_REGREQ = 2
281  };
282 
283 public:
285  virtual ~Qmgr();
286 
287 private:
288  BLOCK_DEFINES(Qmgr);
289 
290  // Transit signals
  // Every exec<NAME> member in this section takes the Signal being handled
  // and returns void.
291  void execDEBUG_SIG(Signal* signal);
292  void execCONTINUEB(Signal* signal);
293  void execCM_HEARTBEAT(Signal* signal);
294  void execCM_ADD(Signal* signal);
295  void execCM_ACKADD(Signal* signal);
296  void execCM_REGREQ(Signal* signal);
297  void execCM_REGCONF(Signal* signal);
298  void execCM_REGREF(Signal* signal);
299  void execCM_NODEINFOREQ(Signal* signal);
300  void execCM_NODEINFOCONF(Signal* signal);
301  void execCM_NODEINFOREF(Signal* signal);
302  void execPREP_FAILREQ(Signal* signal);
303  void execPREP_FAILCONF(Signal* signal);
304  void execPREP_FAILREF(Signal* signal);
305  void execCOMMIT_FAILREQ(Signal* signal);
306  void execCOMMIT_FAILCONF(Signal* signal);
307  void execFAIL_REP(Signal* signal);
308  void execPRES_TOREQ(Signal* signal);
309  void execPRES_TOCONF(Signal* signal);
310  void execDISCONNECT_REP(Signal* signal);
311  void execSYSTEM_ERROR(Signal* signal);
312  void execSTOP_REQ(Signal* signal);
313 
314  // Received signals
315  void execDUMP_STATE_ORD(Signal* signal);
316  void execCONNECT_REP(Signal* signal);
317  void execNDB_FAILCONF(Signal* signal);
318  void execNF_COMPLETEREP(Signal*);
319  void execREAD_CONFIG_REQ(Signal* signal);
320  void execSTTOR(Signal* signal);
321  void execCM_INFOCONF(Signal* signal);
322  void execCLOSE_COMCONF(Signal* signal);
323  void execAPI_REGREQ(Signal* signal);
324  void execAPI_FAILCONF(Signal* signal);
325  void execREAD_NODESREQ(Signal* signal);
326  void execAPI_FAILREQ(Signal* signal);
327 
328  void execREAD_NODESREF(Signal* signal);
329  void execREAD_NODESCONF(Signal* signal);
330 
331  void execDIH_RESTARTREF(Signal* signal);
332  void execDIH_RESTARTCONF(Signal* signal);
333 
334  void execAPI_VERSION_REQ(Signal* signal);
335  void execAPI_BROADCAST_REP(Signal* signal);
336 
337  void execNODE_FAILREP(Signal *);
338  void execALLOC_NODEID_REQ(Signal *);
339  void execALLOC_NODEID_CONF(Signal *);
340  void execALLOC_NODEID_REF(Signal *);
  // The non-exec members below are helpers declared alongside the handlers.
341  void completeAllocNodeIdReq(Signal *);
342  void execENABLE_COMCONF(Signal *signal);
  // One handle* routine per EnableComState value; the COMMIT_NEW variant
  // takes no node argument.
343  void handleEnableComAddCommit(Signal *signal, Uint32 node);
344  void handleEnableComCommitNew(Signal *signal);
345  void handleEnableComApiRegreq(Signal *signal, Uint32 node);
346  void sendApiRegConf(Signal *signal, Uint32 node);
347 
348  void execSTART_ORD(Signal*);
349 
350  // Arbitration signals
351  void execARBIT_CFG(Signal* signal);
352  void execARBIT_PREPREQ(Signal* signal);
353  void execARBIT_PREPCONF(Signal* signal);
354  void execARBIT_PREPREF(Signal* signal);
355  void execARBIT_STARTCONF(Signal* signal);
356  void execARBIT_STARTREF(Signal* signal);
357  void execARBIT_CHOOSECONF(Signal* signal);
358  void execARBIT_CHOOSEREF(Signal* signal);
359  void execARBIT_STOPREP(Signal* signal);
360 
361  void execUPGRADE_PROTOCOL_ORD(Signal*);
362 
363  // Connectivity check signals
364  void execNODE_PINGREQ(Signal* signal);
365  void execNODE_PINGCONF(Signal* signal);
366 
367  // Statement blocks
368  void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
369  Uint32 check_startup(Signal* signal);
370 
  // Note the differing widths: api_failed takes the node as Uint32,
  // node_failed as Uint16.
371  void api_failed(Signal* signal, Uint32 aFailedNode);
372  void node_failed(Signal* signal, Uint16 aFailedNode);
373  void checkStartInterface(Signal* signal, Uint64 now);
374  void failReport(Signal* signal,
375  Uint16 aFailedNode,
376  UintR aSendFailRep,
377  FailRep::FailCause failCause,
378  Uint16 sourceNode);
379  void findNeighbours(Signal* signal, Uint32 from);
380  Uint16 translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId);
381 
  // Overload taking a Signal; a parameterless initData() is declared under
  // "Initialisation" further down.
382  void initData(Signal* signal);
383  void sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 TfailNo);
384  void sendPrepFailReq(Signal* signal, Uint16 aNode);
385  void sendApiFailReq(Signal* signal, Uint16 aFailedNode, bool sumaOnly);
386  void sendApiRegRef(Signal*, Uint32 ref, ApiRegRef::ErrorCode);
387 
388  // Generated statement blocks
389  void startphase1(Signal* signal);
390  void electionWon(Signal* signal);
391  void cmInfoconf010Lab(Signal* signal);
392 
393  void apiHbHandlingLab(Signal* signal, Uint64 now);
394  void timerHandlingLab(Signal* signal);
395  void hbReceivedLab(Signal* signal);
396  void sendCmRegrefLab(Signal* signal, BlockReference ref,
397  CmRegRef::ErrorCode);
398  void systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line, NodeId);
  // message is optional and defaults to NULL.
399  void systemErrorLab(Signal* signal, Uint32 line,
400  const char* message = NULL);
401  void prepFailReqLab(Signal* signal);
402  void prepFailConfLab(Signal* signal);
403  void prepFailRefLab(Signal* signal);
404  void commitFailReqLab(Signal* signal);
405  void commitFailConfLab(Signal* signal);
406  void failReportLab(Signal* signal, Uint16 aFailedNode,
407  FailRep::FailCause aFailCause,
408  Uint16 sourceNode);
409  void sendCommitFailReq(Signal* signal);
410  void presToConfLab(Signal* signal);
411  void sendSttorryLab(Signal* signal);
412  void sttor020Lab(Signal* signal);
413  void closeComConfLab(Signal* signal);
414  void apiRegReqLab(Signal* signal);
415  void regreqTimeLimitLab(Signal* signal);
416  void regreqTimeMasterLimitLab(Signal* signal);
417  void cmRegreq010Lab(Signal* signal);
418  void cmRegconf010Lab(Signal* signal);
419  void sttor010Lab(Signal* signal);
420  void sendHeartbeat(Signal* signal);
421  void checkHeartbeat(Signal* signal);
422  void setHbDelay(UintR aHbDelay);
423  void setHbApiDelay(UintR aHbApiDelay);
424  void setArbitTimeout(UintR aArbitTimeout);
425  void setCCDelay(UintR aCCDelay);
426 
427  // Interface to arbitration module
428  void handleArbitStart(Signal* signal);
429  void handleArbitApiFail(Signal* signal, Uint16 nodeId);
430  void handleArbitNdbAdd(Signal* signal, Uint16 nodeId);
431  void handleArbitCheck(Signal* signal);
432 
433  // Private arbitration routines
434  Uint32 getArbitDelay();
435  Uint32 getArbitTimeout();
436  void startArbitThread(Signal* signal);
437  void runArbitThread(Signal* signal);
438  void stateArbitInit(Signal* signal);
439  void stateArbitFind(Signal* signal);
440  void stateArbitPrep(Signal* signal);
441  void stateArbitStart(Signal* signal);
442  void stateArbitRun(Signal* signal);
443  void stateArbitChoose(Signal* signal);
444  void stateArbitCrash(Signal* signal);
  // Two overloads, one per bitmask type; each receives the mask by
  // non-const reference.
445  void computeArbitNdbMask(NodeBitmaskPOD& aMask);
446  void computeArbitNdbMask(NdbNodeBitmaskPOD& aMask);
  // mask is passed by value and defaults to a default-constructed
  // NodeBitmask.
447  void reportArbitEvent(Signal* signal, Ndb_logevent_type type,
448  const NodeBitmask mask = NodeBitmask());
449 
450  // Interface to Connectivity Check
451  void startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 node);
452  void checkConnectivityTimeSignal(Signal* signal);
453  void connectivityCheckCompleted(Signal* signal);
454  bool isNodeConnectivitySuspect(Uint32 nodeId) const;
455  void handleFailFromSuspect(Signal* signal,
456  Uint32 reason,
457  Uint16 aFailedNode,
458  Uint16 sourceNode);
459 
460  // Initialisation
461  void initData();
462  void initRecords();
463 
464  // Transit signals
465  // Variables
  // NOTE(review): the two headings above do not describe what follows; the
  // declarations below are two const version checks and a group of
  // cmAdd/join/send helpers, not signal handlers or variables.
466 
467  bool checkAPIVersion(NodeId, Uint32 nodeVersion, Uint32 ownVersion) const;
468  bool checkNDBVersion(NodeId, Uint32 nodeVersion, Uint32 ownVersion) const;
469 
470  void cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec* self);
471  void sendCmAckAdd(Signal *, Uint32 nodeId, CmAdd::RequestType);
472  void joinedCluster(Signal* signal, NodeRecPtr nodePtr);
473  void sendCmRegReq(Signal * signal, Uint32 nodeId);
474  void sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self);
475 
476 private: // repeats the access level already opened at listing line 287
  // theNodes is an array parameter; noOfNodes is passed alongside it.
  // NOTE(review): presumably noOfNodes is the element count of theNodes -
  // confirm against the definition.
477  void sendPrepFailReqRef(Signal* signal,
478  Uint32 dstBlockRef,
479  GlobalSignalNumber gsn,
480  Uint32 blockRef,
481  Uint32 failNo,
482  Uint32 noOfNodes,
483  const NodeId theNodes[]);
484 
485  void handleApiCloseComConf(Signal* signal);
  // NodeRecPtr is taken by value in all three routines; only the emptiness
  // query is const.
486  void add_failconf_block(NodeRecPtr, Uint32 block);
487  bool remove_failconf_block(NodeRecPtr, Uint32 block);
488  bool is_empty_failconf_block(NodeRecPtr) const;
489 
490  /* Wait this time until we try to join the */
491  /* cluster again */
  /* NOTE(review): no declaration follows the comment above in this listing;
   * the variable it describes is not visible here. */
492 
493  /**** Common stored variables ****/
494 
495  NodeRec *nodeRec; /* Pointer to the NodeRec storage */
496  ArbitRec arbitRec;
497 
498  /* Block references ------------------------------*/
499  BlockReference cpdistref; /* Dist. ref of president */
500 
501  /* Node numbers. ---------------------------------*/
502  Uint16 cneighbourl; /* Node no. of lower neighbour */
503  Uint16 cneighbourh; /* Node no. of higher neighbour */
504  Uint16 cpresident; /* Node no. of president */
505 
506  /* Counters --------------------------------------*/
507  Uint16 cnoOfNodes; /* Static node counter */
508  /* Status flags ----------------------------------*/
509 
510  Uint32 c_restartPartialTimeout;
511  Uint32 c_restartPartionedTimeout; /* spelling "Partioned" is part of the name */
512  Uint32 c_restartFailureTimeout;
513  Uint32 c_restartNoNodegroupTimeout;
514  Uint64 c_start_election_time;
515 
516  Uint16 creadyDistCom;
517 
518  Uint16 cdelayRegreq;
519  Uint16 cpresidentAlive;
520  Uint16 cnoFailedNodes;
521  Uint16 cnoPrepFailedNodes;
522  Uint16 cnoCommitFailedNodes;
523  Uint16 cactivateApiCheck;
524  Uint16 c_allow_api_connect;
525  UintR chbApiDelay;
526 
527  UintR ccommitFailureNr;
528  UintR cprepareFailureNr;
529  UintR ctoFailureNr;
530  UintR cfailureNr;
531 
532  QmgrState ctoStatus;
533  bool cHbSent;
534  NDB_TICKS clatestTransactionCheck;
535 
536  Timer interface_check_timer;
537  Timer hb_check_timer;
538  Timer hb_send_timer;
539  Timer hb_api_timer;
540 
541 
  /* Three node-number arrays, each with MAX_NDB_NODES slots. */
542  Uint16 cfailedNodes[MAX_NDB_NODES];
543  Uint16 cprepFailedNodes[MAX_NDB_NODES];
544  Uint16 ccommitFailedNodes[MAX_NDB_NODES];
545 
/* Aggregate bundling a RequestTracker with an AllocNodeIdReq, a connect
 * count and an error code. Qmgr holds exactly one instance,
 * opAllocNodeIdReq, declared right after the struct. */
546  struct OpAllocNodeIdReq {
547  RequestTracker m_tracker;
548  AllocNodeIdReq m_req;
549  Uint32 m_connectCount;
550  Uint32 m_error;
551  };
552 
553  struct OpAllocNodeIdReq opAllocNodeIdReq;
554 
555  StopReq c_stopReq;
556  bool check_multi_node_shutdown(Signal* signal);
557 
  // Two overloads: one takes only a type, the other a type and a version.
558  void recompute_version_info(Uint32 type);
559  void recompute_version_info(Uint32 type, Uint32 version);
560  void execNODE_VERSION_REP(Signal* signal);
561  void sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr);
562  void sendVersionedDb(NodeReceiverGroup rg,
563  GlobalSignalNumber gsn,
564  Signal* signal,
565  Uint32 length,
566  JobBufferLevel jbuf,
567  Uint32 minversion);
568 
569  bool m_micro_gcp_enabled;
570 
571  // A user-defined hbOrder must set all values non-zero and distinct.
  // NOTE(review): the meaning of the int result is not visible in this
  // header - confirm against the definition.
572  int check_hb_order_config();
573  bool m_hb_order_config_used;
574 
  // The member below exists only in builds that define ERROR_INSERT.
575 #ifdef ERROR_INSERT
576  Uint32 nodeFailCount;
577 #endif
578 };
579 
580 #endif