// DbtuxStat.cpp — MySQL 5.6.14 source.
// NOTE(review): the doxygen page-navigation text that headed this extraction
// ("All Classes Namespaces Files ...") was not code and is reduced to this comment.
1 /*
2  Copyright (C) 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc.
3  All rights reserved. Use is subject to license terms.
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; version 2 of the License.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #define DBTUX_STAT_CPP
20 #include "Dbtux.hpp"
21 #include <math.h>
22 
23 // debug note: uses new-style debug macro "D" unlike rest of DBTUX
24 // there is no filtering feature (yet) like "DebugStat"
25 
26 void
27 Dbtux::execREAD_PSEUDO_REQ(Signal* signal)
28 {
29  jamEntry();
30  ScanOpPtr scanPtr;
31  scanPtr.i = signal->theData[0];
32  c_scanOpPool.getPtr(scanPtr);
33  StatOpPtr statPtr;
34  statPtr.i = scanPtr.p->m_statOpPtrI;
35 
36  Uint32 attrId = signal->theData[1];
37  Uint32* out = &signal->theData[0];
38 
39  switch (attrId) {
40  case AttributeHeader::RECORDS_IN_RANGE:
41  jam();
42  ndbrequire(statPtr.i == RNIL);
43  statRecordsInRange(scanPtr, out);
44  break;
45  case AttributeHeader::INDEX_STAT_KEY:
46  jam();
47  ndbrequire(statPtr.i != RNIL);
48  c_statOpPool.getPtr(statPtr);
49  statScanReadKey(statPtr, out);
50  break;
51  case AttributeHeader::INDEX_STAT_VALUE:
52  jam();
53  ndbrequire(statPtr.i != RNIL);
54  c_statOpPool.getPtr(statPtr);
55  statScanReadValue(statPtr, out);
56  break;
57  default:
58  ndbrequire(false);
59  break;
60  }
61 }
62 
63 // RECORDS_IN_RANGE
64 
/*
 * Estimate entries in range. Scan is at first entry. Search for last
 * entry i.e. start of descending scan. Use the 2 positions to estimate
 * entries before and after the range. Finally get entries in range by
 * subtracting from total. Errors come from imperfectly balanced tree
 * and from uncommitted entries which differ only in tuple version.
 *
 * Returns 4 Uint32 values: 0) total entries 1) in range 2) before range
 * 3) after range. 1-3) are estimates and need not add up to 0).
 */
void
Dbtux::statRecordsInRange(ScanOpPtr scanPtr, Uint32* out)
{
  ScanOp& scan = *scanPtr.p;
  Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
  const Index& index = *c_indexPool.getPtr(frag.m_indexId);
  TreeHead& tree = frag.m_tree;
  // get first and last position
  TreePos pos1 = scan.m_scanPos;   // scan is already positioned at first entry
  TreePos pos2;
  { // as in scanFirst()
    // search for the upper (descending-start) bound to locate the last entry
    const unsigned idir = 1;
    const ScanBound& scanBound = scan.m_scanBound[idir];
    KeyDataC searchBoundData(index.m_keySpec, true);
    KeyBoundC searchBound(searchBoundData);
    unpackBound(c_ctx, scanBound, searchBound);
    searchToScan(frag, idir, searchBound, pos2);
    // committed read (same timeslice) and range not empty
    ndbrequire(pos2.m_loc != NullTupLoc);
  }
  // wl4124_todo change all to Uint64 if ever needed (unlikely)
  out[0] = (Uint32)frag.m_entryCount;
  out[2] = getEntriesBeforeOrAfter(frag, pos1, 0);
  out[3] = getEntriesBeforeOrAfter(frag, pos2, 1);
  if (pos1.m_loc == pos2.m_loc) {
    // both endpoints fall in the same node: count within node is exact
    ndbrequire(pos2.m_pos >= pos1.m_pos);
    out[1] = pos2.m_pos - pos1.m_pos + 1;
  } else {
    // estimate in-range as total minus the before/after estimates
    Uint32 rem = out[2] + out[3];
    if (out[0] > rem) {
      out[1] = out[0] - rem;
    } else {
      // random guess one node apart
      out[1] = tree.m_maxOccup;
    }
  }
}
112 
113 /*
114  * Estimate number of entries strictly before or after given position.
115  * Each branch to right direction wins parent node and the subtree on
116  * the other side. Subtree entries is estimated from depth and total
117  * entries by assuming that the tree is perfectly balanced.
118  */
119 Uint32
120 Dbtux::getEntriesBeforeOrAfter(Frag& frag, TreePos pos, unsigned idir)
121 {
122  NodeHandle node(frag);
123  selectNode(node, pos.m_loc);
124  Uint16 path[MaxTreeDepth + 1];
125  unsigned depth = getPathToNode(node, path);
126  ndbrequire(depth != 0 && depth <= MaxTreeDepth);
127  TreeHead& tree = frag.m_tree;
128  Uint32 cnt = 0;
129  Uint32 tot = (Uint32)frag.m_entryCount;
130  unsigned i = 0;
131  // contribution from levels above
132  while (i + 1 < depth) {
133  unsigned occup2 = (path[i] >> 8);
134  unsigned side = (path[i + 1] & 0xFF);
135  // subtree of this node has about half the entries
136  tot = tot >= occup2 ? (tot - occup2) / 2 : 0;
137  // branch to other side wins parent and a subtree
138  if (side != idir) {
139  cnt += occup2;
140  cnt += tot;
141  }
142  i++;
143  }
144  // contribution from this node
145  unsigned occup = (path[i] >> 8);
146  ndbrequire(pos.m_pos < occup);
147  if (idir == 0) {
148  if (pos.m_pos != 0)
149  cnt += pos.m_pos - 1;
150  } else {
151  cnt += occup - (pos.m_pos + 1);
152  }
153  // contribution from levels below
154  tot = tot >= occup ? (tot - occup) / 2 : 0;
155  cnt += tot;
156  return cnt;
157 }
158 
/*
 * Construct path to given node. Returns depth. Root node has path
 * 2 and depth 1. In general the path is 2{0,1}* where 0,1 is the side
 * (left,right branch). In addition the occupancy of each node is
 * returned in the upper 8 bits.
 */
unsigned
Dbtux::getPathToNode(NodeHandle node, Uint16* path)
{
  TupLoc loc = node.m_loc;
  unsigned i = MaxTreeDepth;
  // walk parent links (link 2) up to the root, filling path[] from the
  // high end; depth is unknown until the root is reached
  while (loc != NullTupLoc) {
    jam();
    selectNode(node, loc);
    path[i] = node.getSide() | (node.getOccup() << 8);
    loc = node.getLink(2);   // parent link
    ndbrequire(i != 0);
    i--;
  }
  unsigned depth = MaxTreeDepth - i;
  // shift the collected entries down to path[0..depth-1], root first
  unsigned j = 0;
  while (j < depth) {
    path[j] = path[i + 1 + j];
    j++;
  }
  // sentinel past the valid entries to expose any overrun
  path[j] = 0xFFFF; // catch bug
  return depth;
}
187 
188 // stats scan
189 
// windows has no log2 — compute it from natural logarithms instead
static double
tux_log2(double x)
{
  const double ln2 = ::log((double)2.0);
  return ::log(x) / ln2;
}
196 
197 int
198 Dbtux::statScanInit(StatOpPtr statPtr, const Uint32* data, Uint32 len,
199  Uint32* usedLen)
200 {
201  StatOp& stat = *statPtr.p;
202  ScanOp& scan = *c_scanOpPool.getPtr(stat.m_scanOpPtrI);
203  Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
204  const Index& index = *c_indexPool.getPtr(scan.m_indexId);
205  D("statScanInit");
206 
207  // options
208  stat.m_saveSize = c_indexStatSaveSize;
209  stat.m_saveScale = c_indexStatSaveScale;
210  Uint32 offset = 0;
211  while (offset + 1 <= len)
212  {
213  const Uint32 type = data[offset];
214  const Uint32 value = data[offset + 1];
215  switch (type) {
216  case TuxBoundInfo::StatSaveSize:
217  jam();
218  stat.m_saveSize = value;
219  break;
220  case TuxBoundInfo::StatSaveScale:
221  jam();
222  stat.m_saveScale = value;
223  break;
224  default:
225  jam();
226  scan.m_errorCode = TuxBoundInfo::InvalidBounds;
227  return -1;
228  }
229  offset += 2;
230  }
231  *usedLen = offset;
232 
233  // average key bytes as stored in stats
234  Uint32 avgKeyBytes = 0;
235  if (frag.m_entryCount != 0)
236  {
237  avgKeyBytes = (Uint32)(frag.m_entryBytes / frag.m_entryCount);
238  if (avgKeyBytes > stat.m_keySpec.get_max_data_len(false))
239  avgKeyBytes = stat.m_keySpec.get_max_data_len(false);
240  }
241 
242  // compute batch size - see wl4124.txt
243  {
244  double a = stat.m_saveSize;
245  double b = stat.m_saveScale;
246  double c = avgKeyBytes;
247  double d = index.m_numAttrs;
248  double e = c + (1 + d) * 4; // approx size of one sample
249  double f = (double)frag.m_entryCount;
250  double g = f * e; // max possible sample bytes
251  if (g < 1.0)
252  g = 1.0;
253  double h = 1 + 0.01 * b * tux_log2(g); // scale factor
254  double i = a * h; // sample bytes allowed
255  double j = i / e; // sample count
256  double k = f / j; // sampling frequency
257  if (k < 1.0)
258  k = 1.0;
259  double l = e * f / k; // estimated sample bytes
260 
261  stat.m_batchSize = (Uint32)(k + 0.5);
262  stat.m_estBytes = (Uint32)(l + 0.5);
263  ndbrequire(stat.m_batchSize != 0);
264  D("computed batch size" << V(stat));
265  }
266 
267  // key spec is already defined as ref to index key spec
268  stat.m_keyCount = index.m_numAttrs;
269  stat.m_valueCount = 1 + stat.m_keyCount;
270  stat.m_keyData1.reset();
271  stat.m_keyData2.reset();
272 
273  // define value spec
274  stat.m_valueCount = 1 + stat.m_keyCount;
275  NdbPack::Spec& valueSpec = stat.m_valueSpec;
276  valueSpec.reset();
277  {
278  NdbPack::Type type(NDB_TYPE_UNSIGNED, 4, false, 0);
279  int ret = valueSpec.add(type, stat.m_valueCount);
280  ndbrequire(ret == 0);
281  }
282 
283  return 0;
284 }
285 
/*
 * Add one scanned index entry to the statistics being collected.
 * Returns 1 when a sample should be reported to the caller (batch
 * full on a key change, or last entry of the index), else 0.
 * stat.m_usePrev tells the readers whether to report the previous
 * or the current entry.
 */
int
Dbtux::statScanAddRow(StatOpPtr statPtr, TreeEnt ent)
{
  StatOp& stat = *statPtr.p;
  ScanOp& scan = *c_scanOpPool.getPtr(stat.m_scanOpPtrI);
  Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
  D("statScanAddRow" << V(stat));

  KeyData& keyData1 = stat.m_keyData1;     // previous entry key
  KeyData& keyData2 = stat.m_keyData2;     // current entry key
  StatOp::Value& value1 = stat.m_value1;   // stats as of previous entry
  StatOp::Value& value2 = stat.m_value2;   // stats as of current entry

  stat.m_rowCount++;
  stat.m_batchCurr++;
  const bool firstRow = (stat.m_rowCount == 1);
  int ret;

  // save previous value
  if (!firstRow) {
    ret = keyData1.copy(keyData2);
    ndbrequire(ret == 0);
    value1 = value2;
  }

  // read current entry key
  readKeyAttrs(c_ctx, frag, ent, keyData2, stat.m_keyCount);

  // calculate new values
  value2.m_rir = stat.m_rowCount;   // rows-in-range seen so far
  if (firstRow)
  {
    // first entry contributes one unique value to every key prefix
    for (Uint32 i = 0; i < stat.m_keyCount; i++)
      value2.m_unq[i] = 1;
    stat.m_keyChange = false;
  }
  else
  {
    // how many initial attrs are equal
    Uint32 num_eq;
    int res = keyData1.cmp(keyData2, stat.m_keyCount, num_eq);
    ndbrequire(res <= 0);   // ascending scan: keys never decrease
    stat.m_keyChange = (res != 0);

    if (stat.m_keyChange)
    {
      // prefix of length num_eq+1 (and all longer ones) gained a new value
      ndbrequire(num_eq < stat.m_keyCount);
      value2.m_unq[num_eq]++;
      // propagate down
      for (Uint32 i = num_eq + 1; i < stat.m_keyCount; i++)
        value2.m_unq[i]++;
    }
  }

  // always report last index entry
  // (detect last entry by checking there is nothing to the right of
  // the scan position anywhere up the tree)
  bool lastEntry = false;
  do
  {
    NodeHandle node(frag);
    TreePos pos = scan.m_scanPos;
    selectNode(node, pos.m_loc);
    // more entries in this node
    const unsigned occup = node.getOccup();
    // funny cast to avoid signed vs unsigned warning
    if (pos.m_dir == 3 && pos.m_pos + (unsigned)1 < occup)
    {
      jam();
      break;
    }
    // can continue to right sub-tree
    if (node.getLink(1) != NullTupLoc)
    {
      jam();
      break;
    }
    // while child on right
    while (node.getSide() == 1)
    {
      jam();
      TupLoc loc = node.getLink(2);   // climb to parent
      selectNode(node, loc);
    }
    // did not reach root
    if (node.getSide() != 2)
    {
      jam();
      break;
    }
    lastEntry = true;
  }
  while (0);

  stat.m_usePrev = true;
  if (lastEntry)
  {
    jam();
    // report the current (final) entry, not the previous one
    stat.m_usePrev = false;
    return 1;
  }
  if (stat.m_batchCurr >= stat.m_batchSize && stat.m_keyChange)
  {
    jam();
    // batch complete: report previous entry and start a new batch
    stat.m_batchCurr = 0;
    return 1;
  }
  return 0;
}
393 
394 void
395 Dbtux::statScanReadKey(StatOpPtr statPtr, Uint32* out)
396 {
397  StatOp& stat = *statPtr.p;
398  int ret;
399 
400  KeyData& keyData = stat.m_keyData;
401  ret = keyData.copy(stat.m_usePrev ? stat.m_keyData1 : stat.m_keyData2);
402  ndbrequire(ret == 0);
403  D("statScanReadKey" << V(keyData));
404  keyData.convert(NdbPack::Endian::Little);
405  memcpy(out, keyData.get_full_buf(), keyData.get_full_len());
406 }
407 
/*
 * Pack the sample value (RIR and per-prefix unique counts) into out[]
 * in little-endian NdbPack format.  Uses the previous or current
 * sample depending on stat.m_usePrev (set by statScanAddRow).
 */
void
Dbtux::statScanReadValue(StatOpPtr statPtr, Uint32* out)
{
  StatOp& stat = *statPtr.p;
  int ret;
  Uint32 len_out;

  const StatOp::Value& value = stat.m_usePrev ? stat.m_value1 : stat.m_value2;

  // verify sanity
  ndbrequire(value.m_rir != 0);
  for (Uint32 k = 0; k < stat.m_keyCount; k++)
  {
    ndbrequire(value.m_unq[k] != 0);
    // a prefix cannot have more unique values than rows seen
    ndbrequire(value.m_rir >= value.m_unq[k]);
    // longer prefixes have at least as many unique values as shorter ones
    ndbrequire(k == 0 || value.m_unq[k] >= value.m_unq[k - 1]);
  }

  NdbPack::Data& valueData = stat.m_valueData;
  valueData.reset();

  // pack RIR (one 4-byte word) followed by the unique counts
  ret = valueData.add(&value.m_rir, &len_out);
  ndbrequire(ret == 0 && len_out == 4);
  ret = valueData.add(&value.m_unq[0], stat.m_keyCount, &len_out);
  ndbrequire(ret == 0 && len_out == stat.m_keyCount * 4);
  ret = valueData.finalize();
  ndbrequire(ret == 0);

  D("statScanReadValue" << V(valueData));
  valueData.convert(NdbPack::Endian::Little);
  memcpy(out, valueData.get_full_buf(), valueData.get_full_len());
}
440 
// at end of stats update, TRIX sends loadTime
void
Dbtux::execINDEX_STAT_REP(Signal* signal)
{
  jamEntry();
  const IndexStatRep* rep = (const IndexStatRep*)signal->getDataPtr();

  switch (rep->requestType) {
  case IndexStatRep::RT_UPDATE_REQ:
    // update requests are only sent by this block, never received
    ndbrequire(false);
    break;
  case IndexStatRep::RT_UPDATE_CONF:
    {
      Index& index = *c_indexPool.getPtr(rep->indexId);
      FragPtr fragPtr;
      findFrag(index, rep->fragId, fragPtr);
      ndbrequire(fragPtr.i != RNIL);
      // index.m_statFragPtrI need not be defined yet
      D("loadTime" << V(index.m_statLoadTime) << " ->" << V(rep->loadTime));
      // record when stats were last rebuilt; statMonCheck uses this to
      // apply the update-delay throttle
      index.m_statLoadTime = rep->loadTime;
    }
    break;
  default:
    ndbrequire(false);
    break;
  }
}
468 
469 // stats monitor
470 
471 void
472 Dbtux::execINDEX_STAT_IMPL_REQ(Signal* signal)
473 {
474  jamEntry();
475  const IndexStatImplReq* req = (const IndexStatImplReq*)signal->getDataPtr();
476 
477  StatMon& mon = c_statMon;
478  mon.m_req = *req;
479  mon.m_requestType = req->requestType;
480 
481  switch (mon.m_requestType) {
482  case IndexStatReq::RT_START_MON:
483  statMonStart(signal, mon);
484  break;
485  case IndexStatReq::RT_STOP_MON:
486  statMonStop(signal, mon);
487  break;
488  default:
489  ndbrequire(false);
490  break;
491  }
492 }
493 
494 void
495 Dbtux::statMonStart(Signal* signal, StatMon& mon)
496 {
497  const IndexStatImplReq* req = &mon.m_req;
498  Index& index = *c_indexPool.getPtr(req->indexId);
499  D("statMonStart" << V(mon));
500 
501  // RT_START_MON also sends ZNIL to all non-monitoring nodes
502  if (req->fragId == ZNIL)
503  {
504  jam();
505  index.m_statFragPtrI = RNIL;
506  D("non-monitoring node");
507  }
508  else
509  {
510  jam();
511  FragPtr fragPtr;
512  findFrag(index, req->fragId, fragPtr);
513  ndbrequire(fragPtr.i != RNIL);
514  index.m_statFragPtrI = fragPtr.i;
515  fragPtr.p->m_entryOps = 0;
516  D("monitoring node" << V(index));
517  }
518 
519  statMonConf(signal, mon);
520 }
521 
/*
 * Disable stats monitoring of the index on this node and confirm back
 * to the requester.
 */
void
Dbtux::statMonStop(Signal* signal, StatMon& mon)
{
  const IndexStatImplReq* req = &mon.m_req;
  Index& index = *c_indexPool.getPtr(req->indexId);
  D("statMonStop" << V(mon));

  // RT_STOP_MON simply sends ZNIL to every node
  ndbrequire(req->fragId == ZNIL);
  index.m_statFragPtrI = RNIL;

  statMonConf(signal, mon);
}
535 
/*
 * Send INDEX_STAT_IMPL_CONF back to the sender recorded in the saved
 * monitor request.
 */
void
Dbtux::statMonConf(Signal* signal, StatMon& mon)
{
  const IndexStatImplReq* req = &mon.m_req;
  D("statMonConf" << V(mon));

  IndexStatImplConf* conf = (IndexStatImplConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = req->senderData;
  sendSignal(req->senderRef, GSN_INDEX_STAT_IMPL_CONF,
             signal, IndexStatImplConf::SignalLength, JBB);
}
548 
549 // continueB loop
550 
/*
 * Schedule the next stats-monitor round: send a delayed CONTINUEB to
 * ourselves carrying the current loop index.
 */
void
Dbtux::statMonSendContinueB(Signal* signal)
{
  StatMon& mon = c_statMon;
  D("statMonSendContinueB" << V(mon));

  signal->theData[0] = TuxContinueB::StatMon;
  signal->theData[1] = mon.m_loopIndexId;
  sendSignalWithDelay(reference(), GSN_CONTINUEB,
                      signal, mon.m_loopDelay, 2);
}
562 
563 void
564 Dbtux::statMonExecContinueB(Signal* signal)
565 {
566  StatMon& mon = c_statMon;
567  D("statMonExecContinueB" << V(mon));
568 
569  if (!c_indexStatAutoUpdate ||
570  c_indexStatTriggerPct == 0 ||
571  getNodeState().startLevel != NodeState::SL_STARTED)
572  {
573  }
574  else
575  {
576  jam();
577  statMonCheck(signal, mon);
578  }
579  statMonSendContinueB(signal);
580 }
581 
/*
 * One round of the stats monitor: scan up to 32 indexes (resuming at
 * mon.m_loopIndexId) and decide whether any monitored fragment has
 * changed enough to trigger a stats rebuild.  At most one rebuild
 * request is sent per round.
 */
void
Dbtux::statMonCheck(Signal* signal, StatMon& mon)
{
  const Uint32 now = (Uint32)time(0);   // wall-clock seconds
  D("statMonCheck" << V(mon) << V(now));

  const uint maxloop = 32;
  for (uint loop = 0; loop < maxloop; loop++, mon.m_loopIndexId++)
  {
    jam();
    mon.m_loopIndexId %= c_indexPool.getSize();   // wrap around the pool

    const Index& index = *c_indexPool.getPtr(mon.m_loopIndexId);
    if (index.m_state == Index::NotDefined ||
        index.m_state == Index::Dropping ||
        index.m_statFragPtrI == RNIL)
    {
      jam();
      // not a live index monitored on this node
      continue;
    }
    const Frag& frag = *c_fragPool.getPtr(index.m_statFragPtrI);

    bool update = false;
    if (index.m_statLoadTime == 0)
    {
      jam();
      // stats never built yet: always trigger
      update = true;
      D("statMonCheck" << V(update) << V(index.m_statLoadTime));
    }
    else if (now < index.m_statLoadTime + c_indexStatUpdateDelay)
    {
      jam();
      // throttle: too soon since last build
      update = false;
      D("statMonCheck" << V(update) << V(index.m_statLoadTime));
    }
    else
    {
      const Uint64 count = frag.m_entryCount;
      const Uint64 ops = frag.m_entryOps;
      if (count <= 1)
      {
        jam();
        // near-empty fragment: any operation at all triggers an update
        update = (ops >= 1);
        D("statMonCheck" << V(update) << V(ops));
      }
      else
      {
        jam();
        // compute scaled percentags - see wl4124.txt
        double a = c_indexStatTriggerPct;
        double b = c_indexStatTriggerScale;
        double c = (double)count;
        double d = 1 + 0.01 * b * tux_log2(c); // inverse scale factor
        double e = a / d; // scaled trigger pct
        double f = (double)ops;
        double g = 100.0 * f / c;   // percent of entries changed
        update = (g >= e);
        D("statMonCheck" << V(update) << V(f) << V(c));
      }
    }

    if (update)
    {
      jam();
      statMonRep(signal, mon);
      // advance index afterwards
      mon.m_loopIndexId++;
      break;
    }
  }
}
653 
/*
 * Send an INDEX_STAT_REP update request to DBDICT asking for a stats
 * rebuild of the monitored fragment of the current loop index.
 */
void
Dbtux::statMonRep(Signal* signal, StatMon& mon)
{
  const Index& index = *c_indexPool.getPtr(mon.m_loopIndexId);
  const Frag& frag = *c_fragPool.getPtr(index.m_statFragPtrI);
  D("statMonRep" << V(mon));

  IndexStatRep* rep = (IndexStatRep*)signal->getDataPtrSend();
  rep->senderRef = reference();
  rep->senderData = mon.m_loopIndexId;
  rep->requestType = IndexStatRep::RT_UPDATE_REQ;
  rep->requestFlag = 0;
  rep->indexId = mon.m_loopIndexId;
  rep->indexVersion = 0; // not required
  rep->tableId = index.m_tableId;
  rep->fragId = frag.m_fragId;
  rep->loadTime = index.m_statLoadTime;   // echo current load time

  sendSignal(DBDICT_REF, GSN_INDEX_STAT_REP,
             signal, IndexStatRep::SignalLength, JBB);
}