MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SuperPool.hpp
1 /*
2  Copyright (C) 2005, 2006 MySQL AB
3  All rights reserved. Use is subject to license terms.
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; version 2 of the License.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #ifndef SUPER_POOL_HPP
20 #define SUPER_POOL_HPP
21 
22 #include <ndb_global.h>
23 
24 #include <pc.hpp>
25 #include <ErrorReporter.hpp>
26 
27 /*
28  * SuperPool - super pool for record pools (abstract class)
29  *
30  * Documents: SuperPool GroupPool RecordPool<T>
31  *
32  * SUPER POOL
33  *
34  * A "super pool" is a shared pool of pages of fixed size. A "record
35  * pool" is a pool of records of fixed size. One super pool instance is
36  * used by a number of record pools to allocate their memory. A special
37  * case is a "page pool" where a record is a simple page whose size
38  * divides super pool page size.
39  *
40  * A record pool allocates memory in pages. Thus each used page is
41  * associated with one record pool and one record type. The records on
42  * a page form an array starting at start of page. Thus each record has
43  * an index within the page. Any last partial record which does not fit
44  * on the page is disregarded.
45  *
46  * I-VALUE
47  *
48  * The old "i-p" principle is kept. A reference to a super pool page or
49  * record is stored as an "i-value" from which the record pointer "p" is
50  * computed. In super pool the i-value is a Uint32 with two parts:
51  *
52  * - "ip" index of page within super pool (high "pageBits")
53  * - "ir" index of record within page (low "recBits")
54  *
55  * At most 16 recBits are used, the rest are zero.
56  *
57  * The translation between "ip" and page address is described in next
58  * section. Once page address is known, the record address is found
59  * from "ir" in the obvious way.
60  *
61  * One advantage of i-value is that it can be verified. The level of
62  * verification can depend on compile options.
63  *
64  * - "v1" check i-value specifies valid page
65  * - "v2" check record type matches page type, see below
66  * - "v3" check record is in use
67  * - "v4" check unused record is unmodified
68  *
69  * Another advantage of a 32-bit i-value is that it extends the space of
70  * 32-bit addressable records on a 64-bit platform.
71  *
72  * MEMORY ROOT
73  *
74  * This super pool requires a "memory root" i.e. a memory address such
75  * that the index of a page "ip" satisfies
76  *
77  * page address = memory root + (signed)ip * page size
78  *
79  * This is possible on all platforms, provided that the memory root and
80  * all pages are either on the heap or on the stack, in order to keep
81  * the size of "ip" reasonably small.
82  *
83  * The cast (signed)ip is done as integer of pageBits bits. "ip" has
84  * same sign bit as i-value "i" so (signed)ip = (Int32)i >> recBits.
85  *
86  * RESERVED I-VALUES
87  *
88  * RNIL is 0xffffff00 (signed -256). It is used everywhere in NDB as
89  * "null pointer" i.e. as an i-value which does not point to a record.
90  * In addition the signed values -255 to -1 are reserved for use by the
91  * application.
92  *
93  * An i-value with all "ir" bits set is used as terminator in free
94  * record list. Unlike RNIL, it still has valid page bits "ip".
95  *
96  * Following restrictions avoid hitting the reserved values:
97  *
98  * - pageBits is <= 30
99  * - the maximum "ip" value 2^pageBits-1 (signed -1) is not used
100  * - the maximum "ir" value 2^recBits-1 is not used
101  *
102  * PAGE ENTRIES
103  *
104  * Each super pool page has a "page entry". It contains:
105  *
106  * - page type
107  * - i-value of first free record on page
108  * - page use count, to see if page can be freed
109  * - pointers (as i-values) to next and previous page in list
110  *
111  * Page entry cannot be stored on the page itself since this prevents
112  * aligning pages to OS block size and the use of BATs for page pools in
113  * NDB. For now the implementation provides an array of page entries
114  * with place for all potential (2^pageBits) entries.
115  *
116  * PAGE TYPE
117  *
118  * Page type is unique to the record pool using the super pool. It is
119  * assigned in record pool constructor. Page type zero means that the
120  * page is free i.e. not allocated to a record pool.
121  *
122  * Each "i-p" conversion checks ("v2") that the record belongs to same
123  * pool as the page. This check is much more common than page or record
124  * allocation. To make it cache effective, there is a separate page
125  * type array. It truncates type to one non-zero byte.
126  *
127  * GROUP POOL
128  *
129  * Each record pool belongs to a group. The group specifies minimum
130  * size or memory percentage the group must be able to allocate. The
131  * sum of the minimum sizes of group pools is normally smaller than
132  * super pool size. This provides unclaimed memory which a group can
133  * use temporarily to allocate more than its minimum.
134  *
135  * The record pools within a group compete freely for the available
136  * memory within the group.
137  *
138  * Typical exmaple is group of all metadata pools. The group allows
139  * specifying the memory to reserve for metadata, without having to
140  * specify number of tables, attributes, indexes, triggers, etc.
141  *
142  * PAGE LISTS
143  *
144  * Super pool has free page list. Each group pool uses it to allocate
145  * its own free page list. And each record pool within the group uses
146  * the group's free list to allocate its pages.
147  *
148  * A page allocated to a record pool has a use count i.e. number of used
149  * records. When use count drops to zero the page can be returned to
150  * the group. This is not necessarily done at once.
151  *
152  * The list of free records in a record pool has two levels. There are
153  * available pages (some free) and a singly linked free list within the
154  * page. A page allocated to record pool is on one of 4 lists:
155  *
156  * - free page (all free, available, could be returned to group)
157  * - busy page (some free, some used, available)
158  * - full page (none free)
159  * - current page (list of one), see below
160  *
161  * Some usage types (temporary pools) may never free records. They pay
162  * a small penalty for the extra overhead.
163  *
164  * RECORD POOL
165  *
166  * A pool of records which allocates its memory from a super pool
167  * instance via a group pool. There are 3 basic operations:
168  *
169  * - getPtr - translate i-value to pointer-to-record p
170  * - seize - allocate record
171  * - release - free record
172  *
173  * CURRENT PAGE
174  *
175  * getPtr is a fast computation which does not touch the page entry.
176  * For seize (and release) there is a small optimization.
177  *
178  * The "current page" is the page of latest seize. It is unlinked from
179  * its normal list and the free record pointer is stored under record
180  * pool instance.
181  *
182  * The page remains current until there is a seize and the page is full.
183  * Then the real page entry and its list membership are updated, and
184  * a new page is made current.
185  *
186  * This implies that each (active) record pool allocates at least one
187  * page which is never returned to the group.
188  *
189  * PAGE POLICY
190  *
191  * A group pool returns its "excess" (above minimum) free pages to the
192  * super pool immediately.
193  *
194  * Allocating a new page to a record pool is expensive due to free list
195  * setup. Therefore a record pool should not always return empty pages
196  * to the group. Policies:
197  *
198  * - "pp1" never return empty page to the group
199  * - "pp2" always return empty (non-current) page to the group
200  * - "pp3" simple hysteresis
201  *
202  * Last one "pp3" is used. It works as follows:
203  *
204  * When a page becomes free, check if number of free records exceeds
205  * some fixed fraction of all records. If it does, move all free pages
206  * to the group. Current page is ignored in the check.
207  *
208  * TODO
209  *
210  * Define abstract class SuperAlloc. Make SuperPool a concrete class
211  * with SuperAlloc instance in ctor. Replace HeapPool by HeapAlloc.
212  */
213 
214 // Types forward.
215 class GroupPool;
216 
217 class SuperPool {
218 public:
219  // Type of i-value, used to reference both pages and records.
220  typedef Uint32 PtrI;
221 
222  // Page entry.
223  struct PageEnt {
224  PageEnt();
225  Uint16 m_pageType; // zero if not in record pool
226  Uint16 m_useCount; // used records on the page
227  PtrI m_freeRecI; // first free record on the page
228  PtrI m_nextPageI;
229  PtrI m_prevPageI;
230  };
231 
232  // Doubly-linked list of page entries.
233  struct PageList {
234  PageList();
235  PageList(PtrI pageI);
236  PtrI m_headPageI;
237  PtrI m_tailPageI;
238  Uint32 m_pageCount;
239  };
240 
241  // Constructor. Gives page size in bytes (must be power of 2) and
242  // number of bits to use for page index "ip" in i-value.
243  SuperPool(Uint32 pageSize, Uint32 pageBits);
244 
245  // Destructor.
246  virtual ~SuperPool() = 0;
247 
248  // Move all pages from second list to end of first list.
249  void movePages(PageList& pl1, PageList& pl2);
250 
251  // Add page to beginning of page list.
252  void addHeadPage(PageList& pl, PtrI pageI);
253 
254  // Add page to end of page list.
255  void addTailPage(PageList& pl, PtrI pageI);
256 
257  // Remove any page from page list.
258  void removePage(PageList& pl, PtrI pageI);
259 
260  // Translate i-value ("ri" ignored) to page entry.
261  PageEnt& getPageEnt(PtrI pageI);
262 
263  // Translate i-value ("ri" ignored) to page address.
264  void* getPageP(PtrI pageI);
265 
266  // Translate page address to i-value. Address must be page-aligned to
267  // memory root. Returns RNIL if "ip" range exceeded.
268  PtrI getPageI(void* pageP);
269 
270  // Record pool info.
271  struct RecInfo {
272  RecInfo(GroupPool& gp, Uint32 recSize);
273  GroupPool& m_groupPool;
274  Uint32 m_recSize;
275  Uint16 m_recType;
276  Uint16 m_maxPerPage;
277  PtrI m_freeRecI; // first free record on current page
278  Uint32 m_useCount; // used records excluding current page
279  PageList m_pageList[3]; // 0-free 1-busy 2-full
280  Uint16 m_hyX; // hysteresis fraction x/y in "pp3"
281  Uint16 m_hyY;
282  };
283 
284  // Translate i-value to record address.
285  void* getRecP(PtrI recI, RecInfo& ri);
286 
287  // Count records on page free list.
288  Uint32 getFreeCount(RecInfo& ri, PtrI freeRecPtrI);
289 
290  // Compute total number of pages in pool.
291  Uint32 getRecPageCount(RecInfo& ri);
292 
293  // Compute total number of records (used or not) in pool.
294  Uint32 getRecTotCount(RecInfo& ri);
295 
296  // Compute total number of used records in pool.
297  Uint32 getRecUseCount(RecInfo& ri);
298 
299  // Compute record pool page list index (0,1,2).
300  Uint32 getRecPageList(RecInfo& ri, PageEnt& pe);
301 
302  // Add current page.
303  void addCurrPage(RecInfo& ri, PtrI pageI);
304 
305  // Remove current page.
306  void removeCurrPage(RecInfo& ri);
307 
308  // Get page with some free records and make it current. Takes head of
309  // used or free list, or else gets free page from group pool.
310  bool getAvailPage(RecInfo& ri);
311 
312  // Get free page from group pool and add it to record pool free list.
313  // This is an expensive subroutine of getAvailPage(RecInfo&):
314  PtrI getFreePage(RecInfo& ri);
315 
316  // Get free detached (not on list) page from group pool.
317  PtrI getFreePage(GroupPool& gp);
318 
319  // Get free detached page from super pool.
320  PtrI getFreePage();
321 
322  // Get new free detached page from the implementation.
323  virtual PtrI getNewPage() = 0;
324 
325  // Initialize free list etc. Subroutine of getFreePage(RecInfo&).
326  void initFreePage(RecInfo& ri, PtrI pageI);
327 
328  // Release record which is not on current page.
329  void releaseNotCurrent(RecInfo& ri, PtrI recI);
330 
331  // Free pages from record pool according to page policy.
332  void freeRecPages(RecInfo& ri);
333 
334  // Free all pages in record pool.
335  void freeAllRecPages(RecInfo& ri, bool force);
336 
337  // Set pool size parameters in pages. Call allocMemory() for changes
338  // (such as extra mallocs) to take effect.
339  void setInitPages(Uint32 initPages);
340  void setIncrPages(Uint32 incrPages);
341  void setMaxPages(Uint32 maxPages);
342 
343  // Get number of pages reserved by all groups.
344  Uint32 getGpMinPages();
345 
346  // Get number of pages reserved to a group.
347  Uint32 getMinPages(GroupPool& gp);
348 
349  // Get max number of pages a group can try to allocate.
350  Uint32 getMaxPages(GroupPool& gp);
351 
352  // Allocate more memory according to current parameters. Returns
353  // false if no new memory was allocated. Otherwise returns true,
354  // even if the amount allocated was less than requested.
355  virtual bool allocMemory() = 0;
356 
357  // Debugging.
358  void verify(RecInfo& ri);
359  void verifyPageList(PageList& pl);
360 
361  // Super pool parameters.
362  const Uint32 m_pageSize;
363  const Uint16 m_pageBits;
364  const Uint16 m_recBits;
365  const Uint32 m_recMask;
366  // Implementation must set up these 3 pointers.
367  void* m_memRoot;
368  PageEnt* m_pageEnt;
369  Uint8* m_pageType;
370  // Free page list.
371  PageList m_freeList;
372  // Free pages and sizes.
373  Uint32 m_initPages;
374  Uint32 m_incrPages;
375  Uint32 m_maxPages;
376  Uint32 m_totPages;
377  Uint32 m_typeCount;
378  // Reserved and allocated by group pools.
379  Uint32 m_groupMinPct;
380  Uint32 m_groupMinPages;
381  Uint32 m_groupTotPages;
382 };
383 
384 inline SuperPool::PageEnt&
385 SuperPool::getPageEnt(PtrI pageI)
386 {
387  Uint32 ip = pageI >> m_recBits;
388  return m_pageEnt[ip];
389 }
390 
391 inline void*
392 SuperPool::getPageP(PtrI ptrI)
393 {
394  Int32 ip = (Int32)ptrI >> m_recBits;
395  return (Uint8*)m_memRoot + ip * (my_ptrdiff_t)m_pageSize;
396 }
397 
398 inline void*
399 SuperPool::getRecP(PtrI ptrI, RecInfo& ri)
400 {
401  Uint32 ip = ptrI >> m_recBits;
402  assert(m_pageType[ip] == (ri.m_recType & 0xFF));
403  Uint32 ir = ptrI & m_recMask;
404  return (Uint8*)getPageP(ptrI) + ir * ri.m_recSize;
405 }
406 
407 /*
408  * GroupPool - subset of a super pool pages (concrete class)
409  */
410 
411 class GroupPool {
412 public:
413  // Types.
415 
416  // Constructor.
417  GroupPool(SuperPool& sp);
418 
419  // Destructor.
420  ~GroupPool();
421 
422  // Set minimum pct reserved in super pool.
423  void setMinPct(Uint32 resPct);
424 
425  // Set minimum pages reserved in super pool.
426  void setMinPages(Uint32 resPages);
427 
428  SuperPool& m_superPool;
429  Uint32 m_minPct;
430  Uint32 m_minPages;
431  Uint32 m_totPages;
432  PageList m_freeList;
433 };
434 
435 /*
436  * RecordPool - record pool using one super pool instance (template)
437  */
438 
439 template <class T>
440 class RecordPool {
441 public:
442  // Constructor.
443  RecordPool(GroupPool& gp);
444 
445  // Destructor.
446  ~RecordPool();
447 
448  // Update pointer ptr.p according to i-value ptr.i.
449  void getPtr(Ptr<T>& ptr);
450 
451  // Allocate record from the pool.
452  bool seize(Ptr<T>& ptr);
453 
454  // Return record to the pool.
455  void release(Ptr<T>& ptr);
456 
457  // todo variants of basic methods
458 
459  // Return all pages to group pool. The force flag is required if
460  // there are any used records.
461  void freeAllRecPages(bool force);
462 
463  SuperPool& m_superPool;
464  SuperPool::RecInfo m_recInfo;
465 };
466 
467 template <class T>
468 inline
470  m_superPool(gp.m_superPool),
471  m_recInfo(gp, sizeof(T))
472 {
473 }
474 
475 template <class T>
476 inline
478 {
479  freeAllRecPages(true);
480 }
481 
482 template <class T>
483 inline void
485 {
486  void* recP = m_superPool.getRecP(ptr.i, m_recInfo);
487  ptr.p = static_cast<T*>(recP);
488 }
489 
490 template <class T>
491 inline bool
493 {
494  SuperPool& sp = m_superPool;
495  SuperPool::RecInfo& ri = m_recInfo;
496  Uint32 recMask = sp.m_recMask;
497  // get current page
498  if ((ri.m_freeRecI & recMask) != recMask ||
499  sp.getAvailPage(ri)) {
500  SuperPool::PtrI recI = ri.m_freeRecI;
501  void* recP = sp.getRecP(recI, ri);
502  ri.m_freeRecI = *(Uint32*)recP;
503  ptr.i = recI;
504  ptr.p = static_cast<T*>(recP);
505  return true;
506  }
507  ptr.i = RNIL;
508  ptr.p = 0;
509  return false;
510 }
511 
512 template <class T>
513 inline void
515 {
516  SuperPool& sp = m_superPool;
517  SuperPool::RecInfo& ri = m_recInfo;
518  SuperPool::PtrI recI = ptr.i;
519  Uint32 recMask = sp.m_recMask;
520  // check if current page
521  if ((recI & ~recMask) == (ri.m_freeRecI & ~recMask)) {
522  void* recP = sp.getRecP(recI, ri);
523  *(Uint32*)recP = ri.m_freeRecI;
524  ri.m_freeRecI = recI;
525  } else {
526  sp.releaseNotCurrent(ri, recI);
527  }
528  ptr.i = RNIL;
529  ptr.p = 0;
530 }
531 
532 template <class T>
533 inline void
535 {
536  SuperPool& sp = m_superPool;
537  sp.freeAllRecPages(m_recInfo, force);
538 }
539 
540 /*
541  * HeapPool - SuperPool on heap (concrete class)
542  *
543  * A super pool based on malloc with memory root on the heap. This
544  * pool type has 2 realistic uses:
545  *
546  * - a small pool with only initial malloc and pageBits set to match
547  * - the big pool from which all heap allocations are done
548  *
549  * A smart malloc may break "ip" limit by using different VM areas for
550  * different sized requests. For this reason malloc is done in units of
551  * increment size if possible. Memory root is set to the page-aligned
552  * address from first page malloc.
553  */
554 
555 class HeapPool : public SuperPool {
556 public:
557  // Describes malloc area. The areas are kept in singly linked list.
558  // There is a list head and pointers to current and last area.
559  struct Area {
560  Area();
561  Area* m_nextArea;
562  PtrI m_firstPageI;
563  Uint32 m_currPage;
564  void* m_memory; // from malloc
565  void* m_pages; // page-aligned pages
566  Uint32 m_numPages; // number of pages
567  };
568 
569  // Constructor.
570  HeapPool(Uint32 pageSize, Uint32 pageBits);
571 
572  // Destructor.
573  virtual ~HeapPool();
574 
575  // Get new page from current area.
576  virtual PtrI getNewPage();
577 
578  // Allocate fixed arrays.
579  bool allocInit();
580 
581  // Allocate array of aligned pages.
582  bool allocArea(Area* ap, Uint32 tryPages);
583 
584  // Allocate memory.
585  virtual bool allocMemory() { return allocMemoryImpl();}
586  bool allocMemoryImpl();
587 
588  // List of malloc areas.
589  Area m_areaHead;
590  Area* m_currArea;
591  Area* m_lastArea;
592 };
593 
594 #endif