MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
lf_alloc-pin.c
1 /* QQ: TODO multi-pinbox */
2 /* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7 
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  GNU General Public License for more details.
12 
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16 
17 /*
18  wait-free concurrent allocator based on pinning addresses
19 
20  It works as follows: every thread (strictly speaking - every CPU, but
21  it's too difficult to do) has a small array of pointers. They're called
22  "pins". Before using an object its address must be stored in this array
23  (pinned). When an object is no longer necessary its address must be
24  removed from this array (unpinned). When a thread wants to free() an
25  object it scans all pins of all threads to see if somebody has this
26  object pinned. If yes - the object is not freed (but stored in a
27  "purgatory"). To reduce the cost of a single free() pins are not scanned
28  on every free() but only added to (thread-local) purgatory. On every
29  LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects
30  are freed.
31 
32  Pins are used to solve ABA problem. To use pins one must obey
33  a pinning protocol:
34 
35  1. Let's assume that PTR is a shared pointer to an object. Shared means
36  that any thread may modify it anytime to point to a different object
37  and free the old object. Later the freed object may be potentially
38  allocated by another thread. If we're unlucky that other thread may
39  set PTR to point to this object again. This is ABA problem.
40  2. Create a local pointer LOCAL_PTR.
41  3. Pin the PTR in a loop:
42  do
43  {
44  LOCAL_PTR= PTR;
45  pin(PTR, PIN_NUMBER);
46  } while (LOCAL_PTR != PTR)
47  4. It is guaranteed that after the loop has ended, LOCAL_PTR
48  points to an object (or NULL, if PTR may be NULL), that
49  will never be freed. It is not guaranteed though
50  that LOCAL_PTR == PTR (as PTR can change any time)
51  5. When done working with the object, remove the pin:
52  unpin(PIN_NUMBER)
53  6. When copying pins (as in the list traversing loop:
54  pin(CUR, 1);
55  while ()
56  {
57  do // standard
58  { // pinning
59  NEXT=CUR->next; // loop
60  pin(NEXT, 0); // see #3
61  } while (NEXT != CUR->next); // above
62  ...
63  ...
64  CUR=NEXT;
65  pin(CUR, 1); // copy pin[0] to pin[1]
66  }
67  which keeps CUR address constantly pinned), note that pins may be
68  copied only upwards (!!!), that is pin[N] to pin[M], M > N.
69  7. Don't keep the object pinned longer than necessary - the number of
70  pins you have is limited (and small), keeping an object pinned
71  prevents its reuse and causes unnecessary mallocs.
72 
73  Explanations:
74 
75  3. The loop is important. The following can occur:
76  thread1> LOCAL_PTR= PTR
77  thread2> free(PTR); PTR=0;
78  thread1> pin(PTR, PIN_NUMBER);
79  now thread1 cannot access LOCAL_PTR, even if it's pinned,
80  because it points to a freed memory. That is, it *must*
81  verify that it has indeed pinned PTR, the shared pointer.
82 
83  6. When a thread wants to free some LOCAL_PTR, and it scans
84  all lists of pins to see whether it's pinned, it does it
85  upwards, from low pin numbers to high. Thus another thread
86  must copy an address from one pin to another in the same
87  direction - upwards, otherwise the scanning thread may
88  miss it.
89 
90  Implementation details:
91 
92  Pins are given away from a "pinbox". Pinbox is stack-based allocator.
93  It uses dynarray for storing pins, new elements are allocated by dynarray
94  as necessary, old are pushed in the stack for reuse. ABA is solved by
95  versioning a pointer - because we use an array, a pointer to pins is 16 bit,
96  upper 16 bits are used for a version.
97 
98  It is assumed that pins belong to a THD and are not transferable
99  between THD's (LF_PINS::stack_ends_here being a primary reason
100  for this limitation).
101 */
102 #include <my_global.h>
103 #include <my_sys.h>
104 #include <lf.h>
105 
/*
  Pin-owner indices are kept in the low 16 bits of pinstack_top_ver,
  so the pin array can hold at most 64k LF_PINS elements.
*/
#define LF_PINBOX_MAX_PINS 65536

static void _lf_pinbox_real_free(LF_PINS *pins);
109 
110 /*
111  Initialize a pinbox. Normally called from lf_alloc_init.
112  See the latter for details.
113 */
114 void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
115  lf_pinbox_free_func *free_func, void *free_func_arg)
116 {
117  DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
118  compile_time_assert(sizeof(LF_PINS) == 64);
119  lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
120  pinbox->pinstack_top_ver= 0;
121  pinbox->pins_in_array= 0;
122  pinbox->free_ptr_offset= free_ptr_offset;
123  pinbox->free_func= free_func;
124  pinbox->free_func_arg= free_func_arg;
125 }
126 
/*
  Destroy a pinbox: releases the underlying dynarray of LF_PINS.
  Not thread-safe; no thread may be using the pinbox when this is called.
*/
void lf_pinbox_destroy(LF_PINBOX *pinbox)
{
  lf_dynarray_destroy(&pinbox->pinarray);
}
131 
/*
  Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
  lf_hash_get_pins().

  SYNOPSYS
    pinbox -

  DESCRIPTION
    get a new LF_PINS structure from a stack of unused pins,
    or allocate a new one out of dynarray.

  RETURN
    pointer to the LF_PINS element, or NULL when 64k elements are already
    allocated or the dynarray cannot grow.

  NOTE
    It is assumed that pins belong to a thread and are not transferable
    between threads.
*/
LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox)
{
  struct st_my_thread_var *var;
  uint32 pins, next, top_ver;
  LF_PINS *el;
  /*
    We have an array of max. 64k elements.
    The highest index currently allocated is pinbox->pins_in_array.
    Freed elements are in a lifo stack, pinstack_top_ver.
    pinstack_top_ver is 32 bits; 16 low bits are the index in the
    array, to the first element of the list. 16 high bits are a version
    (every time the 16 low bits are updated, the 16 high bits are
    incremented). Versioning prevents the ABA problem.
  */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
    {
      /* the stack of free elements is empty */
      pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1;
      if (unlikely(pins >= LF_PINBOX_MAX_PINS))
        return 0;
      /*
        note that the first allocated element has index 1 (pins==1).
        index 0 is reserved to mean "NULL pointer"
      */
      el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins);
      if (unlikely(!el))
        return 0;
      break;
    }
    el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins);
    next= el->link;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            /* new top: low bits become 'next', version bumped */
                            top_ver-pins+next+LF_PINBOX_MAX_PINS));
  /*
    set el->link to the index of el in the dynarray (el->link has two usages:
    - if element is allocated, it's its own index
    - if element is free, it's its next element in the free stack
  */
  el->link= pins;
  el->purgatory_count= 0;
  el->pinbox= pinbox;
  var= my_thread_var;
  /*
    Threads that do not call my_thread_init() should still be
    able to use the LF_HASH.
  */
  el->stack_ends_here= (var ? & var->stack_ends_here : NULL);
  return el;
}
200 
/*
  Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
  lf_hash_put_pins().

  DESCRIPTION
    empty the purgatory (XXX deadlock warning below!),
    push LF_PINS structure to a stack
*/
void _lf_pinbox_put_pins(LF_PINS *pins)
{
  LF_PINBOX *pinbox= pins->pinbox;
  uint32 top_ver, nr;
  nr= pins->link;  /* while allocated, el->link holds the element's own index */

#ifndef DBUG_OFF
  {
    /* This thread should not hold any pin. */
    int i;
    for (i= 0; i < LF_PINBOX_PINS; i++)
      DBUG_ASSERT(pins->pin[i] == 0);
  }
#endif /* DBUG_OFF */

  /*
    XXX this will deadlock if other threads will wait for
    the caller to do something after _lf_pinbox_put_pins(),
    and they would have pinned addresses that the caller wants to free.
    Thus: only free pins when all work is done and nobody can wait for you!!!
  */
  while (pins->purgatory_count)
  {
    _lf_pinbox_real_free(pins);
    if (pins->purgatory_count)
    {
      /*
        Some purgatory objects are still pinned by other threads:
        drop the dynarray's atomic rwlock and yield so the pinning
        threads get a chance to unpin them, then retry.
      */
      my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock);
      pthread_yield();
      my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock);
    }
  }
  /*
    Push this element onto the free stack: its link takes the old top index,
    the low 16 bits of pinstack_top_ver become our index 'nr', and the
    +LF_PINBOX_MAX_PINS term bumps the 16-bit version to prevent ABA.
  */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    pins->link= top_ver % LF_PINBOX_MAX_PINS;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
  return;
}
249 
/*
  qsort() comparator ordering two pointers by address value.
  Used to sort the harvested pin array for binary search.
*/
static int ptr_cmp(void **a, void **b)
{
  if (*a < *b)
    return -1;
  if (*a > *b)
    return 1;
  return 0;
}
254 
/*
  Prepend ADDR to the purgatory list of PINS. The list link is stored
  inside the object itself, free_ptr_offset bytes from its start (that
  memory is guaranteed unused once the object is in the purgatory).
*/
#define add_to_purgatory(PINS, ADDR) \
  do \
  { \
    *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \
      (PINS)->purgatory; \
    (PINS)->purgatory= (ADDR); \
    (PINS)->purgatory_count++; \
  } while (0)
263 
264 /*
265  Free an object allocated via pinbox allocator
266 
267  DESCRIPTION
268  add an object to purgatory. if necessary, call _lf_pinbox_real_free()
269  to actually free something.
270 */
271 void _lf_pinbox_free(LF_PINS *pins, void *addr)
272 {
273  add_to_purgatory(pins, addr);
274  if (pins->purgatory_count % LF_PURGATORY_SIZE)
275  _lf_pinbox_real_free(pins);
276 }
277 
/* accumulator state for harvest_pins() (see below) */
struct st_harvester {
  void **granary;   /* next free slot in the output array of pinned addresses */
  int npins;        /* number of LF_PINS elements still left to scan */
};
282 
283 /*
284  callback for _lf_dynarray_iterate:
285  scan all pins of all threads and accumulate all pins
286 */
287 static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
288 {
289  int i;
290  LF_PINS *el_end= el + MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
291  for (; el < el_end; el++)
292  {
293  for (i= 0; i < LF_PINBOX_PINS; i++)
294  {
295  void *p= el->pin[i];
296  if (p)
297  *hv->granary++= p;
298  }
299  }
300  /*
301  hv->npins may become negative below, but it means that
302  we're on the last dynarray page and harvest_pins() won't be
303  called again. We don't bother to make hv->npins() correct
304  (that is 0) in this case.
305  */
306  hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
307  return 0;
308 }
309 
310 /*
311  callback for _lf_dynarray_iterate:
312  scan all pins of all threads and see if addr is present there
313 */
314 static int match_pins(LF_PINS *el, void *addr)
315 {
316  int i;
317  LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
318  for (; el < el_end; el++)
319  for (i= 0; i < LF_PINBOX_PINS; i++)
320  if (el->pin[i] == addr)
321  return 1;
322  return 0;
323 }
324 
/*
  Remaining stack space between the current frame CUR and the thread's
  stack end END, accounting for the platform's stack growth direction.
*/
#if STACK_DIRECTION < 0
#define available_stack_size(CUR,END) (long) ((char*)(CUR) - (char*)(END))
#else
#define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR))
#endif

/*
  The list-link pointer embedded in object X at free_ptr_offset bytes
  from its start; P is the pinbox that supplies the offset.
*/
#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
/* same, for the pinbox of an 'allocator' variable in scope at the call site */
#define anext_node(X) next_node(&allocator->pinbox, (X))
333 
/*
  Scan the purgatory and free everything that can be freed.

  DESCRIPTION
    Collects every address pinned by any thread (sorted into a stack
    buffer when alloca is available, otherwise consulted via a linear
    dynarray scan), then walks the purgatory: unpinned objects are chained
    together and handed to free_func in one batch, still-pinned objects
    are put back into the purgatory.
*/
static void _lf_pinbox_real_free(LF_PINS *pins)
{
  int npins;
  void *list;
  void **addr= NULL;   /* sorted pin array, or NULL if alloca wasn't usable */
  void *first= NULL, *last= NULL;   /* chain of objects to be freed */
  LF_PINBOX *pinbox= pins->pinbox;

  npins= pinbox->pins_in_array+1;

#ifdef HAVE_ALLOCA
  if (pins->stack_ends_here != NULL)
  {
    int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
    /* create a sorted list of pinned addresses, to speed up searches */
    if (available_stack_size(&pinbox, *pins->stack_ends_here) > alloca_size)
    {
      struct st_harvester hv;
      addr= (void **) alloca(alloca_size);
      hv.granary= addr;
      hv.npins= npins;
      /* scan the dynarray and accumulate all pinned addresses */
      _lf_dynarray_iterate(&pinbox->pinarray,
                           (lf_dynarray_func)harvest_pins, &hv);

      npins= hv.granary-addr;   /* number of addresses actually harvested */
      /* and sort them */
      if (npins)
        qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
    }
  }
#endif

  /* detach the purgatory; pinned objects are re-added to it below */
  list= pins->purgatory;
  pins->purgatory= 0;
  pins->purgatory_count= 0;
  while (list)
  {
    void *cur= list;
    list= *(void **)((char *)cur+pinbox->free_ptr_offset);
    if (npins)
    {
      if (addr) /* use binary search */
      {
        void **a, **b, **c;
        for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
          if (cur == *c)
            a= b= c;
          else if (cur > *c)
            a= c;
          else
            b= c;
        if (cur == *a || cur == *b)
          goto found;
      }
      else /* no alloca - no cookie. linear search here */
      {
        if (_lf_dynarray_iterate(&pinbox->pinarray,
                                 (lf_dynarray_func)match_pins, cur))
          goto found;
      }
    }
    /* not pinned - freeing: append cur to the first..last chain */
    if (last)
      last= next_node(pinbox, last)= (uchar *)cur;
    else
      first= last= (uchar *)cur;
    continue;
found:
    /* pinned - keeping */
    add_to_purgatory(pins, cur);
  }
  /* hand the whole collected chain to the owner's free_func in one call */
  if (last)
    pinbox->free_func(first, last, pinbox->free_func_arg);
}
412 
413 /* lock-free memory allocator for fixed-size objects */
414 
415 LF_REQUIRE_PINS(1)
416 
/*
  callback for _lf_pinbox_real_free to free a list of unpinned objects -
  add it back to the allocator stack

  DESCRIPTION
    'first' and 'last' are the ends of the linked list of nodes:
    first->el->el->....->el->last. Use first==last to free only one element.
*/
static void alloc_free(uchar *first,
                       uchar volatile *last,
                       LF_ALLOCATOR *allocator)
{
  /*
    we need a union here to access type-punned pointer reliably.
    otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
  */
  union { uchar * node; void *ptr; } tmp;
  tmp.node= allocator->top;
  do
  {
    /* link 'last' to the current top, then try to swing top to 'first' */
    anext_node(last)= tmp.node;
  } while (!my_atomic_casptr((void **)(char *)&allocator->top,
                             (void **)&tmp.ptr, first) && LF_BACKOFF);
}
441 
442 /*
443  initialize lock-free allocator
444 
445  SYNOPSYS
446  allocator -
447  size a size of an object to allocate
448  free_ptr_offset an offset inside the object to a sizeof(void *)
449  memory that is guaranteed to be unused after
450  the object is put in the purgatory. Unused by ANY
451  thread, not only the purgatory owner.
452  This memory will be used to link waiting-to-be-freed
453  objects in a purgatory list.
454 */
455 void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
456 {
457  lf_pinbox_init(&allocator->pinbox, free_ptr_offset,
458  (lf_pinbox_free_func *)alloc_free, allocator);
459  allocator->top= 0;
460  allocator->mallocs= 0;
461  allocator->element_size= size;
462  allocator->constructor= 0;
463  allocator->destructor= 0;
464  DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
465 }
466 
467 /*
468  destroy the allocator, free everything that's in it
469 
470  NOTE
471  As every other init/destroy function here and elsewhere it
472  is not thread safe. No, this function is no different, ensure
473  that no thread needs the allocator before destroying it.
474  We are not responsible for any damage that may be caused by
475  accessing the allocator when it is being or has been destroyed.
476  Oh yes, and don't put your cat in a microwave.
477 */
478 void lf_alloc_destroy(LF_ALLOCATOR *allocator)
479 {
480  uchar *node= allocator->top;
481  while (node)
482  {
483  uchar *tmp= anext_node(node);
484  if (allocator->destructor)
485  allocator->destructor(node);
486  my_free(node);
487  node= tmp;
488  }
489  lf_pinbox_destroy(&allocator->pinbox);
490  allocator->top= 0;
491 }
492 
493 /*
494  Allocate and return an new object.
495 
496  DESCRIPTION
497  Pop an unused object from the stack or malloc it is the stack is empty.
498  pin[0] is used, it's removed on return.
499 */
500 void *_lf_alloc_new(LF_PINS *pins)
501 {
502  LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
503  uchar *node;
504  for (;;)
505  {
506  do
507  {
508  node= allocator->top;
509  _lf_pin(pins, 0, node);
510  } while (node != allocator->top && LF_BACKOFF);
511  if (!node)
512  {
513  node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
514  if (allocator->constructor)
515  allocator->constructor(node);
516 #ifdef MY_LF_EXTRA_DEBUG
517  if (likely(node != 0))
518  my_atomic_add32(&allocator->mallocs, 1);
519 #endif
520  break;
521  }
522  if (my_atomic_casptr((void **)(char *)&allocator->top,
523  (void *)&node, anext_node(node)))
524  break;
525  }
526  _lf_unpin(pins, 0);
527  return node;
528 }
529 
530 /*
531  count the number of objects in a pool.
532 
533  NOTE
534  This is NOT thread-safe !!!
535 */
536 uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
537 {
538  uint i;
539  uchar *node;
540  for (node= allocator->top, i= 0; node; node= anext_node(node), i++)
541  /* no op */;
542  return i;
543 }
544