MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
waiting_threads.c
Go to the documentation of this file.
1 /* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
249 /*
250  Note that if your lock system satisfy the following condition:
251 
252  there exist four lock levels A, B, C, D, such as
253  A is compatible with B
254  A is not compatible with C
255  D is not compatible with B
256 
257  (example A=IX, B=IS, C=S, D=X)
258 
259  you need to include lock level in the resource identifier - a
260  thread waiting for lock of the type A on resource R and another
261  thread waiting for lock of the type B on resource R should wait on
262  different WT_RESOURCE structures, on different {lock, resource}
263  pairs. Otherwise the following is possible:
264 
265  thread1> take S-lock on R
266  thread2> take IS-lock on R
267  thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R.
268  thread3 is killed (or timeout or whatever)
269  WT_RESOURCE structure for R is still in the hash, as it has two owners
270  thread4> wants an IX-lock on R
271  WT_RESOURCE for R is found in the hash, thread4 starts waiting on it.
272  !! now thread4 is waiting for both thread1 and thread2
273  !! while, in fact, IX-lock and IS-lock are compatible and
274  !! thread4 should not wait for thread2.
275 */
276 
277 #include <waiting_threads.h>
278 #include <m_string.h>
279 
/* status variables */

/*
  wait-time histogram bucket boundaries, in microseconds; filled in by
  wt_init() on a logarithmic scale from 1 us to 1 min
*/
ulonglong wt_wait_table[WT_WAIT_STATS];
/* wait-time histogram; the extra last bucket counts timed-out waits */
uint32 wt_wait_stats[WT_WAIT_STATS+1];
/*
  deadlock cycle length histogram, one row per search kind
  (0 - short search, 1 - long search, see increment_cycle_stats());
  the extra last bucket counts depth-exceeded searches
*/
uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1];
uint32 wt_success_stats;

/* protect the counters above when SAFE_STATISTICS is defined */
static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;

#ifdef SAFE_STATISTICS
/* atomically increment VAR, taking LOCK as my_atomic_* may require */
#define incr(VAR, LOCK) \
  do { \
    my_atomic_rwlock_wrlock(&(LOCK)); \
    my_atomic_add32(&(VAR), 1); \
    my_atomic_rwlock_wrunlock(&(LOCK)); \
  } while(0)
#else
/* without SAFE_STATISTICS the counters may be slightly inaccurate */
#define incr(VAR,LOCK) do { (VAR)++; } while(0)
#endif
309 
310 static void increment_success_stats()
311 {
312  incr(wt_success_stats, success_stats_lock);
313 }
314 
315 static void increment_cycle_stats(uint depth, uint slot)
316 {
317  if (depth >= WT_CYCLE_STATS)
318  depth= WT_CYCLE_STATS;
319  incr(wt_cycle_stats[slot][depth], cycle_stats_lock);
320 }
321 
322 static void increment_wait_stats(ulonglong waited,int ret)
323 {
324  uint i;
325  if ((ret) == ETIMEDOUT)
326  i= WT_WAIT_STATS;
327  else
328  for (i= 0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ;
329  incr(wt_wait_stats[i], wait_stats_lock);
330 }
331 
332 /*
333  'lock' protects 'owners', 'state', and 'waiter_count'
334  'id' is read-only
335 
336  a resource is picked up from a hash in a lock-free manner
337  it's returned pinned, so it cannot be freed at once
338  but it may be freed right after the pin is removed
339  to free a resource it should
340  1. have no owners
341  2. have no waiters
342 
343  two ways to access a resource:
344  1. find it in a hash
345  - it's returned pinned.
346  a) take a lock in exclusive mode
347  b) check the state, it should be ACTIVE to be usable
348  c) unpin
349  2. by a direct reference
 350  - can only be used if a resource cannot be freed
351  e.g. accessing a resource by thd->waiting_for is safe,
352  a resource cannot be freed as there's a thread waiting for it
353 */
355  WT_RESOURCE_ID id;
356  uint waiter_count;
357  enum { ACTIVE, FREE } state;
358 #ifndef DBUG_OFF
359  mysql_mutex_t *cond_mutex; /* a mutex for the 'cond' below */
360 #endif
361  /*
362  before the 'lock' all elements are mutable, after (and including) -
363  immutable in the sense that lf_hash_insert() won't memcpy() over them.
364  See wt_init().
365  */
366 #ifdef WT_RWLOCKS_USE_MUTEXES
367  /*
368  we need a special rwlock-like 'lock' to allow readers bypass
369  waiting writers, otherwise readers can deadlock. For example:
370 
371  A waits on resource x, owned by B, B waits on resource y, owned
372  by A, we have a cycle (A->x->B->y->A)
373  Both A and B start deadlock detection:
374 
375  A locks x B locks y
376  A goes deeper B goes deeper
377  A locks y B locks x
378 
379  with mutexes it would deadlock. With rwlocks it won't, as long
380  as both A and B are taking read locks (and they do).
381  But other threads may take write locks. Assume there's
382  C who wants to start waiting on x, and D who wants to start
383  waiting on y.
384 
385  A read-locks x B read-locks y
386  A goes deeper B goes deeper
387  => C write-locks x (to add a new edge) D write-locks y
388  .. C is blocked D is blocked
389  A read-locks y B read-locks x
390 
 391  Now, if a read lock can bypass a pending write lock request, we're fine.
392  If it can not, we have a deadlock.
393 
394  writer starvation is technically possible, but unlikely, because
395  the contention is expected to be low.
396  */
397  struct {
398  mysql_cond_t cond;
399  mysql_mutex_t mutex;
400  uint readers: 16;
401  uint pending_writers: 15;
402  uint write_locked: 1;
403  } lock;
404 #else
405  rw_lock_t lock;
406 #endif
407  mysql_cond_t cond; /* the corresponding mutex is provided by the caller */
408  DYNAMIC_ARRAY owners;
409 };
410 
#ifdef WT_RWLOCKS_USE_MUTEXES
/*
  rwlock-like lock built from a mutex and a condition variable.
  Unlike some native rwlocks, it lets readers bypass waiting writers,
  which is required for concurrent deadlock detection to not deadlock
  itself (see the comment in WT_RESOURCE above).
*/
static void rc_rwlock_init(WT_RESOURCE *rc)
{
  mysql_cond_init(0, &rc->lock.cond, 0);
  mysql_mutex_init(0, &rc->lock.mutex, MY_MUTEX_INIT_FAST);
}
static void rc_rwlock_destroy(WT_RESOURCE *rc)
{
  /* the lock must not be held by anybody at destroy time */
  DBUG_ASSERT(rc->lock.write_locked == 0);
  DBUG_ASSERT(rc->lock.readers == 0);
  mysql_cond_destroy(&rc->lock.cond);
  mysql_mutex_destroy(&rc->lock.mutex);
}
static void rc_rdlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value));
  mysql_mutex_lock(&rc->lock.mutex);
  /*
    a reader waits only for an *active* writer, never for pending ones -
    this is what lets readers bypass waiting writers
  */
  while (rc->lock.write_locked)
    mysql_cond_wait(&rc->lock.cond, &rc->lock.mutex);
  rc->lock.readers++;
  mysql_mutex_unlock(&rc->lock.mutex);
  DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value));
}
static void rc_wrlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value));
  mysql_mutex_lock(&rc->lock.mutex);
  /* a writer needs the lock exclusively: no writer and no readers */
  while (rc->lock.write_locked || rc->lock.readers)
    mysql_cond_wait(&rc->lock.cond, &rc->lock.mutex);
  rc->lock.write_locked= 1;
  mysql_mutex_unlock(&rc->lock.mutex);
  DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value));
}
static void rc_unlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value));
  mysql_mutex_lock(&rc->lock.mutex);
  if (rc->lock.write_locked)
  {
    /* we were the writer - wake up everybody */
    rc->lock.write_locked= 0;
    mysql_cond_broadcast(&rc->lock.cond);
  }
  else if (--rc->lock.readers == 0)
    /* we were the last reader - wake up waiting writers */
    mysql_cond_broadcast(&rc->lock.cond);
  mysql_mutex_unlock(&rc->lock.mutex);
}
457 #else
/*
  Thin wrappers over the native rwlock.
  NOTE(review): this variant presumably requires the platform rwlock to
  let readers bypass pending writers (see the WT_RESOURCE comment) -
  verify before enabling it on a new platform.
*/
static void rc_rwlock_init(WT_RESOURCE *rc)
{
  my_rwlock_init(&rc->lock, 0);
}
static void rc_rwlock_destroy(WT_RESOURCE *rc)
{
  rwlock_destroy(&rc->lock);
}
static void rc_rdlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value));
  rw_rdlock(&rc->lock);
  DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value));
}
static void rc_wrlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value));
  rw_wrlock(&rc->lock);
  DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value));
}
static void rc_unlock(WT_RESOURCE *rc)
{
  DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value));
  rw_unlock(&rc->lock);
}
483 #endif
484 
/*
  All resources are stored in a lock-free hash. Different threads
  may add new resources and perform deadlock detection concurrently.
  Keyed by WT_RESOURCE_ID (see wt_init()).
*/
static LF_HASH reshash;
490 
/*
  lf_alloc constructor for WT_RESOURCE elements.

  'arg' points to the raw lf_hash element; the WT_RESOURCE itself
  starts LF_HASH_OVERHEAD bytes into it. Initializes the parts that
  lf_hash_insert() will NOT overwrite (lock, condition, owners array) -
  see the element_size trick in wt_init().
*/
static void wt_resource_init(uchar *arg)
{
  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
  DBUG_ENTER("wt_resource_init");

  memset(rc, 0, sizeof(*rc));
  rc_rwlock_init(rc);
  mysql_cond_init(0, &rc->cond, 0);
  my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5);
  DBUG_VOID_RETURN;
}
508 
/*
  lf_alloc destructor for WT_RESOURCE elements - the mirror image of
  wt_resource_init(). The resource must have no owners by now.
*/
static void wt_resource_destroy(uchar *arg)
{
  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
  DBUG_ENTER("wt_resource_destroy");

  DBUG_ASSERT(rc->owners.elements == 0);
  rc_rwlock_destroy(rc);
  mysql_cond_destroy(&rc->cond);
  delete_dynamic(&rc->owners);
  DBUG_VOID_RETURN;
}
526 
/*
  Initialize the waiting threads subsystem.
  Must be called once, before any other wt_* function.
*/
void wt_init()
{
  DBUG_ENTER("wt_init");
  /* guard against double initialization */
  DBUG_ASSERT(reshash.alloc.constructor != wt_resource_init);

  lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0,
               sizeof_WT_RESOURCE_ID, 0, 0);
  reshash.alloc.constructor= wt_resource_init;
  reshash.alloc.destructor= wt_resource_destroy;
  /*
    Note a trick: we initialize the hash with the real element size,
    but fix it later to a shortened element size. This way
    the allocator will allocate elements correctly, but
    lf_hash_insert() will only overwrite part of the element with memcpy().
    lock, condition, and dynamic array will be intact.
  */
  reshash.element_size= offsetof(WT_RESOURCE, lock);
  memset(wt_wait_stats, 0, sizeof(wt_wait_stats));
  memset(wt_cycle_stats, 0, sizeof(wt_cycle_stats));
  wt_success_stats= 0;
  { /* initialize wt_wait_table[]. from 1 us to 1 min, log e scale */
    int i;
    double from= log(1);   /* 1 us */
    double to= log(60e6);  /* 1 min */
    for (i= 0; i < WT_WAIT_STATS; i++)
    {
      wt_wait_table[i]= (ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from);
      /* bucket boundaries must be distinct for the histogram to make sense */
      DBUG_ASSERT(i == 0 || wt_wait_table[i-1] != wt_wait_table[i]);
    }
  }
  my_atomic_rwlock_init(&cycle_stats_lock);
  my_atomic_rwlock_init(&success_stats_lock);
  my_atomic_rwlock_init(&wait_stats_lock);
  DBUG_VOID_RETURN;
}
562 
563 void wt_end()
564 {
565  DBUG_ENTER("wt_end");
566 
567  DBUG_ASSERT(reshash.count == 0);
568  lf_hash_destroy(&reshash);
569  my_atomic_rwlock_destroy(&cycle_stats_lock);
570  my_atomic_rwlock_destroy(&success_stats_lock);
571  my_atomic_rwlock_destroy(&wait_stats_lock);
572  DBUG_VOID_RETURN;
573 }
574 
594 void wt_thd_lazy_init(WT_THD *thd, const ulong *ds, const ulong *ts,
595  const ulong *dl, const ulong *tl)
596 {
597  DBUG_ENTER("wt_thd_lazy_init");
598  thd->waiting_for= 0;
599  thd->weight= 0;
600  thd->deadlock_search_depth_short= ds;
601  thd->timeout_short= ts;
602  thd->deadlock_search_depth_long= dl;
603  thd->timeout_long= tl;
604  /* dynamic array is also initialized lazily - without memory allocations */
605  my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
606 #ifndef DBUG_OFF
607  thd->name= my_thread_name();
608 #endif
609  DBUG_VOID_RETURN;
610 }
611 
620 static int fix_thd_pins(WT_THD *thd)
621 {
622  if (unlikely(thd->pins == 0))
623  {
624  thd->pins= lf_hash_get_pins(&reshash);
625 #ifndef DBUG_OFF
626  thd->name= my_thread_name();
627 #endif
628  }
629  return thd->pins == 0;
630 }
631 
632 void wt_thd_destroy(WT_THD *thd)
633 {
634  DBUG_ENTER("wt_thd_destroy");
635 
636  DBUG_ASSERT(thd->my_resources.elements == 0);
637  DBUG_ASSERT(thd->waiting_for == 0);
638 
639  if (thd->pins != 0)
640  lf_hash_put_pins(thd->pins);
641 
642  delete_dynamic(&thd->my_resources);
643  DBUG_VOID_RETURN;
644 }
651 my_bool wt_resource_id_memcmp(const void *a, const void *b)
652 {
653  /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */
654  compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong));
655  return memcmp(a, b, sizeof_WT_RESOURCE_ID);
656 }
657 
661 struct deadlock_arg {
662  WT_THD * const thd;
663  uint const max_depth;
666 };
667 
/*
  Change the victim if a better (lighter-weight) candidate is found.

  On entry the caller holds a lock on found->waiting_for (saved in
  arg->last_locked_rc by deadlock_search()). If 'found' becomes the
  new victim, the previous victim's resource lock is released and the
  lock on found->waiting_for is kept - arg->last_locked_rc is reset to
  0 so the caller will not release it.
*/
static void change_victim(WT_THD* found, struct deadlock_arg *arg)
{
  if (found->weight < arg->victim->weight)
  {
    if (arg->victim != arg->thd)
    {
      rc_unlock(arg->victim->waiting_for); /* release the previous victim */
      DBUG_ASSERT(arg->last_locked_rc == found->waiting_for);
    }
    arg->victim= found;
    arg->last_locked_rc= 0;
  }
}
684 
/*
  Recursive loop detection in the wait-for graph with a limited depth.

  Walks the graph starting from 'blocker', looking for a cycle that
  includes arg->thd. On WT_DEADLOCK, arg->victim is set. The resource
  this frame locked is handed back to the caller via
  arg->last_locked_rc - see the big comment at the 'end:' label.

  RETURN
    WT_OK             no deadlock found within the depth limit
    WT_DEPTH_EXCEEDED search depth limit was reached
    WT_DEADLOCK       a cycle that includes arg->thd was found
*/
static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
                           uint depth)
{
  WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for;
  WT_THD *cursor;
  uint i;
  int ret= WT_OK;
  DBUG_ENTER("deadlock_search");
  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u",
                    arg->thd->name, blocker->name, depth));

  LF_REQUIRE_PINS(1);

  arg->last_locked_rc= 0;

  if (depth > arg->max_depth)
  {
    DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)"));
    DBUG_RETURN(WT_DEPTH_EXCEEDED);
  }

retry:
  /*
    safe dereference as explained in lf_alloc-pin.c
    (in short: protects against lf_alloc_free() in lf_hash_delete())
  */
  do
  {
    rc= *shared_ptr;
    lf_pin(arg->thd->pins, 0, rc);
  } while (rc != *shared_ptr && LF_BACKOFF);

  if (rc == 0)
  {
    /* 'blocker' is not waiting for anything - no cycle via this path */
    DBUG_PRINT("wt", ("exit: OK (early)"));
    DBUG_RETURN(0);
  }

  rc_rdlock(rc);
  if (rc->state != ACTIVE || *shared_ptr != rc)
  {
    /* blocker is not waiting on this resource anymore */
    rc_unlock(rc);
    lf_unpin(arg->thd->pins, 0);
    goto retry;
  }
  /* as the state is locked, we can unpin now */
  lf_unpin(arg->thd->pins, 0);

  /*
    Below is not a pure depth-first search. It's a depth-first with a
    slightest hint of breadth-first. Depth-first is:

      check(element, X):
        foreach current in element->nodes[] do:
          if current == X return error;
          check(current, X);

    while we do

      check(element, X):
        foreach current in element->nodes[] do:
          if current == X return error;
        foreach current in element->nodes[] do:
          check(current, X);

    preferring shorter deadlocks over longer ones.
  */
  for (i= 0; i < rc->owners.elements; i++)
  {
    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
    /*
      We're only looking for (and detecting) cycles that include 'arg->thd'.
      That is, only deadlocks that *we* have created. For example,
        thd->A->B->thd
      (thd waits for A, A waits for B, while B is waiting for thd).
      While walking the graph we can encounter other cycles, e.g.
        thd->A->B->C->A
      This will not be detected. Instead we will walk it in circles until
      the search depth limit is reached (the latter guarantees that an
      infinite loop is impossible). We expect the thread that has created
      the cycle (one of A, B, and C) to detect its deadlock.
    */
    if (cursor == arg->thd)
    {
      ret= WT_DEADLOCK;
      /* second argument selects the histogram row: short vs long search */
      increment_cycle_stats(depth, arg->max_depth ==
                                   *arg->thd->deadlock_search_depth_long);
      arg->victim= cursor;
      goto end;
    }
  }
  for (i= 0; i < rc->owners.elements; i++)
  {
    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
    switch (deadlock_search(arg, cursor, depth+1)) {
    case WT_OK:
      break;
    case WT_DEPTH_EXCEEDED:
      ret= WT_DEPTH_EXCEEDED;
      break;
    case WT_DEADLOCK:
      ret= WT_DEADLOCK;
      change_victim(cursor, arg); /* also sets arg->last_locked_rc to 0 */
      i= rc->owners.elements; /* jump out of the loop */
      break;
    default:
      DBUG_ASSERT(0);
    }
    if (arg->last_locked_rc)
      rc_unlock(arg->last_locked_rc);
  }
end:
  /*
    Note that 'rc' is locked in this function, but it's never unlocked here.
    Instead it's saved in arg->last_locked_rc and the *caller* is
    expected to unlock it. It's done to support different killing
    strategies. This is how it works:
    Assuming a graph

      thd->A->B->C->thd

    deadlock_search() function starts from thd, locks it (in fact it locks not
    a thd, but a resource it is waiting on, but below, for simplicity, I'll
    talk about "locking a thd"). Then it goes down recursively, locks A, and so
    on. Goes down recursively, locks B. Goes down recursively, locks C.
    Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
    Returns from the last deadlock_search() call. C stays locked!
    Now it checks whether C is a more appropriate victim than 'thd'.
    If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
    Now it checks whether B is a more appropriate victim than arg->victim.
    If yes - old arg->victim is unlocked and arg->victim=B,
    otherwise B is unlocked. Return.
    And so on.

    In short, a resource is locked in a frame. But it's not unlocked in the
    same frame, it's unlocked by the caller, and only after the caller checks
    that it doesn't need to use current WT_THD as a victim. If it does - the
    lock is kept and the old victim's resource is unlocked. When the recursion
    is unrolled and we are back to deadlock() function, there are only two
    locks left - on thd and on the victim.
  */
  arg->last_locked_rc= rc;
  DBUG_PRINT("wt", ("exit: %s",
                    ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
                    ret ? "WT_DEADLOCK" : "OK"));
  DBUG_RETURN(ret);
}
836 
/*
  Run deadlock detection starting from 'blocker' and, if a deadlock
  that includes 'thd' is found, kill the chosen victim.

  thd        the thread doing the detection
  blocker    where to start the search (thd itself when depth == 0)
  depth      0 if blocker == thd, 1 otherwise
  max_depth  search depth limit ("short" or "long")

  RETURN
    WT_OK          no deadlock, or the victim is somebody else and was
                   already notified
    WT_DEADLOCK    thd itself is the victim and must abort
    WT_FREE_TO_GO  (only possible when depth == 0) nobody owns the
                   resource anymore, thd does not need to wait at all
*/
static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
                    uint max_depth)
{
  struct deadlock_arg arg= {thd, max_depth, 0, 0};
  int ret;
  DBUG_ENTER("deadlock");
  DBUG_ASSERT(depth < 2);
  ret= deadlock_search(&arg, blocker, depth);
  if (ret == WT_DEPTH_EXCEEDED)
  {
    /* not necessarily a deadlock - count it and assume the best */
    increment_cycle_stats(WT_CYCLE_STATS, max_depth ==
                                          *thd->deadlock_search_depth_long);
    ret= WT_OK;
  }
  /*
    if we started with depth==1, blocker was never considered for a victim
    in deadlock_search(). Do it here.
  */
  if (ret == WT_DEADLOCK && depth)
    change_victim(blocker, &arg);
  if (arg.last_locked_rc)
  {
    /*
      Special return code if there's nobody to wait for.

      depth == 0 means that we start the search from thd (thd == blocker).
      ret == WT_OK means that no cycle was found and
      arg.last_locked_rc == thd->waiting_for.
      and arg.last_locked_rc->owners.elements == 0 means that
      (applying the rule above) thd->waiting_for->owners.elements == 0,
      and thd doesn't have anybody to wait for.
    */
    if (depth == 0 && ret == WT_OK && arg.last_locked_rc->owners.elements == 0)
    {
      DBUG_ASSERT(thd == blocker);
      DBUG_ASSERT(arg.last_locked_rc == thd->waiting_for);
      ret= WT_FREE_TO_GO;
    }
    rc_unlock(arg.last_locked_rc);
  }
  /* notify the victim, if appropriate */
  if (ret == WT_DEADLOCK && arg.victim != thd)
  {
    DBUG_PRINT("wt", ("killing %s", arg.victim->name));
    /*
      the victim's resource is still locked here (see the comment at the
      end of deadlock_search()), so the victim cannot stop waiting before
      it sees the 'killed' flag
    */
    arg.victim->killed= 1;
    mysql_cond_broadcast(&arg.victim->waiting_for->cond);
    rc_unlock(arg.victim->waiting_for);
    ret= WT_OK;
  }
  DBUG_RETURN(ret);
}
907 
908 
/*
  Delete the resource from the hash if it has no owners and no waiters.
  'rc' must be locked on entry; the lock is released before returning
  in all cases.

  RETURN
    0 - ok
    1 - error (out of memory, or hash delete failed)
*/
static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
{
  uint keylen;
  const void *key;
  DBUG_ENTER("unlock_lock_and_free_resource");

  DBUG_ASSERT(rc->state == ACTIVE);

  if (rc->owners.elements || rc->waiter_count)
  {
    /* somebody still needs the resource - nothing to free */
    DBUG_PRINT("wt", ("nothing to do, %u owners, %u waiters",
                      rc->owners.elements, rc->waiter_count));
    rc_unlock(rc);
    DBUG_RETURN(0);
  }

  /* lf_hash_delete() below needs pins */
  if (fix_thd_pins(thd))
  {
    rc_unlock(rc);
    DBUG_RETURN(1);
  }

  /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */
  {
    key= &rc->id;
    keylen= sizeof_WT_RESOURCE_ID;
  }

  /*
    To free the element correctly we need to:
    1. take its lock (already done).
    2. set the state to FREE
    3. release the lock
    4. remove from the hash
  */
  rc->state= FREE;
  rc_unlock(rc);
  DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1);
}
953 
954 
/*
  Stop waiting: decrement the waiter count of thd->waiting_for and free
  the resource if possible. The resource must be write-locked on entry;
  it is unlocked (and possibly freed) on return.

  RETURN
    WT_OK       or
    WT_DEADLOCK if the thread was killed, or on out of memory
*/
static int stop_waiting_locked(WT_THD *thd)
{
  int ret;
  WT_RESOURCE *rc= thd->waiting_for;
  DBUG_ENTER("stop_waiting_locked");

  DBUG_ASSERT(rc->waiter_count);
  DBUG_ASSERT(rc->state == ACTIVE);
  rc->waiter_count--;
  thd->waiting_for= 0;
  ret= unlock_lock_and_free_resource(thd, rc);
  /* the thread may have been chosen as a deadlock victim while waiting */
  DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK);
}
974 
980 static int stop_waiting(WT_THD *thd)
981 {
982  int ret;
983  WT_RESOURCE *rc= thd->waiting_for;
984  DBUG_ENTER("stop_waiting");
985 
986  if (!rc)
987  DBUG_RETURN(WT_OK);
988  /*
989  nobody's trying to free the resource now,
990  as its waiter_count is guaranteed to be non-zero
991  */
992  rc_wrlock(rc);
993  ret= stop_waiting_locked(thd);
994  DBUG_RETURN(ret);
995 }
996 
1012  const WT_RESOURCE_ID *resid)
1013 {
1014  uint i;
1015  WT_RESOURCE *rc;
1016  DBUG_ENTER("wt_thd_will_wait_for");
1017 
1018  LF_REQUIRE_PINS(3);
1019 
1020  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%lu",
1021  thd->name, blocker->name, (ulong)resid->value));
1022 
1023  if (fix_thd_pins(thd))
1024  DBUG_RETURN(WT_DEADLOCK);
1025 
1026  if (thd->waiting_for == 0)
1027  {
1028  uint keylen;
1029  const void *key;
1030  /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */
1031  {
1032  key= resid;
1033  keylen= sizeof_WT_RESOURCE_ID;
1034  }
1035 
1036  DBUG_PRINT("wt", ("first blocker"));
1037 
1038 retry:
1039  while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0)
1040  {
1041  WT_RESOURCE tmp;
1042 
1043  DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
1044  memset(&tmp, 0, sizeof(tmp));
1045  tmp.id= *resid;
1046  tmp.state= ACTIVE;
1047 
1048  if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
1049  DBUG_RETURN(WT_DEADLOCK);
1050  /*
1051  Two cases: either lf_hash_insert() failed - because another thread
1052  has just inserted a resource with the same id - and we need to retry.
1053  Or lf_hash_insert() succeeded, and then we need to repeat
1054  lf_hash_search() to find a real address of the newly inserted element.
1055  That is, we don't care what lf_hash_insert() has returned.
1056  And we need to repeat the loop anyway.
1057  */
1058  }
1059  if (rc == MY_ERRPTR)
1060  DBUG_RETURN(WT_DEADLOCK);
1061 
1062  DBUG_PRINT("wt", ("found in hash rc=%p", rc));
1063 
1064  rc_wrlock(rc);
1065  if (rc->state != ACTIVE)
1066  {
1067  DBUG_PRINT("wt", ("but it's not active, retrying"));
1068  /* Somebody has freed the element while we weren't looking */
1069  rc_unlock(rc);
1070  lf_hash_search_unpin(thd->pins);
1071  goto retry;
1072  }
1073 
1074  lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */
1075  thd->waiting_for= rc;
1076  rc->waiter_count++;
1077  thd->killed= 0;
1078  }
1079  else
1080  {
1081  DBUG_ASSERT(thd->waiting_for->id.type == resid->type);
1082  DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0);
1083  DBUG_PRINT("wt", ("adding another blocker"));
1084 
1085  /*
1086  we can safely access the resource here, it's in the hash as it has
1087  non-zero waiter_count
1088  */
1089  rc= thd->waiting_for;
1090  rc_wrlock(rc);
1091  DBUG_ASSERT(rc->waiter_count);
1092  DBUG_ASSERT(rc->state == ACTIVE);
1093 
1094  if (thd->killed)
1095  {
1096  stop_waiting_locked(thd);
1097  DBUG_RETURN(WT_DEADLOCK);
1098  }
1099  }
1100  /*
1101  Another thread could be waiting on this resource for this very 'blocker'.
1102  In this case we should not add it to the list for the second time.
1103  */
1104  for (i= 0; i < rc->owners.elements; i++)
1105  if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker)
1106  break;
1107  if (i >= rc->owners.elements)
1108  {
1109  if (push_dynamic(&blocker->my_resources, (void*)&rc))
1110  {
1111  stop_waiting_locked(thd);
1112  DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
1113  }
1114  if (push_dynamic(&rc->owners, (void*)&blocker))
1115  {
1116  pop_dynamic(&blocker->my_resources);
1117  stop_waiting_locked(thd);
1118  DBUG_RETURN(WT_DEADLOCK);
1119  }
1120  }
1121  rc_unlock(rc);
1122 
1123  if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short) != WT_OK)
1124  {
1125  stop_waiting(thd);
1126  DBUG_RETURN(WT_DEADLOCK);
1127  }
1128  DBUG_RETURN(WT_OK);
1129 }
1130 
1140 {
1141  int ret= WT_TIMEOUT;
1142  struct timespec timeout;
1143  ulonglong before, after, starttime;
1144  WT_RESOURCE *rc= thd->waiting_for;
1145  DBUG_ENTER("wt_thd_cond_timedwait");
1146  DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc));
1147 
1148 #ifndef DBUG_OFF
1149  if (rc->cond_mutex)
1150  DBUG_ASSERT(rc->cond_mutex == mutex);
1151  else
1152  rc->cond_mutex= mutex;
1153  mysql_mutex_assert_owner(mutex);
1154 #endif
1155 
1156  before= starttime= my_getsystime();
1157 
1158 #ifdef __WIN__
1159  /*
1160  only for the sake of Windows we distinguish between
1161  'before' and 'starttime':
1162 
1163  my_getsystime() returns high-resolution value, that cannot be used for
1164  waiting (it doesn't follow system clock changes), but is good for time
1165  intervals.
1166 
1167  GetSystemTimeAsFileTime() follows system clock, but is low-resolution
1168  and will result in lousy intervals.
1169  */
1170  GetSystemTimeAsFileTime((PFILETIME)&starttime);
1171 #endif
1172 
1173  rc_wrlock(rc);
1174  if (rc->owners.elements == 0)
1175  ret= WT_OK;
1176  rc_unlock(rc);
1177 
1178  set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*1000ULL);
1179  if (ret == WT_TIMEOUT && !thd->killed)
1180  ret= mysql_cond_timedwait(&rc->cond, mutex, &timeout);
1181  if (ret == WT_TIMEOUT && !thd->killed)
1182  {
1183  int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long);
1184  if (r == WT_FREE_TO_GO)
1185  ret= WT_OK;
1186  else if (r != WT_OK)
1187  ret= WT_DEADLOCK;
1188  else if (*thd->timeout_long > *thd->timeout_short)
1189  {
1190  set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*1000ULL);
1191  if (!thd->killed)
1192  ret= mysql_cond_timedwait(&rc->cond, mutex, &timeout);
1193  }
1194  }
1195  after= my_getsystime();
1196  if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
1197  ret= WT_DEADLOCK;
1198  increment_wait_stats(after-before, ret);
1199  if (ret == WT_OK)
1200  increment_success_stats();
1201  DBUG_RETURN(ret);
1202 }
1203 
/*
  Release some (or all) resources owned by the thread.

  thd    the owner thread
  resid  id of the resource to release, or NULL to release all of them

  Removes thd from the owner list of each matching resource, wakes up
  the waiters when thd was the last owner, and frees resources that
  became completely unused.
*/
void wt_thd_release(WT_THD *thd, const WT_RESOURCE_ID *resid)
{
  uint i;
  DBUG_ENTER("wt_thd_release");

  for (i= 0; i < thd->my_resources.elements; i++)
  {
    WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
    if (!resid || (resid->type->compare(&rc->id, resid) == 0))
    {
      uint j;

      rc_wrlock(rc);
      /*
        nobody's trying to free the resource now,
        as its owners[] array is not empty (at least thd must be there)
      */
      DBUG_ASSERT(rc->state == ACTIVE);
      /* find thd in the owners array and remove it */
      for (j= 0; j < rc->owners.elements; j++)
        if (*dynamic_element(&rc->owners, j, WT_THD**) == thd)
          break;
      DBUG_ASSERT(j < rc->owners.elements);
      delete_dynamic_element(&rc->owners, j);
      if (rc->owners.elements == 0)
      {
        /* thd was the last owner - wake up everybody waiting on it */
        mysql_cond_broadcast(&rc->cond);
#ifndef DBUG_OFF
        if (rc->cond_mutex)
          mysql_mutex_assert_owner(rc->cond_mutex);
#endif
      }
      unlock_lock_and_free_resource(thd, rc);
      if (resid)
      {
        /* a specific resource can be in my_resources only once - done */
        delete_dynamic_element(&thd->my_resources, i);
        DBUG_VOID_RETURN;
      }
    }
  }
  if (!resid)
    reset_dynamic(&thd->my_resources);
  DBUG_VOID_RETURN;
}
1256