MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.c
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Hash table
4  *
5  * The hash function used here is by Bob Jenkins, 1996:
6  * <http://burtleburtle.net/bob/hash/doobs.html>
7  * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net.
8  * You may use this code any way you wish, private, educational,
9  * or commercial. It's free."
10  *
11  */
12 #include "config.h"
13 #include "memcached.h"
14 
15 /*
16  * Since the hash function does bit manipulation, it needs to know
17  * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
18  * are set in the configure script.
19  */
20 #ifdef WORDS_BIGENDIAN
21 # define HASH_LITTLE_ENDIAN 0
22 # define HASH_BIG_ENDIAN 1
23 #else
24 # define HASH_LITTLE_ENDIAN 1
25 # define HASH_BIG_ENDIAN 0
26 #endif
27 
28 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
29 
30 /*
31 -------------------------------------------------------------------------------
32 mix -- mix 3 32-bit values reversibly.
33 
34 This is reversible, so any information in (a,b,c) before mix() is
35 still in (a,b,c) after mix().
36 
37 If four pairs of (a,b,c) inputs are run through mix(), or through
38 mix() in reverse, there are at least 32 bits of the output that
39 are sometimes the same for one pair and different for another pair.
40 This was tested for:
41 * pairs that differed by one bit, by two bits, in any combination
42  of top bits of (a,b,c), or in any combination of bottom bits of
43  (a,b,c).
44 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
45  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
46  is commonly produced by subtraction) look like a single 1-bit
47  difference.
48 * the base values were pseudorandom, all zero but one bit set, or
49  all zero plus a counter that starts at zero.
50 
51 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
52 satisfy this are
53  4 6 8 16 19 4
54  9 15 3 18 27 15
55  14 9 3 7 17 3
56 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
57 for "differ" defined as + with a one-bit base and a two-bit delta. I
58 used http://burtleburtle.net/bob/hash/avalanche.html to choose
59 the operations, constants, and arrangements of the variables.
60 
61 This does not achieve avalanche. There are input bits of (a,b,c)
62 that fail to affect some output bits of (a,b,c), especially of a. The
63 most thoroughly mixed value is c, but it doesn't really even achieve
64 avalanche in c.
65 
66 This allows some parallelism. Read-after-writes are good at doubling
67 the number of bits affected, so the goal of mixing pulls in the opposite
68 direction as the goal of parallelism. I did what I could. Rotates
69 seem to cost as much as shifts on every machine I could lay my hands
70 on, and rotates are much kinder to the top and bottom bits, so I used
71 rotates.
72 -------------------------------------------------------------------------------
73 */
74 #define mix(a,b,c) \
75 { \
76  a -= c; a ^= rot(c, 4); c += b; \
77  b -= a; b ^= rot(a, 6); a += c; \
78  c -= b; c ^= rot(b, 8); b += a; \
79  a -= c; a ^= rot(c,16); c += b; \
80  b -= a; b ^= rot(a,19); a += c; \
81  c -= b; c ^= rot(b, 4); b += a; \
82 }
83 
84 /*
85 -------------------------------------------------------------------------------
86 final -- final mixing of 3 32-bit values (a,b,c) into c
87 
88 Pairs of (a,b,c) values differing in only a few bits will usually
89 produce values of c that look totally different. This was tested for
90 * pairs that differed by one bit, by two bits, in any combination
91  of top bits of (a,b,c), or in any combination of bottom bits of
92  (a,b,c).
93 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
94  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
95  is commonly produced by subtraction) look like a single 1-bit
96  difference.
97 * the base values were pseudorandom, all zero but one bit set, or
98  all zero plus a counter that starts at zero.
99 
100 These constants passed:
101  14 11 25 16 4 14 24
102  12 14 25 16 4 14 24
103 and these came close:
104  4 8 15 26 3 22 24
105  10 8 15 26 3 22 24
106  11 8 15 26 3 22 24
107 -------------------------------------------------------------------------------
108 */
109 #define final(a,b,c) \
110 { \
111  c ^= b; c -= rot(b,14); \
112  a ^= c; a -= rot(c,11); \
113  b ^= a; b -= rot(a,25); \
114  c ^= b; c -= rot(b,16); \
115  a ^= c; a -= rot(c,4); \
116  b ^= a; b -= rot(a,14); \
117  c ^= b; c -= rot(b,24); \
118 }
119 
120 #if HASH_LITTLE_ENDIAN == 1
121 uint32_t hash(
122  const void *key, /* the key to hash */
123  size_t length, /* length of the key */
124  const uint32_t initval) /* initval */
125 {
126  uint32_t a,b,c; /* internal state */
127  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
128 
129  /* Set up the internal state */
130  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
131 
132  u.ptr = key;
133  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
134  const uint32_t *k = key; /* read 32-bit chunks */
135 #ifdef VALGRIND
136  const uint8_t *k8;
137 #endif /* ifdef VALGRIND */
138 
139  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
140  while (length > 12)
141  {
142  a += k[0];
143  b += k[1];
144  c += k[2];
145  mix(a,b,c);
146  length -= 12;
147  k += 3;
148  }
149 
150  /*----------------------------- handle the last (probably partial) block */
151  /*
152  * "k[2]&0xffffff" actually reads beyond the end of the string, but
153  * then masks off the part it's not allowed to read. Because the
154  * string is aligned, the masked-off tail is in the same word as the
155  * rest of the string. Every machine with memory protection I've seen
156  * does it on word boundaries, so is OK with this. But VALGRIND will
157  * still catch it and complain. The masking trick does make the hash
158  * noticably faster for short strings (like English words).
159  */
160 #ifndef VALGRIND
161 
162  switch(length)
163  {
164  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
165  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
166  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
167  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
168  case 8 : b+=k[1]; a+=k[0]; break;
169  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
170  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
171  case 5 : b+=k[1]&0xff; a+=k[0]; break;
172  case 4 : a+=k[0]; break;
173  case 3 : a+=k[0]&0xffffff; break;
174  case 2 : a+=k[0]&0xffff; break;
175  case 1 : a+=k[0]&0xff; break;
176  case 0 : return c; /* zero length strings require no mixing */
177  }
178 
179 #else /* make valgrind happy */
180 
181  k8 = (const uint8_t *)k;
182  switch(length)
183  {
184  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
185  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
186  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
187  case 9 : c+=k8[8]; /* fall through */
188  case 8 : b+=k[1]; a+=k[0]; break;
189  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
190  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
191  case 5 : b+=k8[4]; /* fall through */
192  case 4 : a+=k[0]; break;
193  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
194  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
195  case 1 : a+=k8[0]; break;
196  case 0 : return c; /* zero length strings require no mixing */
197  }
198 
199 #endif /* !valgrind */
200 
201  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
202  const uint16_t *k = key; /* read 16-bit chunks */
203  const uint8_t *k8;
204 
205  /*--------------- all but last block: aligned reads and different mixing */
206  while (length > 12)
207  {
208  a += k[0] + (((uint32_t)k[1])<<16);
209  b += k[2] + (((uint32_t)k[3])<<16);
210  c += k[4] + (((uint32_t)k[5])<<16);
211  mix(a,b,c);
212  length -= 12;
213  k += 6;
214  }
215 
216  /*----------------------------- handle the last (probably partial) block */
217  k8 = (const uint8_t *)k;
218  switch(length)
219  {
220  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
221  b+=k[2]+(((uint32_t)k[3])<<16);
222  a+=k[0]+(((uint32_t)k[1])<<16);
223  break;
224  case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */
225  case 10: c+=k[4]; /* @fallthrough@ */
226  b+=k[2]+(((uint32_t)k[3])<<16);
227  a+=k[0]+(((uint32_t)k[1])<<16);
228  break;
229  case 9 : c+=k8[8]; /* @fallthrough */
230  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
231  a+=k[0]+(((uint32_t)k[1])<<16);
232  break;
233  case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */
234  case 6 : b+=k[2];
235  a+=k[0]+(((uint32_t)k[1])<<16);
236  break;
237  case 5 : b+=k8[4]; /* @fallthrough */
238  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
239  break;
240  case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */
241  case 2 : a+=k[0];
242  break;
243  case 1 : a+=k8[0];
244  break;
245  case 0 : return c; /* zero length strings require no mixing */
246  }
247 
248  } else { /* need to read the key one byte at a time */
249  const uint8_t *k = key;
250 
251  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
252  while (length > 12)
253  {
254  a += k[0];
255  a += ((uint32_t)k[1])<<8;
256  a += ((uint32_t)k[2])<<16;
257  a += ((uint32_t)k[3])<<24;
258  b += k[4];
259  b += ((uint32_t)k[5])<<8;
260  b += ((uint32_t)k[6])<<16;
261  b += ((uint32_t)k[7])<<24;
262  c += k[8];
263  c += ((uint32_t)k[9])<<8;
264  c += ((uint32_t)k[10])<<16;
265  c += ((uint32_t)k[11])<<24;
266  mix(a,b,c);
267  length -= 12;
268  k += 12;
269  }
270 
271  /*-------------------------------- last block: affect all 32 bits of (c) */
272  switch(length) /* all the case statements fall through */
273  {
274  case 12: c+=((uint32_t)k[11])<<24;
275  case 11: c+=((uint32_t)k[10])<<16;
276  case 10: c+=((uint32_t)k[9])<<8;
277  case 9 : c+=k[8];
278  case 8 : b+=((uint32_t)k[7])<<24;
279  case 7 : b+=((uint32_t)k[6])<<16;
280  case 6 : b+=((uint32_t)k[5])<<8;
281  case 5 : b+=k[4];
282  case 4 : a+=((uint32_t)k[3])<<24;
283  case 3 : a+=((uint32_t)k[2])<<16;
284  case 2 : a+=((uint32_t)k[1])<<8;
285  case 1 : a+=k[0];
286  break;
287  case 0 : return c; /* zero length strings require no mixing */
288  }
289  }
290 
291  final(a,b,c);
292  return c; /* zero length strings require no mixing */
293 }
294 
295 #elif HASH_BIG_ENDIAN == 1
296 /*
297  * hashbig():
298  * This is the same as hashword() on big-endian machines. It is different
299  * from hashlittle() on all machines. hashbig() takes advantage of
300  * big-endian byte ordering.
301  */
302 uint32_t hash( const void *key, size_t length, const uint32_t initval)
303 {
304  uint32_t a,b,c;
305  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
306 
307  /* Set up the internal state */
308  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
309 
310  u.ptr = key;
311  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
312  const uint32_t *k = key; /* read 32-bit chunks */
313 #ifdef VALGRIND
314  const uint8_t *k8;
315 #endif /* ifdef VALGRIND */
316 
317  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
318  while (length > 12)
319  {
320  a += k[0];
321  b += k[1];
322  c += k[2];
323  mix(a,b,c);
324  length -= 12;
325  k += 3;
326  }
327 
328  /*----------------------------- handle the last (probably partial) block */
329  /*
330  * "k[2]<<8" actually reads beyond the end of the string, but
331  * then shifts out the part it's not allowed to read. Because the
332  * string is aligned, the illegal read is in the same word as the
333  * rest of the string. Every machine with memory protection I've seen
334  * does it on word boundaries, so is OK with this. But VALGRIND will
335  * still catch it and complain. The masking trick does make the hash
336  * noticably faster for short strings (like English words).
337  */
338 #ifndef VALGRIND
339 
340  switch(length)
341  {
342  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
343  case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
344  case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
345  case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
346  case 8 : b+=k[1]; a+=k[0]; break;
347  case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
348  case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
349  case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
350  case 4 : a+=k[0]; break;
351  case 3 : a+=k[0]&0xffffff00; break;
352  case 2 : a+=k[0]&0xffff0000; break;
353  case 1 : a+=k[0]&0xff000000; break;
354  case 0 : return c; /* zero length strings require no mixing */
355  }
356 
357 #else /* make valgrind happy */
358 
359  k8 = (const uint8_t *)k;
360  switch(length) /* all the case statements fall through */
361  {
362  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
363  case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
364  case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
365  case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
366  case 8 : b+=k[1]; a+=k[0]; break;
367  case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
368  case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
369  case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
370  case 4 : a+=k[0]; break;
371  case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
372  case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
373  case 1 : a+=((uint32_t)k8[0])<<24; break;
374  case 0 : return c;
375  }
376 
377 #endif /* !VALGRIND */
378 
379  } else { /* need to read the key one byte at a time */
380  const uint8_t *k = key;
381 
382  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
383  while (length > 12)
384  {
385  a += ((uint32_t)k[0])<<24;
386  a += ((uint32_t)k[1])<<16;
387  a += ((uint32_t)k[2])<<8;
388  a += ((uint32_t)k[3]);
389  b += ((uint32_t)k[4])<<24;
390  b += ((uint32_t)k[5])<<16;
391  b += ((uint32_t)k[6])<<8;
392  b += ((uint32_t)k[7]);
393  c += ((uint32_t)k[8])<<24;
394  c += ((uint32_t)k[9])<<16;
395  c += ((uint32_t)k[10])<<8;
396  c += ((uint32_t)k[11]);
397  mix(a,b,c);
398  length -= 12;
399  k += 12;
400  }
401 
402  /*-------------------------------- last block: affect all 32 bits of (c) */
403  switch(length) /* all the case statements fall through */
404  {
405  case 12: c+=k[11];
406  case 11: c+=((uint32_t)k[10])<<8;
407  case 10: c+=((uint32_t)k[9])<<16;
408  case 9 : c+=((uint32_t)k[8])<<24;
409  case 8 : b+=k[7];
410  case 7 : b+=((uint32_t)k[6])<<8;
411  case 6 : b+=((uint32_t)k[5])<<16;
412  case 5 : b+=((uint32_t)k[4])<<24;
413  case 4 : a+=k[3];
414  case 3 : a+=((uint32_t)k[2])<<8;
415  case 2 : a+=((uint32_t)k[1])<<16;
416  case 1 : a+=((uint32_t)k[0])<<24;
417  break;
418  case 0 : return c;
419  }
420  }
421 
422  final(a,b,c);
423  return c;
424 }
425 #else /* HASH_XXX_ENDIAN == 1 */
426 #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
427 #endif /* HASH_XXX_ENDIAN == 1 */