MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
my_rdtsc.c
1 /* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /*
17  rdtsc3 -- multi-platform timer code
18  pgulutzan@mysql.com, 2005-08-29
19  modified 2008-11-02
20 
21  Functions:
22 
23  my_timer_cycles ulonglong cycles
24  my_timer_nanoseconds ulonglong nanoseconds
25  my_timer_microseconds ulonglong "microseconds"
26  my_timer_milliseconds ulonglong milliseconds
27  my_timer_ticks ulonglong ticks
28  my_timer_init initialization / test
29 
30  We'll call the first 5 functions (the ones that return
31  a ulonglong) "my_timer_xxx" functions.
32  Each my_timer_xxx function returns a 64-bit timing value
33  since an arbitrary 'epoch' start. Since the only purpose
34  is to determine elapsed times, wall-clock time-of-day
35  is not known and not relevant.
36 
37  The my_timer_init function is necessary for initializing.
38  It returns information (underlying routine name,
39  frequency, resolution, overhead) about all my_timer_xxx
40  functions. A program should call my_timer_init once,
41  use the information to decide what my_timer_xxx function
42  to use, and subsequently call that function by function
43  pointer.
44 
45  A typical use would be:
46  my_timer_init() ... once, at program start
47  ...
48  time1= my_timer_xxx() ... time before start
49  [code that's timed]
50  time2= my_timer_xxx() ... time after end
51  elapsed_time= (time2 - time1) - overhead
52 */
53 
54 #include "my_global.h"
55 #include "my_rdtsc.h"
56 
57 #if defined(_WIN32)
58 #include <stdio.h>
59 #include "windows.h"
60 #else
61 #include <stdio.h>
62 #endif
63 
64 #if !defined(_WIN32)
65 #if TIME_WITH_SYS_TIME
66 #include <sys/time.h>
67 #include <time.h> /* for clock_gettime */
68 #else
69 #if HAVE_SYS_TIME_H
70 #include <sys/time.h>
71 #elif defined(HAVE_TIME_H)
72 #include <time.h>
73 #endif
74 #endif
75 #endif
76 
77 #if defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
78 #include <asm/msr.h> /* for rdtscll */
79 #endif
80 
81 #if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
82 #include <sys/timeb.h> /* for ftime */
83 #endif
84 
85 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
86 #include <sys/times.h> /* for times */
87 #endif
88 
89 #if defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
90 #include <ia64intrin.h> /* for __GetReg */
91 #endif
92 
93 #if defined(__APPLE__) && defined(__MACH__)
94 #include <mach/mach_time.h>
95 #endif
96 
97 #if defined(__SUNPRO_CC) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
98 extern "C" ulonglong my_timer_cycles_il_sparc64();
99 #elif defined(__SUNPRO_CC) && defined(_ILP32) && !defined(__SunOS_5_7)
100 extern "C" ulonglong my_timer_cycles_il_sparc32();
101 #elif defined(__SUNPRO_CC) && defined(__i386) && defined(_ILP32)
102 extern "C" ulonglong my_timer_cycles_il_i386();
103 #elif defined(__SUNPRO_CC) && defined(__x86_64) && defined(_LP64)
104 extern "C" ulonglong my_timer_cycles_il_x86_64();
105 #elif defined(__SUNPRO_C) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
106 ulonglong my_timer_cycles_il_sparc64();
107 #elif defined(__SUNPRO_C) && defined(_ILP32) && !defined(__SunOS_5_7)
108 ulonglong my_timer_cycles_il_sparc32();
109 #elif defined(__SUNPRO_C) && defined(__i386) && defined(_ILP32)
110 ulonglong my_timer_cycles_il_i386();
111 #elif defined(__SUNPRO_C) && defined(__x86_64) && defined(_LP64)
112 ulonglong my_timer_cycles_il_x86_64();
113 #endif
114 
115 #if defined(__INTEL_COMPILER)
116 /*
117  icc warning #1011 is:
118  missing return statement at end of non-void function
119 */
120 #pragma warning (disable:1011)
121 #endif
122 
123 /*
124  For cycles, we depend on RDTSC for x86 platforms,
125  or on the time base (which is not really a cycle count
126  but a separate counter with less than nanosecond
127  resolution) for most PowerPC platforms, or on
128  gethrtime which is okay for hpux and solaris, or on
129  clock_gettime(CLOCK_SGI_CYCLE) for Irix platforms,
130  or on read_real_time for aix platforms. There is
131  nothing for Alpha platforms, they would be tricky.
132 */
133 
134 ulonglong my_timer_cycles(void)
135 {
136 #if defined(__GNUC__) && defined(__i386__)
137  /* This works much better if compiled with "gcc -O3". */
138  ulonglong result;
139  __asm__ __volatile__ ("rdtsc" : "=A" (result));
140  return result;
141 #elif defined(__SUNPRO_C) && defined(__i386)
142  __asm("rdtsc");
143 #elif defined(__GNUC__) && defined(__x86_64__)
144  ulonglong result;
145  __asm__ __volatile__ ("rdtsc\n\t" \
146  "shlq $32,%%rdx\n\t" \
147  "orq %%rdx,%%rax"
148  : "=a" (result) :: "%edx");
149  return result;
150 #elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
151  {
152  ulonglong result;
153  rdtscll(result);
154  return result;
155  }
156 #elif defined(_WIN32) && defined(_M_IX86)
157  __asm {rdtsc};
158 #elif defined(_WIN64) && defined(_M_X64)
159  /* For 64-bit Windows: unsigned __int64 __rdtsc(); */
160  return __rdtsc();
161 #elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
162  return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
163 #elif defined(__GNUC__) && defined(__ia64__)
164  {
165  ulonglong result;
166  __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
167  return result;
168  }
169 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
170  {
171  ulonglong result;
172  __asm __volatile__ ("mftb %0" : "=r" (result));
173  return result;
174  }
175 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
176  {
177  /*
178  mftbu means "move from time-buffer-upper to result".
179  The loop is saying: x1=upper, x2=lower, x3=upper,
180  if x1!=x3 there was an overflow so repeat.
181  */
182  unsigned int x1, x2, x3;
183  ulonglong result;
184  for (;;)
185  {
186  __asm __volatile__ ( "mftbu %0" : "=r"(x1) );
187  __asm __volatile__ ( "mftb %0" : "=r"(x2) );
188  __asm __volatile__ ( "mftbu %0" : "=r"(x3) );
189  if (x1 == x3) break;
190  }
191  result = x1;
192  return ( result << 32 ) | x2;
193  }
194 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
195  return (my_timer_cycles_il_sparc64());
196 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
197  return (my_timer_cycles_il_sparc32());
198 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
199  /* This is probably redundant for __SUNPRO_C. */
200  return (my_timer_cycles_il_i386());
201 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
202  return (my_timer_cycles_il_x86_64());
203 #elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
204  {
205  ulonglong result;
206  __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
207  return result;
208  }
209 #elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
210  {
211  union {
212  ulonglong wholeresult;
213  struct {
214  ulong high;
215  ulong low;
216  } splitresult;
217  } result;
218  __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
219  return result.wholeresult;
220  }
221 #elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
222  {
223  struct timespec tp;
224  clock_gettime(CLOCK_SGI_CYCLE, &tp);
225  return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
226  }
227 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
228  /* gethrtime may appear as either cycle or nanosecond counter */
229  return (ulonglong) gethrtime();
230 #else
231  return 0;
232 #endif
233 }
234 
235 #if defined(__INTEL_COMPILER)
236 /* re-enable warning#1011 which was only for my_timer_cycles() */
237 /* There may be an icc bug which means we must leave disabled. */
238 #pragma warning (default:1011)
239 #endif
240 
241 /*
242  For nanoseconds, most platforms have nothing available that
243  (a) doesn't require bringing in a 40-kb librt.so library
244  (b) really has nanosecond resolution.
245 */
246 
247 ulonglong my_timer_nanoseconds(void)
248 {
249 #if defined(HAVE_READ_REAL_TIME)
250  {
251  timebasestruct_t tr;
252  read_real_time(&tr, TIMEBASE_SZ);
253  return (ulonglong) tr.tb_high * 1000000000 + (ulonglong) tr.tb_low;
254  }
255 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
256  /* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */
257  return (ulonglong) gethrtime();
258 #elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME)
259  {
260  struct timespec tp;
261  clock_gettime(CLOCK_REALTIME, &tp);
262  return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
263  }
264 #elif defined(__APPLE__) && defined(__MACH__)
265  {
266  ulonglong tm;
267  static mach_timebase_info_data_t timebase_info= {0,0};
268  if (timebase_info.denom == 0)
269  (void) mach_timebase_info(&timebase_info);
270  tm= mach_absolute_time();
271  return (tm * timebase_info.numer) / timebase_info.denom;
272  }
273 #else
274  return 0;
275 #endif
276 }
277 
278 /*
279  For microseconds, gettimeofday() is available on
280  almost all platforms. On Windows we use
281  QueryPerformanceCounter which will usually tick over
282  3.5 million times per second, and we don't throw
283  away the extra precision. (On Windows Server 2003
284  the frequency is same as the cycle frequency.)
285 */
286 
287 ulonglong my_timer_microseconds(void)
288 {
289 #if defined(HAVE_GETTIMEOFDAY)
290  {
291  static ulonglong last_value= 0;
292  struct timeval tv;
293  if (gettimeofday(&tv, NULL) == 0)
294  last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec;
295  else
296  {
297  /*
298  There are reports that gettimeofday(2) can have intermittent failures
299  on some platform, see for example Bug#36819.
300  We are not trying again or looping, just returning the best value possible
301  under the circumstances ...
302  */
303  last_value++;
304  }
305  return last_value;
306  }
307 #elif defined(_WIN32)
308  {
309  /* QueryPerformanceCounter usually works with about 1/3 microsecond. */
310  LARGE_INTEGER t_cnt;
311 
312  QueryPerformanceCounter(&t_cnt);
313  return (ulonglong) t_cnt.QuadPart;
314  }
315 #else
316  return 0;
317 #endif
318 }
319 
320 /*
321  For milliseconds, we use ftime() if it's supported
322  or time()*1000 if it's not. With modern versions of
323  Windows and with HP Itanium, resolution is 10-15
324  milliseconds.
325 */
326 
327 ulonglong my_timer_milliseconds(void)
328 {
329 #if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
330  /* ftime() is obsolete but maybe the platform is old */
331  struct timeb ft;
332  ftime(&ft);
333  return (ulonglong)ft.time * 1000 + (ulonglong)ft.millitm;
334 #elif defined(HAVE_TIME)
335  return (ulonglong) time(NULL) * 1000;
336 #elif defined(_WIN32)
337  FILETIME ft;
338  GetSystemTimeAsFileTime( &ft );
339  return ((ulonglong)ft.dwLowDateTime +
340  (((ulonglong)ft.dwHighDateTime) << 32))/10000;
341 #else
342  return 0;
343 #endif
344 }
345 
346 /*
347  For ticks, which we handle with times(), the frequency
348  is usually 100/second and the overhead is surprisingly
349  bad, sometimes even worse than gettimeofday's overhead.
350 */
351 
352 ulonglong my_timer_ticks(void)
353 {
354 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
355  {
356  struct tms times_buf;
357  return (ulonglong) times(&times_buf);
358  }
359 #elif defined(_WIN32)
360  return (ulonglong) GetTickCount();
361 #else
362  return 0;
363 #endif
364 }
365 
366 /*
367  The my_timer_init() function and its sub-functions
368  have several loops which call timers. If there's
369  something wrong with a timer -- which has never
370  happened in tests -- we want the loop to end after
371  an arbitrary number of iterations, and my_timer_info
372  will show a discouraging result. The arbitrary
373  number is 1,000,000.
374 */
375 #define MY_TIMER_ITERATIONS 1000000
376 
377 /*
378  Calculate overhead. Called from my_timer_init().
379  Usually best_timer_overhead = cycles.overhead or
380  nanoseconds.overhead, so returned amount is in
381  cycles or nanoseconds. We repeat the calculation
382  twenty times, so that we can disregard effects of
383  caching or interrupts. Result is quite consistent
384  for cycles, at least. But remember it's a minimum.
385 */
386 
387 static void my_timer_init_overhead(ulonglong *overhead,
388  ulonglong (*cycle_timer)(void),
389  ulonglong (*this_timer)(void),
390  ulonglong best_timer_overhead)
391 {
392  ulonglong time1, time2;
393  int i;
394 
395  /* *overhead, least of 20 calculations - cycles.overhead */
396  for (i= 0, *overhead= 1000000000; i < 20; ++i)
397  {
398  time1= cycle_timer();
399  this_timer(); /* rather than 'time_tmp= timer();' */
400  time2= cycle_timer() - time1;
401  if (*overhead > time2)
402  *overhead= time2;
403  }
404  *overhead-= best_timer_overhead;
405 }
406 
407 /*
408  Calculate Resolution. Called from my_timer_init().
409  If a timer goes up by jumps, e.g. 1050, 1075, 1100, ...
410  then the best resolution is the minimum jump, e.g. 25.
411  If it's always divisible by 1000 then it's just a
412  result of multiplication of a lower-precision timer
413  result, e.g. nanoseconds are often microseconds * 1000.
414  If the minimum jump is less than an arbitrary passed
415  figure (a guess based on maximum overhead * 2), ignore.
416  Usually we end up with nanoseconds = 1 because it's too
417  hard to detect anything <= 100 nanoseconds.
418  Often GetTickCount() has resolution = 15.
419  We don't check with ticks because they take too long.
420 */
421 static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void),
422  ulonglong overhead_times_2)
423 {
424  ulonglong time1, time2;
425  ulonglong best_jump;
426  int i, jumps, divisible_by_1000, divisible_by_1000000;
427 
428  divisible_by_1000= divisible_by_1000000= 0;
429  best_jump= 1000000;
430  for (i= jumps= 0; jumps < 3 && i < MY_TIMER_ITERATIONS * 10; ++i)
431  {
432  time1= this_timer();
433  time2= this_timer();
434  time2-= time1;
435  if (time2)
436  {
437  ++jumps;
438  if (!(time2 % 1000))
439  {
440  ++divisible_by_1000;
441  if (!(time2 % 1000000))
442  ++divisible_by_1000000;
443  }
444  if (best_jump > time2)
445  best_jump= time2;
446  /* For milliseconds, one jump is enough. */
447  if (overhead_times_2 == 0)
448  break;
449  }
450  }
451  if (jumps == 3)
452  {
453  if (jumps == divisible_by_1000000)
454  return 1000000;
455  if (jumps == divisible_by_1000)
456  return 1000;
457  }
458  if (best_jump > overhead_times_2)
459  return best_jump;
460  return 1;
461 }
462 
463 /*
464  Calculate cycle frequency by seeing how many cycles pass
465  in a 200-microsecond period. I tried with 10-microsecond
466  periods originally, and the result was often very wrong.
467 */
468 
469 static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti)
470 {
471  int i;
472  ulonglong time1, time2, time3, time4;
473  time1= my_timer_cycles();
474  time2= my_timer_microseconds();
475  time3= time2; /* Avoids a Microsoft/IBM compiler warning */
476  for (i= 0; i < MY_TIMER_ITERATIONS; ++i)
477  {
478  time3= my_timer_microseconds();
479  if (time3 - time2 > 200) break;
480  }
481  time4= my_timer_cycles() - mti->cycles.overhead;
482  time4-= mti->microseconds.overhead;
483  return (mti->microseconds.frequency * (time4 - time1)) / (time3 - time2);
484 }
485 
486 /*
487  Call my_timer_init before the first call to my_timer_xxx().
488  If something must be initialized, it happens here.
489  Set: what routine is being used e.g. "asm_x86"
490  Set: function, overhead, actual frequency, resolution.
491 */
492 
493 void my_timer_init(MY_TIMER_INFO *mti)
494 {
495  ulonglong (*best_timer)(void);
496  ulonglong best_timer_overhead;
497  ulonglong time1, time2;
498  int i;
499 
500  /* cycles */
501  mti->cycles.frequency= 1000000000;
502 #if defined(__GNUC__) && defined(__i386__)
503  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
504 #elif defined(__SUNPRO_C) && defined(__i386)
505  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
506 #elif defined(__GNUC__) && defined(__x86_64__)
507  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_64;
508 #elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
509  mti->cycles.routine= MY_TIMER_ROUTINE_RDTSCLL;
510 #elif defined(_WIN32) && defined(_M_IX86)
511  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_WIN;
512 #elif defined(_WIN64) && defined(_M_X64)
513  mti->cycles.routine= MY_TIMER_ROUTINE_RDTSC;
514 #elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
515  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
516 #elif defined(__GNUC__) && defined(__ia64__)
517  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
518 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
519  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC64;
520 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
521  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC;
522 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
523  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64;
524 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
525  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32;
526 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
527  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_I386;
528 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
529  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64;
530 #elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
531  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC64;
532 #elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
533  mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC32;
534 #elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
535  mti->cycles.routine= MY_TIMER_ROUTINE_SGI_CYCLE;
536 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
537  mti->cycles.routine= MY_TIMER_ROUTINE_GETHRTIME;
538 #else
539  mti->cycles.routine= 0;
540 #endif
541 
542  if (!mti->cycles.routine || !my_timer_cycles())
543  {
544  mti->cycles.routine= 0;
545  mti->cycles.resolution= 0;
546  mti->cycles.frequency= 0;
547  mti->cycles.overhead= 0;
548  }
549 
550  /* nanoseconds */
551  mti->nanoseconds.frequency= 1000000000; /* initial assumption */
552 #if defined(HAVE_READ_REAL_TIME)
553  mti->nanoseconds.routine= MY_TIMER_ROUTINE_READ_REAL_TIME;
554 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
555  mti->nanoseconds.routine= MY_TIMER_ROUTINE_GETHRTIME;
556 #elif defined(HAVE_CLOCK_GETTIME)
557  mti->nanoseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME;
558 #elif defined(__APPLE__) && defined(__MACH__)
559  mti->nanoseconds.routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME;
560 #else
561  mti->nanoseconds.routine= 0;
562 #endif
563  if (!mti->nanoseconds.routine || !my_timer_nanoseconds())
564  {
565  mti->nanoseconds.routine= 0;
566  mti->nanoseconds.resolution= 0;
567  mti->nanoseconds.frequency= 0;
568  mti->nanoseconds.overhead= 0;
569  }
570 
571  /* microseconds */
572  mti->microseconds.frequency= 1000000; /* initial assumption */
573 #if defined(HAVE_GETTIMEOFDAY)
574  mti->microseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
575 #elif defined(_WIN32)
576  {
577  LARGE_INTEGER li;
578  /* Windows: typical frequency = 3579545, actually 1/3 microsecond. */
579  if (!QueryPerformanceFrequency(&li))
580  mti->microseconds.routine= 0;
581  else
582  {
583  mti->microseconds.frequency= li.QuadPart;
584  mti->microseconds.routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER;
585  }
586  }
587 #else
588  mti->microseconds.routine= 0;
589 #endif
590  if (!mti->microseconds.routine || !my_timer_microseconds())
591  {
592  mti->microseconds.routine= 0;
593  mti->microseconds.resolution= 0;
594  mti->microseconds.frequency= 0;
595  mti->microseconds.overhead= 0;
596  }
597 
598  /* milliseconds */
599  mti->milliseconds.frequency= 1000; /* initial assumption */
600 #if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
601  mti->milliseconds.routine= MY_TIMER_ROUTINE_FTIME;
602 #elif defined(_WIN32)
603  mti->milliseconds.routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME;
604 #elif defined(HAVE_TIME)
605  mti->milliseconds.routine= MY_TIMER_ROUTINE_TIME;
606 #else
607  mti->milliseconds.routine= 0;
608 #endif
609  if (!mti->milliseconds.routine || !my_timer_milliseconds())
610  {
611  mti->milliseconds.routine= 0;
612  mti->milliseconds.resolution= 0;
613  mti->milliseconds.frequency= 0;
614  mti->milliseconds.overhead= 0;
615  }
616 
617  /* ticks */
618  mti->ticks.frequency= 100; /* permanent assumption */
619 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
620  mti->ticks.routine= MY_TIMER_ROUTINE_TIMES;
621 #elif defined(_WIN32)
622  mti->ticks.routine= MY_TIMER_ROUTINE_GETTICKCOUNT;
623 #else
624  mti->ticks.routine= 0;
625 #endif
626  if (!mti->ticks.routine || !my_timer_ticks())
627  {
628  mti->ticks.routine= 0;
629  mti->ticks.resolution= 0;
630  mti->ticks.frequency= 0;
631  mti->ticks.overhead= 0;
632  }
633 
634  /*
635  Calculate overhead in terms of the timer that
636  gives the best resolution: cycles or nanoseconds.
637  I doubt it ever will be as bad as microseconds.
638  */
639  if (mti->cycles.routine)
640  best_timer= &my_timer_cycles;
641  else
642  {
643  if (mti->nanoseconds.routine)
644  {
645  best_timer= &my_timer_nanoseconds;
646  }
647  else
648  best_timer= &my_timer_microseconds;
649  }
650 
651  /* best_timer_overhead = least of 20 calculations */
652  for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i)
653  {
654  time1= best_timer();
655  time2= best_timer() - time1;
656  if (best_timer_overhead > time2)
657  best_timer_overhead= time2;
658  }
659  if (mti->cycles.routine)
660  my_timer_init_overhead(&mti->cycles.overhead,
661  best_timer,
662  &my_timer_cycles,
663  best_timer_overhead);
664  if (mti->nanoseconds.routine)
665  my_timer_init_overhead(&mti->nanoseconds.overhead,
666  best_timer,
667  &my_timer_nanoseconds,
668  best_timer_overhead);
669  if (mti->microseconds.routine)
670  my_timer_init_overhead(&mti->microseconds.overhead,
671  best_timer,
672  &my_timer_microseconds,
673  best_timer_overhead);
674  if (mti->milliseconds.routine)
675  my_timer_init_overhead(&mti->milliseconds.overhead,
676  best_timer,
677  &my_timer_milliseconds,
678  best_timer_overhead);
679  if (mti->ticks.routine)
680  my_timer_init_overhead(&mti->ticks.overhead,
681  best_timer,
682  &my_timer_ticks,
683  best_timer_overhead);
684 
685 /*
686  Calculate resolution for nanoseconds or microseconds
687  or milliseconds, by seeing if it's always divisible
688  by 1000, and by noticing how much jumping occurs.
689  For ticks, just assume the resolution is 1.
690 */
691  if (mti->cycles.routine)
692  mti->cycles.resolution= 1;
693  if (mti->nanoseconds.routine)
694  mti->nanoseconds.resolution=
695  my_timer_init_resolution(&my_timer_nanoseconds, 20000);
696  if (mti->microseconds.routine)
698  my_timer_init_resolution(&my_timer_microseconds, 20);
699  if (mti->milliseconds.routine)
700  {
701  if (mti->milliseconds.routine == MY_TIMER_ROUTINE_TIME)
702  mti->milliseconds.resolution= 1000;
703  else
705  my_timer_init_resolution(&my_timer_milliseconds, 0);
706  }
707  if (mti->ticks.routine)
708  mti->ticks.resolution= 1;
709 
710 /*
711  Calculate cycles frequency,
712  if we have both a cycles routine and a microseconds routine.
713  In tests, this usually results in a figure within 2% of
714  what "cat /proc/cpuinfo" says.
715  If the microseconds routine is QueryPerformanceCounter
716  (i.e. it's Windows), and the microseconds frequency is >
717  500,000,000 (i.e. it's Windows Server so it uses RDTSC)
718  and the microseconds resolution is > 100 (i.e. dreadful),
719  then calculate cycles frequency = microseconds frequency.
720 */
721  if (mti->cycles.routine
722  && mti->microseconds.routine)
723  {
724  if (mti->microseconds.routine ==
725  MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER
726  && mti->microseconds.frequency > 500000000
727  && mti->microseconds.resolution > 100)
729  else
730  {
731  ulonglong time1, time2;
732  time1= my_timer_init_frequency(mti);
733  /* Repeat once in case there was an interruption. */
734  time2= my_timer_init_frequency(mti);
735  if (time1 < time2) mti->cycles.frequency= time1;
736  else mti->cycles.frequency= time2;
737  }
738  }
739 
740 /*
741  Calculate milliseconds frequency =
742  (cycles-frequency/#-of-cycles) * #-of-milliseconds,
743  if we have both a milliseconds routine and a cycles
744  routine.
745  This will be inaccurate if milliseconds resolution > 1.
746  This is probably only useful when testing new platforms.
747 */
748  if (mti->milliseconds.routine
749  && mti->milliseconds.resolution < 1000
750  && mti->microseconds.routine
751  && mti->cycles.routine)
752  {
753  int i;
754  ulonglong time1, time2, time3, time4;
755  time1= my_timer_cycles();
756  time2= my_timer_milliseconds();
757  time3= time2; /* Avoids a Microsoft/IBM compiler warning */
758  for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
759  {
760  time3= my_timer_milliseconds();
761  if (time3 - time2 > 10) break;
762  }
763  time4= my_timer_cycles();
764  mti->milliseconds.frequency=
765  (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
766  }
767 
768 /*
769  Calculate ticks.frequency =
770  (cycles-frequency/#-of-cycles * #-of-ticks,
771  if we have both a ticks routine and a cycles
772  routine,
773  This is probably only useful when testing new platforms.
774 */
775  if (mti->ticks.routine
776  && mti->microseconds.routine
777  && mti->cycles.routine)
778  {
779  int i;
780  ulonglong time1, time2, time3, time4;
781  time1= my_timer_cycles();
782  time2= my_timer_ticks();
783  time3= time2; /* Avoids a Microsoft/IBM compiler warning */
784  for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
785  {
786  time3= my_timer_ticks();
787  if (time3 - time2 > 10) break;
788  }
789  time4= my_timer_cycles();
790  mti->ticks.frequency=
791  (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
792  }
793 }
794 
795 /*
796  Additional Comments
797  -------------------
798 
799  This is for timing, i.e. finding out how long a piece of code
800  takes. If you want time of day matching a wall clock, the
801  my_timer_xxx functions won't help you.
802 
803  The best timer is the one with highest frequency, lowest
804  overhead, and resolution=1. The my_timer_init() routine will tell
805  you at runtime which timer that is. Usually it will be
806  my_timer_cycles() but be aware that, although it's best,
807  it has possible flaws and dangers. Depending on platform:
808  - The frequency might change. We don't test for this. It
809  happens on laptops for power saving, and on blade servers
810  for avoiding overheating.
811  - The overhead that my_timer_init() returns is the minimum.
812  In fact it could be slightly greater because of caching or
813  because you call the routine by address, as recommended.
814  It could be hugely greater if there's an interrupt.
815  - The x86 cycle counter, RDTSC doesn't "serialize". That is,
816  if there is out-of-order execution, rdtsc might be processed
817  after an instruction that logically follows it.
818  (We could force serialization, but that would be slower.)
819  - It is possible to set a flag which renders RDTSC
820  inoperative. Somebody responsible for the kernel
821  of the operating system would have to make this
822  decision. For the platforms we've tested with, there's
823  no such problem.
824  - With a multi-processor arrangement, it's possible
825  to get the cycle count from one processor in
826  thread X, and the cycle count from another processor
827  in thread Y. They may not always be in synch.
828  - You can't depend on a cycle counter being available for
829  all platforms. On Alphas, the
830  cycle counter is only 32-bit, so it would overflow quickly,
831  so we don't bother with it. On platforms that we haven't
832  tested, there might be some if/endif combination that we
833  didn't expect, or some assembler routine that we didn't
834  supply.
835 
836  The recommended way to use the timer routines is:
837  1. Somewhere near the beginning of the program, call
838  my_timer_init(). This should only be necessary once,
839  although you can call it again if you think that the
840  frequency has changed.
841  2. Determine the best timer based on frequency, resolution,
842  overhead -- all things that my_timer_init() returns.
843  Preserve the address of the timer and the my_timer_init
844  results in an easily-accessible place.
845  3. Instrument the code section that you're monitoring, thus:
846  time1= my_timer_xxx();
847  Instrumented code;
848  time2= my_timer_xxx();
849  elapsed_time= (time2 - time1) - overhead;
850  If the timer is always on, then overhead is always there,
851  so don't subtract it.
852  4. Save the elapsed time, or add it to a totaller.
853  5. When all timing processes are complete, transfer the
854  saved / totalled elapsed time to permanent storage.
855  Optionally you can convert cycles to microseconds at
856  this point. (Don't do so every time you calculate
857  elapsed_time! That would waste time and lose precision!)
858  For converting cycles to microseconds, use the frequency
859  that my_timer_init() returns. You'll also need to convert
860  if the my_timer_microseconds() function is the Windows
861  function QueryPerformanceCounter(), since that's sometimes
862  a counter with precision slightly better than microseconds.
863 
864  Since we recommend calls by function pointer, we supply
865  no inline functions.
866 
867  Some comments on the many candidate routines for timing ...
868 
869  clock() -- We don't use because it would overflow frequently.
870 
871  clock_gettime() -- In tests, clock_gettime often had
872  resolution = 1000.
873 
874  ftime() -- A "man ftime" says: "This function is obsolete.
875  Don't use it." On every platform that we tested, if ftime()
876  was available, then so was gettimeofday(), and gettimeofday()
877  overhead was always at least as good as ftime() overhead.
878 
879  gettimeofday() -- available on most platforms, though not
880  on Windows. There is a hardware timer (sometimes a Programmable
881  Interrupt Timer or "PIT") (sometimes a "HPET") used for
882  interrupt generation. When it interrupts (a "tick" or "jiffy",
883  typically 1 centisecond) it sets xtime. For gettimeofday, a
884  Linux kernel routine usually gets xtime and then gets rdtsc
885  to get elapsed nanoseconds since the last tick. On Red Hat
886  Enterprise Linux 3, there was once a bug which caused the
887  resolution to be 1000, i.e. one centisecond. We never check
888  for time-zone change.
889 
890  getnstimeofday() -- something to watch for in future Linux
891 
892  do_gettimeofday() -- exists on Linux but not for "userland"
893 
894  get_cycles() -- a multi-platform function, worth watching
895  in future Linux versions. But we found platform-specific
896  functions which were better documented in operating-system
897  manuals. And get_cycles() can fail or return a useless
898  32-bit number. It might be available on some platforms,
899  such as arm, which we didn't test. Using
900  "include <linux/timex.h>" or "include <asm/timex.h>"
901  can lead to autoconf or compile errors, depending on system.
902 
903  rdtsc, __rdtsc, rdtscll: available for x86 with Linux BSD,
904  Solaris, Windows. See "possible flaws and dangers" comments.
905 
906  times(): what we use for ticks. Should just read the last
907  (xtime) tick count, therefore should be fast, but usually
908  isn't.
909 
910  GetTickCount(): we use this for my_timer_ticks() on
911  Windows. Actually it really is a tick counter, so resolution
912  >= 10 milliseconds unless you have a very old Windows version.
913  With Windows 95 or 98 or ME, timeGetTime() has better resolution than
914  GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000,
915  they're both getting from a variable in the Process Environment Block
916  (PEB), and the variable is set by the programmable interrupt timer, so
917  the resolution is the same (usually 10-15 milliseconds). Also timeGetTime
918  is slower on old machines:
919  http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74.
920  Also timeGetTime requires linking winmm.lib,
921  Therefore we use GetTickCount.
922  It will overflow every 49 days because the return is 32-bit.
923  There is also a GetTickCount64 but it requires Vista or Windows Server 2008.
924  (As for GetSystemTimeAsFileTime, its precision is spurious, it
925  just reads the tick variable like the other functions do.
926  However, we don't expect it to overflow every 49 days, so we
927  will prefer it for my_timer_milliseconds().)
928 
929  QueryPerformanceCounter() we use this for my_timer_microseconds()
930  on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads
931  the PIT so it's slow. On some Windows variants, uses RDTSC.
932 
933  GetLocalTime() this is available on Windows but we don't use it.
934 
935  getclock(): documented for Alpha, but not found during tests.
936 
937  mach_absolute_time() and UpTime() are recommended for Apple.
938  Initially they weren't tried, because asm_ppc seems to do the job.
939  But now we use mach_absolute_time for nanoseconds.
940 
941  Any clock-based timer can be affected by NTP (ntpd program),
942  which means:
943  - full-second correction can occur for leap second
944  - tiny corrections can occur approximately every 11 minutes
945  (but I think they only affect the RTC which isn't the PIT).
946 
947  We define "precision" as "frequency" and "high precision" is
948  "frequency better than 1 microsecond". We define "resolution"
949  as a synonym for "granularity". We define "accuracy" as
950  "closeness to the truth" as established by some authoritative
951  clock, but we can't measure accuracy.
952 
953  Do not expect any of our timers to be monotonic; we
954  won't guarantee that they return constantly-increasing
955  unique numbers.
956 
957  We tested with AIX, Solaris (x86 + Sparc), Linux (x86 +
958  Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX,
959  Irix, Mac. We didn't test with SCO.
960 
961 */
962