Skip to content

Commit 9f073d7

Browse files
authored
Merge pull request #2926 from Zzzoom/amd64_timer_perf
Improve x86-64 timer performance
2 parents c62498a + ccea3de commit 9f073d7

File tree

2 files changed

+9
-25
lines changed

2 files changed

+9
-25
lines changed

opal/include/opal/sys/amd64/timer.h

Lines changed: 4 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;
3131

3232
#if OPAL_GCC_INLINE_ASSEMBLY
3333

34-
/**
35-
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
36-
*/
34+
/* TODO: add AMD mfence version and dispatch at init */
3735
static inline opal_timer_t
3836
opal_sys_timer_get_cycles(void)
3937
{
40-
unsigned l, h;
41-
#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP
42-
__asm__ __volatile__ ("cpuid\n\t"
38+
uint32_t l, h;
39+
__asm__ __volatile__ ("lfence\n\t"
4340
"rdtsc\n\t"
44-
: "=a" (l), "=d" (h)
45-
:: "rbx", "rcx");
46-
#else
47-
/* If we need higher accuracy we should implement the algorithm proposed
48-
* on the Intel document referenced above. However, in the context of MPI
49-
* this function will be used as the backend for MPI_Wtime and as such
50-
* can afford a small inaccuracy.
51-
*/
52-
__asm__ __volatile__ ("rdtscp\n\t"
53-
"mov %%edx, %0\n\t"
54-
"mov %%eax, %1\n\t"
55-
"cpuid\n\t"
56-
: "=r" (h), "=r" (l)
57-
:: "rax", "rbx", "rcx", "rdx");
58-
#endif
41+
: "=a" (l), "=d" (h));
5942
return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
6043
}
6144

opal/runtime/opal_progress.c

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
#include "opal/runtime/opal_params.h"
3838

3939
#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
40+
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
4041

4142
#if OPAL_ENABLE_DEBUG
4243
bool opal_progress_debug = false;
@@ -189,11 +190,11 @@ opal_progress(void)
189190
if( opal_progress_event_flag != 0 ) {
190191
#if OPAL_HAVE_WORKING_EVENTOPS
191192
#if OPAL_PROGRESS_USE_TIMERS
192-
#if OPAL_TIMER_USEC_NATIVE
193+
#if OPAL_PROGRESS_ONLY_USEC_NATIVE
193194
opal_timer_t now = opal_timer_base_get_usec();
194195
#else
195196
opal_timer_t now = opal_timer_base_get_cycles();
196-
#endif /* OPAL_TIMER_USEC_NATIVE */
197+
#endif /* OPAL_PROGRESS_ONLY_USEC_NATIVE */
197198
/* trip the event library if we've reached our tick rate and we are
198199
enabled */
199200
if (now - event_progress_last_time > event_progress_delta ) {
@@ -316,7 +317,7 @@ opal_progress_set_event_poll_rate(int polltime)
316317

317318
#if OPAL_PROGRESS_USE_TIMERS
318319
event_progress_delta = 0;
319-
# if OPAL_TIMER_USEC_NATIVE
320+
# if OPAL_PROGRESS_ONLY_USEC_NATIVE
320321
event_progress_last_time = opal_timer_base_get_usec();
321322
# else
322323
event_progress_last_time = opal_timer_base_get_cycles();
@@ -343,7 +344,7 @@ opal_progress_set_event_poll_rate(int polltime)
343344
#endif
344345
}
345346

346-
#if OPAL_PROGRESS_USE_TIMERS && !OPAL_TIMER_USEC_NATIVE
347+
#if OPAL_PROGRESS_USE_TIMERS && !OPAL_PROGRESS_ONLY_USEC_NATIVE
347348
/* going to use cycles for counter. Adjust specified usec into cycles */
348349
event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000;
349350
#endif

0 commit comments

Comments
 (0)