Skip to content

Commit ccea3de

Browse files
committed
amd64 timers: use lfence instead of cpuid for serialization
Signed-off-by: Carlos Bederián <[email protected]>
1 parent 4009ba6 commit ccea3de

File tree

1 file changed

+4
-21
lines changed

1 file changed

+4
-21
lines changed

opal/include/opal/sys/amd64/timer.h

Lines changed: 4 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;
31 31

32 32
#if OPAL_GCC_INLINE_ASSEMBLY
33 33

34-
/**
35-
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
36-
*/
34+
/* TODO: add AMD mfence version and dispatch at init */
37 35
static inline opal_timer_t
38 36
opal_sys_timer_get_cycles(void)
39 37
{
40-
unsigned l, h;
41-
#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP
42-
__asm__ __volatile__ ("cpuid\n\t"
38+
uint32_t l, h;
39+
__asm__ __volatile__ ("lfence\n\t"
43 40
"rdtsc\n\t"
44-
: "=a" (l), "=d" (h)
45-
:: "rbx", "rcx");
46-
#else
47-
/* If we need higher accuracy we should implement the algorithm proposed
48-
* on the Intel document referenced above. However, in the context of MPI
49-
* this function will be used as the backend for MPI_Wtime and as such
50-
* can afford a small inaccuracy.
51-
*/
52-
__asm__ __volatile__ ("rdtscp\n\t"
53-
"mov %%edx, %0\n\t"
54-
"mov %%eax, %1\n\t"
55-
"cpuid\n\t"
56-
: "=r" (h), "=r" (l)
57-
:: "rax", "rbx", "rcx", "rdx");
58-
#endif
41+
: "=a" (l), "=d" (h));
59 42
return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
60 43
}
61 44

0 commit comments

Comments
 (0)