diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp index 1a61d3188a820..e63c1096163af 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp @@ -23,6 +23,10 @@ #include #include +#if defined(__x86_64__) +#include // for __rdtsc() +#endif + #ifdef __linux__ #include // mmap() @@ -38,6 +42,10 @@ (uint32_t)'D') #define LLVM_PERF_JIT_VERSION 1 +// bit 0: set if the jitdump file is using an architecture-specific timestamp +// clock source +#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) + using namespace llvm; using namespace llvm::orc; @@ -54,6 +62,9 @@ struct PerfState { // output data stream std::unique_ptr Dumpstream; + // use arch-specific timestamp instead of CLOCK_MONOTONIC + bool UseArchTimestamp = false; + // perf mmap marker void *MarkerAddr = NULL; }; @@ -102,7 +113,15 @@ static inline uint64_t timespec_to_ns(const struct timespec *TS) { return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec; } -static inline uint64_t perf_get_timestamp() { +static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) { + if (use_arch_timestamp) { +#if defined(__x86_64__) + return __rdtsc(); +#else + return 0; +#endif + } + timespec TS; if (clock_gettime(CLOCK_MONOTONIC, &TS)) return 0; @@ -116,7 +135,7 @@ static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { << DebugRecord.Entries.size() << " entries\n"); [[maybe_unused]] size_t Written = 0; DIR Dir{RecHeader{static_cast(DebugRecord.Prefix.Id), - DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, + DebugRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)}, DebugRecord.CodeAddr, DebugRecord.Entries.size()}; State->Dumpstream->write(reinterpret_cast(&Dir), sizeof(Dir)); Written += sizeof(Dir); @@ -136,7 +155,7 @@ static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { << CodeRecord.CodeSize << " and code index " << CodeRecord.CodeIndex << "\n"); CLR Clr{RecHeader{static_cast(CodeRecord.Prefix.Id), - CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, + CodeRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)}, State->Pid, Tid, CodeRecord.Vma, @@ -160,7 +179,7 @@ writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { << UnwindRecord.EHFrameHdrSize << " and mapped size " << UnwindRecord.MappedSize << "\n"; UWR Uwr{RecHeader{static_cast(UnwindRecord.Prefix.Id), - UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, + UnwindRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)}, UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, UnwindRecord.MappedSize}; LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " @@ -246,7 +265,8 @@ static Expected
FillMachine(PerfState &State) { Hdr.Version = LLVM_PERF_JIT_VERSION; Hdr.TotalSize = sizeof(Hdr); Hdr.Pid = State.Pid; - Hdr.Timestamp = perf_get_timestamp(); + Hdr.Timestamp = perf_get_timestamp(State.UseArchTimestamp); + Hdr.Flags = State.UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0; char Id[16]; struct { @@ -330,8 +350,15 @@ static Error InitDebuggingDir(PerfState &State) { static Error registerJITLoaderPerfStartImpl() { PerfState Tentative; Tentative.Pid = sys::Process::getProcessId(); + + if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) { + if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) { + Tentative.UseArchTimestamp = true; + } + } + // check if clock-source is supported - if (!perf_get_timestamp()) + if (!Tentative.UseArchTimestamp && !perf_get_timestamp(false)) return make_error("kernel does not support CLOCK_MONOTONIC", inconvertibleErrorCode()); @@ -385,7 +412,7 @@ static Error registerJITLoaderPerfEndImpl() { RecHeader Close; Close.Id = static_cast(PerfJITRecordType::JIT_CODE_CLOSE); Close.TotalSize = sizeof(Close); - Close.Timestamp = perf_get_timestamp(); + Close.Timestamp = perf_get_timestamp(State->UseArchTimestamp); State->Dumpstream->write(reinterpret_cast(&Close), sizeof(Close)); if (State->MarkerAddr) diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index 4d14a606b98b0..e050cf7eead1f 100644 --- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -36,6 +36,10 @@ #include // clock_gettime(), time(), localtime_r() */ #include // for read(), close() +#if defined(__x86_64__) +#include // for __rdtsc() +#endif + using namespace llvm; using namespace llvm::object; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; @@ -100,6 +104,9 @@ class PerfJITEventListener : public JITEventListener { // perf mmap marker void *MarkerAddr = NULL; + // use arch-specific timestamp instead of CLOCK_MONOTONIC + bool UseArchTimestamp = false; + // perf support ready bool SuccessfullyInitialized = false; @@ -168,10 +175,18 @@ static inline uint64_t timespec_to_ns(const struct timespec *ts) { return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; } -static inline uint64_t perf_get_timestamp(void) { +static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) { struct timespec ts; int ret; + if (use_arch_timestamp) { +#if defined(__x86_64__) + return __rdtsc(); +#else + return 0; +#endif + } + ret = clock_gettime(CLOCK_MONOTONIC, &ts); if (ret) return 0; @@ -181,8 +196,16 @@ static inline uint64_t perf_get_timestamp(void) { PerfJITEventListener::PerfJITEventListener() : Pid(sys::Process::getProcessId()) { + + // check if arch-specific timestamp should be used + if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) { + if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) { + UseArchTimestamp = true; + } + } + // check if clock-source is supported - if (!perf_get_timestamp()) { + if (!UseArchTimestamp && !perf_get_timestamp(false)) { errs() << "kernel does not support CLOCK_MONOTONIC\n"; return; } @@ -221,7 +244,8 @@ PerfJITEventListener::PerfJITEventListener() Header.Version = LLVM_PERF_JIT_VERSION; Header.TotalSize = sizeof(Header); Header.Pid = Pid; - Header.Timestamp = perf_get_timestamp(); + Header.Timestamp = perf_get_timestamp(UseArchTimestamp); + Header.Flags = UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0; Dumpstream->write(reinterpret_cast(&Header), sizeof(Header)); // Everything initialized, can do profiling now. @@ -417,7 +441,7 @@ void PerfJITEventListener::NotifyCode(Expected &Symbol, rec.Prefix.TotalSize = sizeof(rec) + // debug record itself Symbol->size() + 1 + // symbol name CodeSize; // and code - rec.Prefix.Timestamp = perf_get_timestamp(); + rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp); rec.CodeSize = CodeSize; rec.Vma = CodeAddr; @@ -446,7 +470,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, LLVMPerfJitRecordDebugInfo rec; rec.Prefix.Id = JIT_CODE_DEBUG_INFO; rec.Prefix.TotalSize = sizeof(rec); // will be increased further - rec.Prefix.Timestamp = perf_get_timestamp(); + rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp); rec.CodeAddr = CodeAddr; rec.NrEntry = Lines.size();