Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <mutex>
#include <optional>

#if defined(__x86_64__)
#include <x86intrin.h> // for __rdtsc()
#endif

#ifdef __linux__

#include <sys/mman.h> // mmap()
Expand All @@ -38,6 +42,10 @@
(uint32_t)'D')
#define LLVM_PERF_JIT_VERSION 1

// JITDUMP_FLAGS_ARCH_TIMESTAMP: bit 0 of the jitdump file header's Flags field.
// Set when the timestamps written to the jitdump file come from an
// architecture-specific clock source instead of CLOCK_MONOTONIC.
#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)

using namespace llvm;
using namespace llvm::orc;

Expand All @@ -54,6 +62,9 @@ struct PerfState {
// output data stream
std::unique_ptr<raw_fd_ostream> Dumpstream;

// use arch-specific timestamp instead of CLOCK_MONOTONIC
bool UseArchTimestamp = false;

// perf mmap marker
void *MarkerAddr = NULL;
};
Expand Down Expand Up @@ -102,7 +113,15 @@ static inline uint64_t timespec_to_ns(const struct timespec *TS) {
return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec;
}

static inline uint64_t perf_get_timestamp() {
static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) {
if (use_arch_timestamp) {
#if defined(__x86_64__)
return __rdtsc();
#else
return 0;
#endif
}

timespec TS;
if (clock_gettime(CLOCK_MONOTONIC, &TS))
return 0;
Expand All @@ -116,7 +135,7 @@ static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
<< DebugRecord.Entries.size() << " entries\n");
[[maybe_unused]] size_t Written = 0;
DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
DebugRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
DebugRecord.CodeAddr, DebugRecord.Entries.size()};
State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
Written += sizeof(Dir);
Expand All @@ -136,7 +155,7 @@ static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
<< CodeRecord.CodeSize << " and code index "
<< CodeRecord.CodeIndex << "\n");
CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
CodeRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
State->Pid,
Tid,
CodeRecord.Vma,
Expand All @@ -160,7 +179,7 @@ writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
<< UnwindRecord.EHFrameHdrSize << " and mapped size "
<< UnwindRecord.MappedSize << "\n";
UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
UnwindRecord.Prefix.TotalSize, perf_get_timestamp(State->UseArchTimestamp)},
UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
UnwindRecord.MappedSize};
LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
Expand Down Expand Up @@ -246,7 +265,8 @@ static Expected<Header> FillMachine(PerfState &State) {
Hdr.Version = LLVM_PERF_JIT_VERSION;
Hdr.TotalSize = sizeof(Hdr);
Hdr.Pid = State.Pid;
Hdr.Timestamp = perf_get_timestamp();
Hdr.Timestamp = perf_get_timestamp(State.UseArchTimestamp);
Hdr.Flags = State.UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0;

char Id[16];
struct {
Expand Down Expand Up @@ -330,8 +350,15 @@ static Error InitDebuggingDir(PerfState &State) {
static Error registerJITLoaderPerfStartImpl() {
PerfState Tentative;
Tentative.Pid = sys::Process::getProcessId();

if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) {
if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) {
Tentative.UseArchTimestamp = true;
}
}

// check if clock-source is supported
if (!perf_get_timestamp())
if (!Tentative.UseArchTimestamp && !perf_get_timestamp(false))
return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
inconvertibleErrorCode());

Expand Down Expand Up @@ -385,7 +412,7 @@ static Error registerJITLoaderPerfEndImpl() {
RecHeader Close;
Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
Close.TotalSize = sizeof(Close);
Close.Timestamp = perf_get_timestamp();
Close.Timestamp = perf_get_timestamp(State->UseArchTimestamp);
State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
sizeof(Close));
if (State->MarkerAddr)
Expand Down
34 changes: 29 additions & 5 deletions llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
#include <time.h> // clock_gettime(), time(), localtime_r()
#include <unistd.h> // for read(), close()

#if defined(__x86_64__)
#include <x86intrin.h> // for __rdtsc()
#endif

using namespace llvm;
using namespace llvm::object;
typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
Expand Down Expand Up @@ -100,6 +104,9 @@ class PerfJITEventListener : public JITEventListener {
// perf mmap marker
void *MarkerAddr = NULL;

// use arch-specific timestamp instead of CLOCK_MONOTONIC
bool UseArchTimestamp = false;

// perf support ready
bool SuccessfullyInitialized = false;

Expand Down Expand Up @@ -168,10 +175,18 @@ static inline uint64_t timespec_to_ns(const struct timespec *ts) {
return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
}

static inline uint64_t perf_get_timestamp(void) {
static inline uint64_t perf_get_timestamp(bool use_arch_timestamp) {
struct timespec ts;
int ret;

if (use_arch_timestamp) {
#if defined(__x86_64__)
return __rdtsc();
#else
return 0;
#endif
}

ret = clock_gettime(CLOCK_MONOTONIC, &ts);
if (ret)
return 0;
Expand All @@ -181,8 +196,16 @@ static inline uint64_t perf_get_timestamp(void) {

PerfJITEventListener::PerfJITEventListener()
: Pid(sys::Process::getProcessId()) {

// check if arch-specific timestamp should be used
if (const char *UseArchTimestampEnv = getenv("JITDUMP_USE_ARCH_TIMESTAMP")) {
if (strcmp(UseArchTimestampEnv, "1") == 0 && perf_get_timestamp(true)) {
UseArchTimestamp = true;
}
}

// check if clock-source is supported
if (!perf_get_timestamp()) {
if (!UseArchTimestamp && !perf_get_timestamp(false)) {
errs() << "kernel does not support CLOCK_MONOTONIC\n";
return;
}
Expand Down Expand Up @@ -221,7 +244,8 @@ PerfJITEventListener::PerfJITEventListener()
Header.Version = LLVM_PERF_JIT_VERSION;
Header.TotalSize = sizeof(Header);
Header.Pid = Pid;
Header.Timestamp = perf_get_timestamp();
Header.Timestamp = perf_get_timestamp(UseArchTimestamp);
Header.Flags = UseArchTimestamp ? JITDUMP_FLAGS_ARCH_TIMESTAMP : 0;
Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));

// Everything initialized, can do profiling now.
Expand Down Expand Up @@ -417,7 +441,7 @@ void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
Symbol->size() + 1 + // symbol name
CodeSize; // and code
rec.Prefix.Timestamp = perf_get_timestamp();
rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp);

rec.CodeSize = CodeSize;
rec.Vma = CodeAddr;
Expand Down Expand Up @@ -446,7 +470,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
LLVMPerfJitRecordDebugInfo rec;
rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
rec.Prefix.TotalSize = sizeof(rec); // will be increased further
rec.Prefix.Timestamp = perf_get_timestamp();
rec.Prefix.Timestamp = perf_get_timestamp(UseArchTimestamp);
rec.CodeAddr = CodeAddr;
rec.NrEntry = Lines.size();

Expand Down