Skip to content

Commit 4fcd1a8

Browse files
committed
[llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking.
This is mostly for the benefit of the LBR latency mode, which currently performs no support check at all: if it is run on unsupported hardware, it silently produces all zeroes for latency. Differential Revision: https://reviews.llvm.org/D85254
1 parent dd4fb7c commit 4fcd1a8

File tree

6 files changed

+86
-25
lines changed

6 files changed

+86
-25
lines changed

llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ else:
1919
try:
2020
with open(os.devnull, 'w') as quiet:
2121
check_llvm_exegesis_uops_result = subprocess.call(
22-
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
22+
[llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
2323
check_llvm_exegesis_latency_result = subprocess.call(
24-
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
24+
[llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
2525
except OSError:
2626
print('could not exec llvm-exegesis')
2727
config.unsupported = True

llvm/tools/llvm-exegesis/lib/Target.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ class ExegesisTarget {
142142
return {&Instr};
143143
}
144144

145+
// Checks hardware and software support for current benchmark mode.
146+
// Returns an error if the target host does not have support to run the
147+
// benchmark.
148+
virtual Error checkFeatureSupport() const { return Error::success(); }
149+
145150
// Creates a snippet generator for the given mode.
146151
std::unique_ptr<SnippetGenerator>
147152
createSnippetGenerator(InstructionBenchmark::ModeE Mode,

llvm/tools/llvm-exegesis/lib/X86/Target.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,23 @@ class ExegesisX86Target : public ExegesisTarget {
674674
return Arch == Triple::x86_64 || Arch == Triple::x86;
675675
}
676676

677+
Error checkFeatureSupport() const override {
  // LBR is currently the only feature with conditional support, so a run that
  // does not request it (sampling period of zero) has nothing to verify.
  const bool LbrRequested = LbrSamplingPeriod != 0;
  if (!LbrRequested)
    return Error::success();

#if defined(__linux__) && defined(HAVE_LIBPFM) &&                              \
    defined(LIBPFM_HAS_FIELD_CYCLES)
  // Support was compiled in, but the hardware may still lack it, so defer to
  // the LBR counter's own check.
  return X86LbrCounter::checkLbrSupport();
#else
  // LBR support was not compiled in for this configuration.
  return llvm::make_error<llvm::StringError>(
      "LBR not supported on this kernel and/or platform",
      llvm::errc::not_supported);
#endif
}
693+
677694
static const unsigned kUnavailableRegisters[4];
678695
};
679696

llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#endif // HAVE_LIBPFM
2222

2323
#include <atomic>
24+
#include <chrono>
2425
#include <cstddef>
2526
#include <cstdint>
2627
#include <limits>
@@ -35,6 +36,8 @@
3536
namespace llvm {
3637
namespace exegesis {
3738

39+
// Number of entries in the LBR.
40+
static constexpr int kLbrEntries = 16;
3841
static constexpr size_t kBufferPages = 8;
3942
static const size_t kDataBufferSize = kBufferPages * getpagesize();
4043

@@ -70,7 +73,6 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
7073
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
7174
const void *From, const void *To,
7275
llvm::SmallVector<int64_t, 4> *CycleArray) {
73-
assert(From != nullptr && To != nullptr);
7476
const char *DataPtr = DataBuf;
7577
while (DataPtr < DataBuf + DataSize) {
7678
struct perf_event_header Header;
@@ -149,21 +151,47 @@ void X86LbrCounter::start() {
149151
ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
150152
}
151153

154+
// Probes the host for LBR-with-cycles support by taking one sample read.
//
// An LBR counter is started, a short busy loop is executed, and the recorded
// values are read back. Returns Error::success() if at least one value read
// back is non-zero. Otherwise (no values, all zeroes, or a failed read) returns
// a StringError carrying errc::not_supported.
llvm::Error X86LbrCounter::checkLbrSupport() {
  // NOTE(review): 123 is presumably the sampling period handed to the perf
  // event; confirm against X86LbrPerfEvent's constructor.
  X86LbrCounter Counter(X86LbrPerfEvent(123));
  Counter.start();

  // Load through a volatile pointer so the compiler cannot unroll the loop
  // and eliminate all of its branches. The loop runs for at least kLbrEntries
  // iterations and keeps going until the short time limit has elapsed.
  int Sum = 0;
  int V = 1;
  volatile int *P = &V;
  const auto TimeLimit =
      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);

  for (int I = 0;
       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
       ++I) {
    Sum += *P;
  }

  Counter.stop();
  // Sum exists only to give the loop a body; mark it as intentionally unused.
  (void)Sum;

  auto ResultOrError = Counter.doReadCounter(nullptr, nullptr);
  if (!ResultOrError) {
    // A failed Expected must have its error handled before it is destroyed.
    // A failed read is reported below as lack of support, as before.
    llvm::consumeError(ResultOrError.takeError());
  } else {
    // If there is at least one non-zero entry, then LBR is supported. An
    // empty result simply falls through the loop.
    for (const int64_t Value : ResultOrError.get())
      if (Value != 0)
        return Error::success();
  }

  return llvm::make_error<llvm::StringError>(
      "LBR format with cycles is not supported on the host.",
      llvm::errc::not_supported);
}
189+
152190
llvm::Expected<llvm::SmallVector<int64_t, 4>>
153191
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
154-
// The max number of time-outs/retries before we give up.
155-
static constexpr int kMaxTimeouts = 160;
156-
157192
// Disable the event before reading
158193
ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
159194

160-
// Parses the LBR buffer and fills CycleArray with the sequence of cycle
161-
// counts from the buffer.
162-
llvm::SmallVector<int64_t, 4> CycleArray;
163-
std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
164-
int NumTimeouts = 0;
165-
int PollResult = 0;
166-
167195
// Find the boundary of the function so that we could filter the LBRs
168196
// to keep only the relevant records.
169197
if (FunctionBytes.empty())
@@ -172,6 +200,21 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const {
172200
const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
173201
const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
174202
FunctionBytes.size());
203+
return doReadCounter(From, To);
204+
}
205+
206+
llvm::Expected<llvm::SmallVector<int64_t, 4>>
207+
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
208+
// The max number of time-outs/retries before we give up.
209+
static constexpr int kMaxTimeouts = 160;
210+
211+
// Parses the LBR buffer and fills CycleArray with the sequence of cycle
212+
// counts from the buffer.
213+
llvm::SmallVector<int64_t, 4> CycleArray;
214+
auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
215+
int NumTimeouts = 0;
216+
int PollResult = 0;
217+
175218
while (PollResult <= 0) {
176219
PollResult = pollLbrPerfEvent(FileDescriptor);
177220
if (PollResult > 0)

llvm/tools/llvm-exegesis/lib/X86/X86Counter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ class X86LbrPerfEvent : public pfm::PerfEvent {
3333

3434
class X86LbrCounter : public pfm::Counter {
3535
public:
36+
static llvm::Error checkLbrSupport();
37+
3638
explicit X86LbrCounter(pfm::PerfEvent &&Event);
3739

3840
virtual ~X86LbrCounter();
@@ -43,6 +45,9 @@ class X86LbrCounter : public pfm::Counter {
4345
readOrError(StringRef FunctionBytes) const override;
4446

4547
private:
48+
llvm::Expected<llvm::SmallVector<int64_t, 4>>
49+
doReadCounter(const void *From, const void *To) const;
50+
4651
void *MMappedBuffer = nullptr;
4752
};
4853

llvm/tools/llvm-exegesis/llvm-exegesis.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,6 @@ static cl::opt<std::string>
160160
cl::desc(""), cl::cat(AnalysisOptions),
161161
cl::init(""));
162162

163-
static cl::list<std::string>
164-
AllowedHostCpus("allowed-host-cpu",
165-
cl::desc("If specified, only run the benchmark if the host "
166-
"CPU matches the names"),
167-
cl::cat(Options), cl::ZeroOrMore);
168-
169163
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
170164
"analysis-display-unstable-clusters",
171165
cl::desc("if there is more than one benchmark for an opcode, said "
@@ -302,12 +296,9 @@ void benchmarkMain() {
302296

303297
const LLVMState State(CpuName);
304298

305-
llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU();
306-
for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end();
307-
++Begin) {
308-
if (ActualCpu != *Begin)
309-
ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu));
310-
}
299+
// Preliminary check to ensure features needed for requested
300+
// benchmark mode are present on target CPU and/or OS.
301+
ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
311302

312303
const std::unique_ptr<BenchmarkRunner> Runner =
313304
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(

0 commit comments

Comments
 (0)