Skip to content

Commit 9597ff8

Browse files
committed
Add support for using PGO for polygeist alternatives op
1 parent eabb719 commit 9597ff8

File tree

10 files changed

+532
-181
lines changed

10 files changed

+532
-181
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib)
2525

2626
find_program(XXD_BIN xxd)
2727

28+
# TODO should depend on OS
29+
set(POLYGEIST_PGO_DATA_DIR "/var/tmp/polygeist/pgo/" CACHE STRING "Directory for PGO data")
30+
2831
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
2932
project(polygeist LANGUAGES CXX C)
3033

include/polygeist/Passes/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
55
#include "mlir/Pass/Pass.h"
66
#include <memory>
7+
8+
enum PolygeistAlternativesMode { PAM_Static, PAM_PGO_Profile, PAM_PGO_Opt };
9+
710
namespace mlir {
811
class PatternRewriter;
912
class RewritePatternSet;

lib/polygeist/ExecutionEngine/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ if(POLYGEIST_ENABLE_CUDA)
5252
${bc_flags}
5353
${infile} -o ${bc_outfile}
5454
-I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
55+
-DPOLYGEIST_PGO_DATA_DIR="${POLYGEIST_PGO_DATA_DIR}"
5556
DEPENDS ${infile}
5657
COMMENT "Building LLVM bitcode ${bc_outfile}"
5758
VERBATIM

lib/polygeist/ExecutionEngine/CudaRuntimeWrappers.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include <filesystem>
1818
#include <fstream>
1919
#include <iostream>
20+
#include <map>
21+
#include <mutex>
22+
#include <time.h>
2023

2124
#include "cuda.h"
2225
#include "cuda_runtime.h"
@@ -76,6 +79,112 @@ class ScopedContext {
7679
~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
7780
};
7881

82+
class PGOState {
83+
public:
84+
enum Type { Start, End };
85+
struct State {
86+
struct timespec start_clock;
87+
};
88+
89+
// TODO define these in cmake files (depending on target OS and used in the
90+
// compiler too)
91+
static constexpr const char *dirname = POLYGEIST_PGO_DATA_DIR;
92+
static constexpr const char *alternativeEnvVar =
93+
"POLYGEIST_PGO_KERNEL_ALTERNATIVE";
94+
95+
inline static int alternative;
96+
inline thread_local static std::mutex mutex;
97+
inline thread_local static std::map<std::string, State *> states;
98+
99+
std::string kernelId;
100+
int totalAlternatives;
101+
102+
PGOState(const char *kernelId, int totalAlternatives)
103+
: kernelId(kernelId), totalAlternatives(totalAlternatives) {}
104+
void end() {
105+
struct timespec end_clock;
106+
cudaDeviceSynchronize();
107+
clock_gettime(CLOCK_MONOTONIC, &end_clock);
108+
109+
std::unique_lock<std::mutex> lock(mutex);
110+
if (states.count(kernelId) == 0) {
111+
std::cerr << "No kernel with id " << kernelId << "running" << std::endl;
112+
exit(1);
113+
}
114+
State *state = states[kernelId];
115+
struct timespec tmp_clock {
116+
end_clock.tv_sec - state->start_clock.tv_sec,
117+
end_clock.tv_nsec - state->start_clock.tv_nsec
118+
};
119+
double elapsed =
120+
(tmp_clock.tv_sec + ((double)tmp_clock.tv_nsec) * .000000001);
121+
122+
// Only write to file if we are profiling a valid alternative
123+
if (0 <= alternative && alternative < totalAlternatives) {
124+
// TODO error handling
125+
std::ofstream ofile;
126+
ofile.open(std::string(dirname) + kernelId,
127+
std::ios::out | std::ios::app);
128+
ofile << alternative << " " << elapsed << std::endl;
129+
ofile.close();
130+
}
131+
132+
delete state;
133+
states.erase(states.find(kernelId));
134+
}
135+
136+
void start() {
137+
std::unique_lock<std::mutex> lock(mutex);
138+
State *state = new State();
139+
if (states.count(kernelId) == 1) {
140+
std::cerr << "Two kernels with id " << kernelId
141+
<< "running at the same time" << std::endl;
142+
exit(1);
143+
}
144+
states[kernelId] = state;
145+
// Start timing
146+
cudaDeviceSynchronize();
147+
clock_gettime(CLOCK_MONOTONIC, &state->start_clock);
148+
}
149+
150+
int getAlternative() {
151+
static int init = [&] {
152+
if (char *i = getenv(alternativeEnvVar)) {
153+
this->alternative = atoi(i);
154+
} else {
155+
std::cerr << alternativeEnvVar << " not defined" << std::endl;
156+
exit(1);
157+
}
158+
std::filesystem::create_directories(dirname);
159+
return 0;
160+
}();
161+
if (0 <= alternative && alternative < totalAlternatives)
162+
return alternative;
163+
else
164+
return 0;
165+
}
166+
167+
~PGOState() {}
168+
};
169+
170+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT int32_t
171+
mgpurtPGOGetAlternative(const char *kernelID, int totalAlternatives) {
172+
PGOState pgoState(kernelID, totalAlternatives);
173+
return pgoState.getAlternative();
174+
}
175+
176+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
177+
mgpurtPGOStart(const char *kernelID, int totalAlternatives) {
178+
PGOState pgoState(kernelID, totalAlternatives);
179+
pgoState.start();
180+
}
181+
182+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpurtPGOEnd(const char *kernelID,
183+
int totalAlternatives) {
184+
PGOState pgoState(kernelID, totalAlternatives);
185+
pgoState.end();
186+
}
187+
79188
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
80189
mgpurtLaunchKernel(void *function, intptr_t gridX, intptr_t gridY,
81190
intptr_t gridZ, intptr_t blockX, intptr_t blockY,

lib/polygeist/Passes/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ add_mlir_dialect_library(MLIRPolygeistTransforms
5151
MLIROpenMPToLLVM
5252
)
5353

54+
target_compile_definitions(obj.MLIRPolygeistTransforms
55+
PRIVATE
56+
POLYGEIST_PGO_DATA_DIR="${POLYGEIST_PGO_DATA_DIR}"
57+
)
58+
5459
if(POLYGEIST_ENABLE_CUDA)
5560
find_package(CUDA)
5661
enable_language(CUDA)

0 commit comments

Comments
 (0)