Skip to content

Commit fb4279a

Browse files
author
ymweiss
committed
[feat] Add original argument order mapping to kernel metadata
Polygeist reorders kernel arguments (scalars before pointers) when lowering to GPU dialect. This commit extracts the original argument order from the host wrapper function and includes it in the kernel metadata JSON. The mapping allows the host stub generator to create launchers that accept arguments in original order (matching hipLaunchKernelGGL) and reorder them when packing for the device kernel. Also fixes extractBaseKernelName to handle kernel names that contain "_kernel" (e.g., vecadd_kernel_kernel94... now correctly extracts vecadd_kernel).
1 parent b3bd080 commit fb4279a

File tree

1 file changed

+99
-9
lines changed

1 file changed

+99
-9
lines changed

lib/polygeist/Passes/ConvertGPUToVortex.cpp

Lines changed: 99 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,30 @@ constexpr uint32_t VX_CSR_LOCAL_MEM_BASE = 0xFC3;
5454

5555
/// Extract base kernel name by removing Polygeist variant suffix
5656
/// Example: _Z12launch_basicPiS_ji_kernel94565344022848 -> _Z12launch_basicPiS_ji
57+
/// Example: __polygeist_launch_vecadd_kernel_kernel94... -> __polygeist_launch_vecadd_kernel
5758
static StringRef extractBaseKernelName(StringRef mangledName) {
58-
size_t pos = mangledName.find("_kernel");
59-
if (pos != StringRef::npos) {
60-
// Find where the numeric suffix starts after "_kernel"
59+
// Scan forward through every "_kernel" occurrence and remember the last one that is immediately followed by a digit
60+
// This handles cases like "vecadd_kernel_kernel94..." where the kernel name
61+
// itself contains "_kernel"
62+
size_t searchStart = 0;
63+
size_t lastValidPos = StringRef::npos;
64+
65+
while (true) {
66+
size_t pos = mangledName.find("_kernel", searchStart);
67+
if (pos == StringRef::npos)
68+
break;
69+
6170
size_t suffixStart = pos + 7; // Length of "_kernel"
6271
if (suffixStart < mangledName.size() &&
6372
std::isdigit(mangledName[suffixStart])) {
64-
return mangledName.substr(0, pos);
73+
// Found "_kernel" followed by digit - this is a potential suffix
74+
lastValidPos = pos;
6575
}
76+
searchStart = pos + 1;
77+
}
78+
79+
if (lastValidPos != StringRef::npos) {
80+
return mangledName.substr(0, lastValidPos);
6681
}
6782
return mangledName;
6883
}
@@ -812,7 +827,8 @@ static std::string generateKernelArgsHeader(const KernelMetadata &meta) {
812827
}
813828

814829
/// Generate JSON string for kernel metadata (for runtime dynamic loading)
815-
static std::string generateMetadataJSON(const KernelMetadata &meta) {
830+
static std::string generateMetadataJSON(const KernelMetadata &meta,
831+
const std::vector<unsigned> &originalOrder = {}) {
816832
std::ostringstream json;
817833
json << "{\n";
818834
json << " \"kernel_name\": \"" << meta.kernelName << "\",\n";
@@ -834,6 +850,19 @@ static std::string generateMetadataJSON(const KernelMetadata &meta) {
834850

835851
json << " ],\n";
836852
json << " \"total_args_size\": " << meta.totalArgsSize << ",\n";
853+
854+
// Include original argument order mapping if available
855+
// Entry i is the original (hipLaunchKernelGGL) index of the argument at device position i
856+
if (!originalOrder.empty()) {
857+
json << " \"original_arg_order\": [";
858+
for (size_t i = 0; i < originalOrder.size(); ++i) {
859+
json << originalOrder[i];
860+
if (i < originalOrder.size() - 1)
861+
json << ", ";
862+
}
863+
json << "],\n";
864+
}
865+
837866
json << " \"architecture\": \"rv32\"\n";
838867
json << "}\n";
839868

@@ -843,8 +872,10 @@ static std::string generateMetadataJSON(const KernelMetadata &meta) {
843872
/// Extract metadata from a GPU function and write metadata files
844873
/// Generates both .meta.json (for runtime) and _args.h (for compile-time)
845874
/// If outputDir is empty, uses current working directory
875+
/// Uses pre-built originalArgIsPointer map for computing argument order mapping
846876
static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
847-
StringRef outputDir) {
877+
StringRef outputDir,
878+
const llvm::StringMap<std::vector<bool>> &originalArgIsPointer) {
848879
if (!funcOp.isKernel())
849880
return;
850881

@@ -897,6 +928,42 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
897928

898929
meta.totalArgsSize = offset;
899930

931+
// Look up pre-computed original argument types from host wrapper
932+
// Base name should match the host wrapper function name
933+
std::vector<unsigned> originalOrder;
934+
935+
auto it = originalArgIsPointer.find(baseName);
936+
if (it != originalArgIsPointer.end()) {
937+
const std::vector<bool> &hostIsPointer = it->second;
938+
939+
if (hostIsPointer.size() == meta.arguments.size()) {
940+
// Build mapping from original order to device order
941+
// Device order: scalars first, then pointers (preserving relative order)
942+
// Original order: as declared in kernel signature
943+
944+
// Count scalars in host (original) order
945+
unsigned numScalars = 0;
946+
for (bool isPtr : hostIsPointer) {
947+
if (!isPtr) numScalars++;
948+
}
949+
950+
// Build the mapping: original_arg_order[device_idx] = original_idx
951+
originalOrder.resize(hostIsPointer.size());
952+
unsigned deviceScalarIdx = 0;
953+
unsigned devicePtrIdx = numScalars;
954+
955+
for (unsigned origIdx = 0; origIdx < hostIsPointer.size(); ++origIdx) {
956+
if (!hostIsPointer[origIdx]) {
957+
// Scalar - goes to front of device args
958+
originalOrder[deviceScalarIdx++] = origIdx;
959+
} else {
960+
// Pointer - goes to back of device args
961+
originalOrder[devicePtrIdx++] = origIdx;
962+
}
963+
}
964+
}
965+
}
966+
900967
// Determine output directory
901968
SmallString<256> outDir;
902969
if (outputDir.empty()) {
@@ -905,7 +972,7 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
905972
outDir = outputDir;
906973
}
907974

908-
// Write JSON metadata file
975+
// Write JSON metadata file (with original order mapping if available)
909976
{
910977
SmallString<256> jsonPath(outDir);
911978
llvm::sys::path::append(jsonPath, meta.kernelName + ".meta.json");
@@ -916,7 +983,7 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
916983
llvm::errs() << "Error writing metadata file " << jsonPath << ": "
917984
<< ec.message() << "\n";
918985
} else {
919-
outFile << generateMetadataJSON(meta);
986+
outFile << generateMetadataJSON(meta, originalOrder);
920987
outFile.close();
921988
llvm::outs() << "Wrote kernel metadata: " << jsonPath << "\n";
922989
}
@@ -958,6 +1025,28 @@ struct ConvertGPUToVortexPass
9581025
MLIRContext *context = &getContext();
9591026
ModuleOp module = getOperation();
9601027

1028+
// FIRST: Build argument order map from host wrapper functions BEFORE any changes
1029+
// This maps host wrapper function name (expected to equal the kernel base name) -> isPointer flag for each original user arg
1030+
llvm::StringMap<std::vector<bool>> originalArgIsPointer;
1031+
1032+
// Find host wrapper functions (func.func @__polygeist_launch_<name>)
1033+
for (auto funcOp : module.getOps<func::FuncOp>()) {
1034+
StringRef funcName = funcOp.getName();
1035+
if (!funcName.startswith("__polygeist_launch_"))
1036+
continue;
1037+
1038+
// Host wrapper args: user args... + blocks + threads (last 2 are launch params)
1039+
auto hostArgTypes = funcOp.getArgumentTypes();
1040+
unsigned numHostUserArgs = hostArgTypes.size() > 2 ? hostArgTypes.size() - 2 : 0;
1041+
1042+
std::vector<bool> isPointerVec;
1043+
for (unsigned i = 0; i < numHostUserArgs; ++i) {
1044+
isPointerVec.push_back(hostArgTypes[i].isa<MemRefType>() ||
1045+
hostArgTypes[i].isa<LLVM::LLVMPointerType>());
1046+
}
1047+
originalArgIsPointer[funcName] = std::move(isPointerVec);
1048+
}
1049+
9611050
// PREPROCESSING: Consolidate Polygeist auto-tuning artifacts
9621051
// This must happen before any conversion patterns are applied
9631052
consolidatePolygeistAlternatives(module);
@@ -967,10 +1056,11 @@ struct ConvertGPUToVortexPass
9671056
// Files are written to current working directory:
9681057
// - <kernel_name>.meta.json (for runtime dynamic loading)
9691058
// - <kernel_name>_args.h (for compile-time type-safe usage)
1059+
// Pass pre-built argument order map for original argument positions
9701060
module.walk([&](gpu::GPUModuleOp gpuModule) {
9711061
for (auto gpuFunc : gpuModule.getOps<gpu::GPUFuncOp>()) {
9721062
if (gpuFunc.isKernel()) {
973-
emitKernelMetadata(gpuFunc, "" /* use current directory */);
1063+
emitKernelMetadata(gpuFunc, "" /* use current directory */, originalArgIsPointer);
9741064
}
9751065
}
9761066
});

0 commit comments

Comments
 (0)