@@ -54,15 +54,30 @@ constexpr uint32_t VX_CSR_LOCAL_MEM_BASE = 0xFC3;
5454
5555// / Extract base kernel name by removing Polygeist variant suffix
5656// / Example: _Z12launch_basicPiS_ji_kernel94565344022848 -> _Z12launch_basicPiS_ji
57+ // / Example: __polygeist_launch_vecadd_kernel_kernel94... -> __polygeist_launch_vecadd_kernel
5758static StringRef extractBaseKernelName (StringRef mangledName) {
58- size_t pos = mangledName.find (" _kernel" );
59- if (pos != StringRef::npos) {
60- // Find where the numeric suffix starts after "_kernel"
59+ // Search from the end for "_kernel" followed by digits
60+ // This handles cases like "vecadd_kernel_kernel94..." where the kernel name
61+ // itself contains "_kernel"
62+ size_t searchStart = 0 ;
63+ size_t lastValidPos = StringRef::npos;
64+
65+ while (true ) {
66+ size_t pos = mangledName.find (" _kernel" , searchStart);
67+ if (pos == StringRef::npos)
68+ break ;
69+
6170 size_t suffixStart = pos + 7 ; // Length of "_kernel"
6271 if (suffixStart < mangledName.size () &&
6372 std::isdigit (mangledName[suffixStart])) {
64- return mangledName.substr (0 , pos);
73+ // Found "_kernel" followed by digit - this is a potential suffix
74+ lastValidPos = pos;
6575 }
76+ searchStart = pos + 1 ;
77+ }
78+
79+ if (lastValidPos != StringRef::npos) {
80+ return mangledName.substr (0 , lastValidPos);
6681 }
6782 return mangledName;
6883}
@@ -812,7 +827,8 @@ static std::string generateKernelArgsHeader(const KernelMetadata &meta) {
812827}
813828
814829// / Generate JSON string for kernel metadata (for runtime dynamic loading)
815- static std::string generateMetadataJSON (const KernelMetadata &meta) {
830+ static std::string generateMetadataJSON (const KernelMetadata &meta,
831+ const std::vector<unsigned > &originalOrder = {}) {
816832 std::ostringstream json;
817833 json << " {\n " ;
818834 json << " \" kernel_name\" : \" " << meta.kernelName << " \" ,\n " ;
@@ -834,6 +850,19 @@ static std::string generateMetadataJSON(const KernelMetadata &meta) {
834850
835851 json << " ],\n " ;
836852 json << " \" total_args_size\" : " << meta.totalArgsSize << " ,\n " ;
853+
854+ // Include original argument order mapping if available
855+ // Entry i holds the original (hipLaunchKernelGGL) index of the argument at device position i
856+ if (!originalOrder.empty ()) {
857+ json << " \" original_arg_order\" : [" ;
858+ for (size_t i = 0 ; i < originalOrder.size (); ++i) {
859+ json << originalOrder[i];
860+ if (i < originalOrder.size () - 1 )
861+ json << " , " ;
862+ }
863+ json << " ],\n " ;
864+ }
865+
837866 json << " \" architecture\" : \" rv32\"\n " ;
838867 json << " }\n " ;
839868
@@ -843,8 +872,10 @@ static std::string generateMetadataJSON(const KernelMetadata &meta) {
843872// / Extract metadata from a GPU function and write metadata files
844873// / Generates both .meta.json (for runtime) and _args.h (for compile-time)
845874// / If outputDir is empty, uses current working directory
875+ // / Uses pre-built originalArgIsPointer map for computing argument order mapping
846876static void emitKernelMetadata (gpu::GPUFuncOp funcOp,
847- StringRef outputDir) {
877+ StringRef outputDir,
878+ const llvm::StringMap<std::vector<bool >> &originalArgIsPointer) {
848879 if (!funcOp.isKernel ())
849880 return ;
850881
@@ -897,6 +928,42 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
897928
898929 meta.totalArgsSize = offset;
899930
931+ // Look up pre-computed original argument types from host wrapper
932+ // Base name should match the host wrapper function name
933+ std::vector<unsigned > originalOrder;
934+
935+ auto it = originalArgIsPointer.find (baseName);
936+ if (it != originalArgIsPointer.end ()) {
937+ const std::vector<bool > &hostIsPointer = it->second ;
938+
939+ if (hostIsPointer.size () == meta.arguments .size ()) {
940+ // Build mapping from original order to device order
941+ // Device order: scalars first, then pointers (preserving relative order)
942+ // Original order: as declared in kernel signature
943+
944+ // Count scalars in host (original) order
945+ unsigned numScalars = 0 ;
946+ for (bool isPtr : hostIsPointer) {
947+ if (!isPtr) numScalars++;
948+ }
949+
950+ // Build the mapping: original_arg_order[device_idx] = original_idx
951+ originalOrder.resize (hostIsPointer.size ());
952+ unsigned deviceScalarIdx = 0 ;
953+ unsigned devicePtrIdx = numScalars;
954+
955+ for (unsigned origIdx = 0 ; origIdx < hostIsPointer.size (); ++origIdx) {
956+ if (!hostIsPointer[origIdx]) {
957+ // Scalar - goes to front of device args
958+ originalOrder[deviceScalarIdx++] = origIdx;
959+ } else {
960+ // Pointer - goes to back of device args
961+ originalOrder[devicePtrIdx++] = origIdx;
962+ }
963+ }
964+ }
965+ }
966+
900967 // Determine output directory
901968 SmallString<256 > outDir;
902969 if (outputDir.empty ()) {
@@ -905,7 +972,7 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
905972 outDir = outputDir;
906973 }
907974
908- // Write JSON metadata file
975+ // Write JSON metadata file (with original order mapping if available)
909976 {
910977 SmallString<256 > jsonPath (outDir);
911978 llvm::sys::path::append (jsonPath, meta.kernelName + " .meta.json" );
@@ -916,7 +983,7 @@ static void emitKernelMetadata(gpu::GPUFuncOp funcOp,
916983 llvm::errs () << " Error writing metadata file " << jsonPath << " : "
917984 << ec.message () << " \n " ;
918985 } else {
919- outFile << generateMetadataJSON (meta);
986+ outFile << generateMetadataJSON (meta, originalOrder );
920987 outFile.close ();
921988 llvm::outs () << " Wrote kernel metadata: " << jsonPath << " \n " ;
922989 }
@@ -958,6 +1025,28 @@ struct ConvertGPUToVortexPass
9581025 MLIRContext *context = &getContext ();
9591026 ModuleOp module = getOperation ();
9601027
1028+ // FIRST: Build argument order map from host wrapper functions BEFORE any changes
1029+ // This maps host wrapper function name -> per-argument isPointer flags for the original user args
1030+ llvm::StringMap<std::vector<bool >> originalArgIsPointer;
1031+
1032+ // Find host wrapper functions (func.func @__polygeist_launch_<name>)
1033+ for (auto funcOp : module .getOps <func::FuncOp>()) {
1034+ StringRef funcName = funcOp.getName ();
1035+ if (!funcName.startswith (" __polygeist_launch_" ))
1036+ continue ;
1037+
1038+ // Host wrapper args: user args... + blocks + threads (last 2 are launch params)
1039+ auto hostArgTypes = funcOp.getArgumentTypes ();
1040+ unsigned numHostUserArgs = hostArgTypes.size () > 2 ? hostArgTypes.size () - 2 : 0 ;
1041+
1042+ std::vector<bool > isPointerVec;
1043+ for (unsigned i = 0 ; i < numHostUserArgs; ++i) {
1044+ isPointerVec.push_back (hostArgTypes[i].isa <MemRefType>() ||
1045+ hostArgTypes[i].isa <LLVM::LLVMPointerType>());
1046+ }
1047+ originalArgIsPointer[funcName] = std::move (isPointerVec);
1048+ }
1049+
9611050 // PREPROCESSING: Consolidate Polygeist auto-tuning artifacts
9621051 // This must happen before any conversion patterns are applied
9631052 consolidatePolygeistAlternatives (module );
@@ -967,10 +1056,11 @@ struct ConvertGPUToVortexPass
9671056 // Files are written to current working directory:
9681057 // - <kernel_name>.meta.json (for runtime dynamic loading)
9691058 // - <kernel_name>_args.h (for compile-time type-safe usage)
1059+ // Pass pre-built argument order map for original argument positions
9701060 module .walk ([&](gpu::GPUModuleOp gpuModule) {
9711061 for (auto gpuFunc : gpuModule.getOps <gpu::GPUFuncOp>()) {
9721062 if (gpuFunc.isKernel ()) {
973- emitKernelMetadata (gpuFunc, " " /* use current directory */ );
1063+ emitKernelMetadata (gpuFunc, " " /* use current directory */ , originalArgIsPointer );
9741064 }
9751065 }
9761066 });
0 commit comments