Skip to content

Commit 36f98f5

Browse files
authored
Support cudaGetDeviceProperties hip translation (#350)
1 parent b894587 commit 36f98f5

File tree

3 files changed

+530
-69
lines changed

3 files changed

+530
-69
lines changed

lib/polygeist/ExecutionEngine/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ if(POLYGEIST_ENABLE_CUDA)
1818
-O3 -nocudalib -Xclang -no-opaque-pointers
1919
)
2020

21+
# Build one -I flag per CUDA toolkit include directory.
# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES is a list variable; prefixing the
# whole expansion with a single -I would only cover the first entry (and would
# yield a bare "-I" when the list is empty).
set(cuda_includes)
foreach(cuda_include_dir ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
  list(APPEND cuda_includes -I${cuda_include_dir})
endforeach()
24+
2125
foreach(src ${src_files})
2226
get_filename_component(infile ${src} ABSOLUTE)
2327
get_filename_component(filename ${src} NAME)
@@ -30,10 +34,11 @@ if(POLYGEIST_ENABLE_CUDA)
3034
COMMAND ${CLANG_TOOL}
3135
${bc_flags}
3236
${infile} -o ${bc_outfile}
33-
-I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
37+
${cuda_includes}
3438
-DPOLYGEIST_PGO_DEFAULT_DATA_DIR="${POLYGEIST_PGO_DEFAULT_DATA_DIR}"
3539
-DPOLYGEIST_PGO_ALTERNATIVE_ENV_VAR="${POLYGEIST_PGO_ALTERNATIVE_ENV_VAR}"
3640
-DPOLYGEIST_PGO_DATA_DIR_ENV_VAR="${POLYGEIST_PGO_DATA_DIR_ENV_VAR}"
41+
-DPOLYGEIST_ENABLE_CUDA=${POLYGEIST_ENABLE_CUDA}
3742
DEPENDS ${infile}
3843
COMMENT "Building LLVM bitcode ${bc_outfile}"
3944
VERBATIM
@@ -81,6 +86,7 @@ if(POLYGEIST_ENABLE_ROCM)
8186
-DPOLYGEIST_PGO_DEFAULT_DATA_DIR="${POLYGEIST_PGO_DEFAULT_DATA_DIR}"
8287
-DPOLYGEIST_PGO_ALTERNATIVE_ENV_VAR="${POLYGEIST_PGO_ALTERNATIVE_ENV_VAR}"
8388
-DPOLYGEIST_PGO_DATA_DIR_ENV_VAR="${POLYGEIST_PGO_DATA_DIR_ENV_VAR}"
89+
-DPOLYGEIST_ENABLE_CUDA=${POLYGEIST_ENABLE_CUDA}
8490
)
8591

8692
foreach(src ${src_files})
@@ -92,6 +98,7 @@ if(POLYGEIST_ENABLE_ROCM)
9298
add_custom_command(OUTPUT ${bc_outfile}
9399
COMMAND ${CLANG_TOOL}
94100
${bc_flags}
101+
${cuda_includes}
95102
${infile} -o ${bc_outfile}
96103
DEPENDS ${infile}
97104
COMMENT "Building LLVM bitcode ${bc_outfile}"

lib/polygeist/ExecutionEngine/RocmRuntimeWrappers.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,101 @@ extern "C" MLIR_HIP_WRAPPERS_EXPORT void
130130
__mgpurtUnregisterFatBinary(void **fatCubinHandle) {
131131
return __hipUnregisterFatBinary(fatCubinHandle);
132132
}
133+
134+
#if POLYGEIST_ENABLE_CUDA
135+
136+
#pragma push_macro("__forceinline__")
137+
#define __VECTOR_TYPES_H__
138+
#include <cuda_runtime_api.h>
139+
#undef __VECTOR_TYPES_H__
140+
#pragma pop_macro("__forceinline__")
141+
142+
/// Fills a CUDA-style device property struct from the HIP properties of
/// `device`, so code written against cudaGetDeviceProperties can run on top of
/// the HIP runtime.
///
/// \param cudaProp Destination struct. On success it is zeroed and then the
///                 fields that have a same-named HIP counterpart are copied
///                 over; fields without one are left as zero.
///                 On failure it is not modified.
/// \param device   Device ordinal forwarded to hipGetDeviceProperties.
/// \return The value produced by ERR_HIP_REPORT_IF_ERROR for the HIP query
///         (presumably the HIP error code, 0 on success -- the macro is
///         defined outside this block).
extern "C" MLIR_HIP_WRAPPERS_EXPORT int32_t
mgpurtCudaGetDeviceProperties(struct cudaDeviceProp *cudaProp, int device) {
  // Value-initialize so that no indeterminate bytes are ever read from it.
  struct hipDeviceProp_t hipProp = {};
  int err = ERR_HIP_REPORT_IF_ERROR(hipGetDeviceProperties(&hipProp, device));

  // Do not publish properties from a failed query.
  // NOTE(review): assumes a non-zero result means failure (hipSuccess == 0).
  if (err != 0)
    return err;

  // Fields that are not translated below must not carry stale caller data.
  memset(cudaProp, 0, sizeof(*cudaProp));

  // Reassign all corresponding fields from the hip props; the commented ones
  // don't exist in hip one-for-one and therefore stay zero.
  // The copy is bounded by the smaller of the two field sizes so that a size
  // mismatch between the CUDA and HIP declarations can neither over-read the
  // source nor overflow the destination.
#define __polygeist_assign_field(f)                                            \
  memcpy(&(cudaProp->f), &(hipProp.f),                                         \
         sizeof(cudaProp->f) < sizeof(hipProp.f) ? sizeof(cudaProp->f)         \
                                                 : sizeof(hipProp.f))
  __polygeist_assign_field(name);
  // __polygeist_assign_field(uuid);
  __polygeist_assign_field(totalGlobalMem);
  __polygeist_assign_field(sharedMemPerBlock);
  __polygeist_assign_field(regsPerBlock);
  __polygeist_assign_field(warpSize);
  __polygeist_assign_field(memPitch);
  __polygeist_assign_field(maxThreadsPerBlock);
  __polygeist_assign_field(maxThreadsDim);
  __polygeist_assign_field(maxGridSize);
  __polygeist_assign_field(clockRate);
  __polygeist_assign_field(totalConstMem);
  __polygeist_assign_field(major);
  __polygeist_assign_field(minor);
  __polygeist_assign_field(textureAlignment);
  __polygeist_assign_field(texturePitchAlignment);
  // __polygeist_assign_field(deviceOverlap);
  __polygeist_assign_field(multiProcessorCount);
  __polygeist_assign_field(kernelExecTimeoutEnabled);
  __polygeist_assign_field(integrated);
  __polygeist_assign_field(canMapHostMemory);
  __polygeist_assign_field(computeMode);
  __polygeist_assign_field(maxTexture1D);
  // __polygeist_assign_field(maxTexture1DMipmap);
  __polygeist_assign_field(maxTexture1DLinear);
  __polygeist_assign_field(maxTexture2D);
  // __polygeist_assign_field(maxTexture2DMipmap);
  // __polygeist_assign_field(maxTexture2DLinear);
  // __polygeist_assign_field(maxTexture2DGather);
  __polygeist_assign_field(maxTexture3D);
  // __polygeist_assign_field(maxTexture3DAlt);
  // __polygeist_assign_field(maxTextureCubemap);
  // __polygeist_assign_field(maxTexture1DLayered);
  // __polygeist_assign_field(maxTexture2DLayered);
  // __polygeist_assign_field(maxTextureCubemapLayered);
  // __polygeist_assign_field(maxSurface1D);
  // __polygeist_assign_field(maxSurface2D);
  // __polygeist_assign_field(maxSurface3D);
  // __polygeist_assign_field(maxSurface1DLayered);
  // __polygeist_assign_field(maxSurface2DLayered);
  // __polygeist_assign_field(maxSurfaceCubemap);
  // __polygeist_assign_field(maxSurfaceCubemapLayered);
  // __polygeist_assign_field(surfaceAlignment);
  __polygeist_assign_field(concurrentKernels);
  __polygeist_assign_field(ECCEnabled);
  __polygeist_assign_field(pciBusID);
  __polygeist_assign_field(pciDeviceID);
  __polygeist_assign_field(pciDomainID);
  __polygeist_assign_field(tccDriver);
  // __polygeist_assign_field(asyncEngineCount);
  // __polygeist_assign_field(unifiedAddressing);
  __polygeist_assign_field(memoryClockRate);
  __polygeist_assign_field(memoryBusWidth);
  __polygeist_assign_field(l2CacheSize);
  // __polygeist_assign_field(persistingL2CacheMaxSize);
  __polygeist_assign_field(maxThreadsPerMultiProcessor);
  // __polygeist_assign_field(streamPrioritiesSupported);
  // __polygeist_assign_field(globalL1CacheSupported);
  // __polygeist_assign_field(localL1CacheSupported);
  // __polygeist_assign_field(sharedMemPerMultiprocessor);
  // __polygeist_assign_field(regsPerMultiprocessor);
  __polygeist_assign_field(managedMemory);
  __polygeist_assign_field(isMultiGpuBoard);
  // __polygeist_assign_field(multiGpuBoardGroupID);
  // __polygeist_assign_field(singleToDoublePrecisionPerfRatio);
  __polygeist_assign_field(pageableMemoryAccess);
  __polygeist_assign_field(concurrentManagedAccess);
  // __polygeist_assign_field(computePreemptionSupported);
  // __polygeist_assign_field(canUseHostPointerForRegisteredMem);
  __polygeist_assign_field(cooperativeLaunch);
  __polygeist_assign_field(cooperativeMultiDeviceLaunch);
  __polygeist_assign_field(pageableMemoryAccessUsesHostPageTables);
  __polygeist_assign_field(directManagedMemAccessFromHost);
  // __polygeist_assign_field(accessPolicyMaxWindowSize);
#undef __polygeist_assign_field

  return err;
}
229+
230+
#endif

0 commit comments

Comments
 (0)