Skip to content

Commit d2f85d0

Browse files
Jose M Monsalve Diaz authored and shiltian committed
[OpenMP][Libomptarget] Adding print_device_info to RTL and omptarget
This patch introduces a function in the device plugin that prints the device information. It relates to another patch that introduces a CLI tool to obtain the device information directly from the OpenMP library. It is inspired by PGI's pgaccelinfo. The modifications are as follows:
1. Introduce the optional `void __tgt_rtl_print_device_info(RTLdevID)` function into the RTL.
2. Introduce the `int __tgt_print_device_info(devID)` function into the `omptarget` interface. It returns false (0) if the RTL does not implement the function.
3. Add `bool printDeviceInfo(RTLDevID)` to `DeviceTy`.
4. Implement `__tgt_rtl_print_device_info` for CUDA, adding the CUDA driver API calls it needs.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D106751
1 parent 5ab6aed commit d2f85d0

File tree

12 files changed

+216
-0
lines changed

12 files changed

+216
-0
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
333333

334334
void __tgt_set_info_flag(uint32_t);
335335

336+
int __tgt_print_device_info(int64_t device_id);
336337
#ifdef __cplusplus
337338
}
338339
#endif

openmp/libomptarget/include/omptargetplugin.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
142142
// Set plugin's internal information flag externally.
143143
void __tgt_rtl_set_info_flag(uint32_t);
144144

145+
// Print the device information
146+
void __tgt_rtl_print_device_info(int32_t ID);
147+
145148
#ifdef __cplusplus
146149
}
147150
#endif

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ DLWRAP(cuDeviceGetAttribute, 3);
2828
DLWRAP(cuDeviceGetCount, 1);
2929
DLWRAP(cuFuncGetAttribute, 3);
3030

31+
// Device info
32+
DLWRAP(cuDeviceGetName, 3);
33+
DLWRAP(cuDeviceTotalMem, 2);
34+
DLWRAP(cuDriverGetVersion, 1);
35+
3136
DLWRAP(cuGetErrorString, 2);
3237
DLWRAP(cuLaunchKernel, 11);
3338

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
7272
CUresult cuDeviceGetCount(int *);
7373
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
7474

75+
// Device info
76+
CUresult cuDeviceGetName(char *, int, CUdevice *);
77+
CUresult cuDeviceTotalMem(size_t *, CUdevice *);
78+
CUresult cuDriverGetVersion(int *);
79+
7580
CUresult cuGetErrorString(CUresult, const char **);
7681
CUresult cuInit(unsigned);
7782
CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,

openmp/libomptarget/plugins/cuda/src/rtl.cpp

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
} while (false)
6262
#endif // OMPTARGET_DEBUG
6363

64+
// Map a truth value to the text shown in the device report: "Yes" when the
// argument is non-zero/true, "No" otherwise. The argument is evaluated once.
#define BOOL2TEXT(b) (!(b) ? "No" : "Yes")
65+
6466
#include "elf_common.h"
6567

6668
/// Keep entries table per device.
@@ -1157,6 +1159,178 @@ class DeviceRTLTy {
11571159
}
11581160
return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
11591161
}
1162+
1163+
void printDeviceInfo(int32_t device_id) {
1164+
char TmpChar[1000];
1165+
std::string TmpStr;
1166+
size_t TmpSt;
1167+
int TmpInt, TmpInt2, TmpInt3;
1168+
1169+
CUdevice Device;
1170+
checkResult(cuDeviceGet(&Device, device_id),
1171+
"Error returned from cuCtxGetDevice\n");
1172+
1173+
cuDriverGetVersion(&TmpInt);
1174+
printf(" CUDA Driver Version: \t\t%d \n", TmpInt);
1175+
printf(" CUDA Device Number: \t\t%d \n", device_id);
1176+
checkResult(cuDeviceGetName(TmpChar, 1000, Device),
1177+
"Error returned from cuDeviceGetName\n");
1178+
printf(" Device Name: \t\t\t%s \n", TmpChar);
1179+
checkResult(cuDeviceTotalMem(&TmpSt, Device),
1180+
"Error returned from cuDeviceTotalMem\n");
1181+
printf(" Global Memory Size: \t\t%zu bytes \n", TmpSt);
1182+
checkResult(cuDeviceGetAttribute(
1183+
&TmpInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, Device),
1184+
"Error returned from cuDeviceGetAttribute\n");
1185+
printf(" Number of Multiprocessors: \t\t%d \n", TmpInt);
1186+
checkResult(
1187+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device),
1188+
"Error returned from cuDeviceGetAttribute\n");
1189+
printf(" Concurrent Copy and Execution: \t%s \n", BOOL2TEXT(TmpInt));
1190+
checkResult(cuDeviceGetAttribute(
1191+
&TmpInt, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, Device),
1192+
"Error returned from cuDeviceGetAttribute\n");
1193+
printf(" Total Constant Memory: \t\t%d bytes\n", TmpInt);
1194+
checkResult(
1195+
cuDeviceGetAttribute(
1196+
&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, Device),
1197+
"Error returned from cuDeviceGetAttribute\n");
1198+
printf(" Max Shared Memory per Block: \t%d bytes \n", TmpInt);
1199+
checkResult(cuDeviceGetAttribute(
1200+
&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, Device),
1201+
"Error returned from cuDeviceGetAttribute\n");
1202+
printf(" Registers per Block: \t\t%d \n", TmpInt);
1203+
checkResult(
1204+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_WARP_SIZE, Device),
1205+
"Error returned from cuDeviceGetAttribute\n");
1206+
printf(" Warp Size: \t\t\t\t%d Threads \n", TmpInt);
1207+
checkResult(cuDeviceGetAttribute(
1208+
&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Device),
1209+
"Error returned from cuDeviceGetAttribute\n");
1210+
printf(" Maximum Threads per Block: \t\t%d \n", TmpInt);
1211+
checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
1212+
Device),
1213+
"Error returned from cuDeviceGetAttribute\n");
1214+
checkResult(cuDeviceGetAttribute(&TmpInt2,
1215+
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, Device),
1216+
"Error returned from cuDeviceGetAttribute\n");
1217+
checkResult(cuDeviceGetAttribute(&TmpInt3,
1218+
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, Device),
1219+
"Error returned from cuDeviceGetAttribute\n");
1220+
printf(" Maximum Block Dimensions: \t\t%d, %d, %d \n", TmpInt, TmpInt2,
1221+
TmpInt3);
1222+
checkResult(
1223+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, Device),
1224+
"Error returned from cuDeviceGetAttribute\n");
1225+
checkResult(cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
1226+
Device),
1227+
"Error returned from cuDeviceGetAttribute\n");
1228+
checkResult(cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
1229+
Device),
1230+
"Error returned from cuDeviceGetAttribute\n");
1231+
printf(" Maximum Grid Dimensions: \t\t%d x %d x %d \n", TmpInt, TmpInt2,
1232+
TmpInt3);
1233+
checkResult(
1234+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_PITCH, Device),
1235+
"Error returned from cuDeviceGetAttribute\n");
1236+
printf(" Maximum Memory Pitch: \t\t%d bytes \n", TmpInt);
1237+
checkResult(cuDeviceGetAttribute(
1238+
&TmpInt, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, Device),
1239+
"Error returned from cuDeviceGetAttribute\n");
1240+
printf(" Texture Alignment: \t\t\t%d bytes \n", TmpInt);
1241+
checkResult(
1242+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, Device),
1243+
"Error returned from cuDeviceGetAttribute\n");
1244+
printf(" Clock Rate: \t\t\t%d kHz\n", TmpInt);
1245+
checkResult(cuDeviceGetAttribute(
1246+
&TmpInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, Device),
1247+
"Error returned from cuDeviceGetAttribute\n");
1248+
printf(" Execution Timeout: \t\t\t%s \n", BOOL2TEXT(TmpInt));
1249+
checkResult(
1250+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, Device),
1251+
"Error returned from cuDeviceGetAttribute\n");
1252+
printf(" Integrated Device: \t\t\t%s \n", BOOL2TEXT(TmpInt));
1253+
checkResult(cuDeviceGetAttribute(
1254+
&TmpInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, Device),
1255+
"Error returned from cuDeviceGetAttribute\n");
1256+
printf(" Can Map Host Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
1257+
checkResult(
1258+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, Device),
1259+
"Error returned from cuDeviceGetAttribute\n");
1260+
if (TmpInt == CU_COMPUTEMODE_DEFAULT)
1261+
TmpStr = "DEFAULT";
1262+
else if (TmpInt == CU_COMPUTEMODE_PROHIBITED)
1263+
TmpStr = "PROHIBITED";
1264+
else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
1265+
TmpStr = "EXCLUSIVE PROCESS";
1266+
else
1267+
TmpStr = "unknown";
1268+
printf(" Compute Mode: \t\t\t%s \n", TmpStr.c_str());
1269+
checkResult(cuDeviceGetAttribute(
1270+
&TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, Device),
1271+
"Error returned from cuDeviceGetAttribute\n");
1272+
printf(" Concurrent Kernels: \t\t%s \n", BOOL2TEXT(TmpInt));
1273+
checkResult(
1274+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, Device),
1275+
"Error returned from cuDeviceGetAttribute\n");
1276+
printf(" ECC Enabled: \t\t\t%s \n", BOOL2TEXT(TmpInt));
1277+
checkResult(cuDeviceGetAttribute(
1278+
&TmpInt, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, Device),
1279+
"Error returned from cuDeviceGetAttribute\n");
1280+
printf(" Memory Clock Rate: \t\t\t%d kHz\n", TmpInt);
1281+
checkResult(cuDeviceGetAttribute(
1282+
&TmpInt, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, Device),
1283+
"Error returned from cuDeviceGetAttribute\n");
1284+
printf(" Memory Bus Width: \t\t\t%d bits\n", TmpInt);
1285+
checkResult(
1286+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, Device),
1287+
"Error returned from cuDeviceGetAttribute\n");
1288+
printf(" L2 Cache Size: \t\t\t%d bytes \n", TmpInt);
1289+
checkResult(
1290+
cuDeviceGetAttribute(
1291+
&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, Device),
1292+
"Error returned from cuDeviceGetAttribute\n");
1293+
printf(" Max Threads Per SMP: \t\t%d \n", TmpInt);
1294+
checkResult(cuDeviceGetAttribute(
1295+
&TmpInt, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, Device),
1296+
"Error returned from cuDeviceGetAttribute\n");
1297+
printf(" Async Engines: \t\t\t%s (%d) \n", BOOL2TEXT(TmpInt), TmpInt);
1298+
checkResult(cuDeviceGetAttribute(
1299+
&TmpInt, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, Device),
1300+
"Error returned from cuDeviceGetAttribute\n");
1301+
printf(" Unified Addressing: \t\t%s \n", BOOL2TEXT(TmpInt));
1302+
checkResult(
1303+
cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, Device),
1304+
"Error returned from cuDeviceGetAttribute\n");
1305+
printf(" Managed Memory: \t\t\t%s \n", BOOL2TEXT(TmpInt));
1306+
checkResult(
1307+
cuDeviceGetAttribute(
1308+
&TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, Device),
1309+
"Error returned from cuDeviceGetAttribute\n");
1310+
printf(" Concurrent Managed Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
1311+
checkResult(
1312+
cuDeviceGetAttribute(
1313+
&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, Device),
1314+
"Error returned from cuDeviceGetAttribute\n");
1315+
printf(" Preemption Supported: \t\t%s \n", BOOL2TEXT(TmpInt));
1316+
checkResult(cuDeviceGetAttribute(
1317+
&TmpInt, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, Device),
1318+
"Error returned from cuDeviceGetAttribute\n");
1319+
printf(" Cooperative Launch: \t\t%s \n", BOOL2TEXT(TmpInt));
1320+
checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
1321+
Device),
1322+
"Error returned from cuDeviceGetAttribute\n");
1323+
printf(" Multi-Device Boars: \t\t%s \n", BOOL2TEXT(TmpInt));
1324+
checkResult(cuDeviceGetAttribute(&TmpInt,
1325+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1326+
Device),
1327+
"Error returned from cuDeviceGetAttribute\n");
1328+
checkResult(cuDeviceGetAttribute(&TmpInt2,
1329+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
1330+
Device),
1331+
"Error returned from cuDeviceGetAttribute\n");
1332+
printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
1333+
}
11601334
};
11611335

11621336
DeviceRTLTy DeviceRTL;
@@ -1357,6 +1531,11 @@ void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
13571531
InfoLevel.store(NewInfoLevel);
13581532
}
13591533

1534+
/// Plugin entry point that prints the information of device \p device_id by
/// forwarding to DeviceRTL.printDeviceInfo. The ID is validated only through
/// an assert, i.e. only when assertions are compiled in.
void __tgt_rtl_print_device_info(int32_t device_id) {
  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");

  DeviceRTL.printDeviceInfo(device_id);
}
1538+
13601539
#ifdef __cplusplus
13611540
}
13621541
#endif

openmp/libomptarget/plugins/exports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ VERS1.0 {
2323
__tgt_rtl_unregister_lib;
2424
__tgt_rtl_supports_empty_images;
2525
__tgt_rtl_set_info_flag;
26+
__tgt_rtl_print_device_info;
2627
local:
2728
*;
2829
};

openmp/libomptarget/src/device.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,14 @@ int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
511511
TgtOffsets, TgtVarsSize, AsyncInfo);
512512
}
513513

514+
// Run region on device
515+
bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
516+
if (!RTL->print_device_info)
517+
return false;
518+
RTL->print_device_info(RTLDevId);
519+
return true;
520+
}
521+
514522
// Run team region on device.
515523
int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
516524
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,

openmp/libomptarget/src/device.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,10 @@ struct DeviceTy {
275275
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
276276
int32_t synchronize(AsyncInfoTy &AsyncInfo);
277277

278+
/// Calls the corresponding print in the \p RTLDevID
279+
/// device RTL to obtain the information of the specific device.
280+
bool printDeviceInfo(int32_t RTLDevID);
281+
278282
private:
279283
// Call to RTL
280284
void init(); // To be called only via DeviceTy::initOnce()

openmp/libomptarget/src/exports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ VERS1.0 {
4040
llvm_omp_target_alloc_shared;
4141
llvm_omp_target_alloc_device;
4242
__tgt_set_info_flag;
43+
__tgt_print_device_info;
4344
local:
4445
*;
4546
};

openmp/libomptarget/src/interface.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,3 +466,8 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
466466
R.set_info_flag(NewInfoLevel);
467467
}
468468
}
469+
470+
/// Print the information of the device registered at index \p device_id by
/// calling its printDeviceInfo with that device's own RTLDeviceID. The bool
/// result of printDeviceInfo is returned converted to int.
// NOTE(review): device_id indexes PM->Devices without a range check here —
// confirm that callers validate it.
EXTERN int __tgt_print_device_info(int64_t device_id) {
  DeviceTy &Device = PM->Devices[device_id];
  return Device.printDeviceInfo(Device.RTLDeviceID);
}

0 commit comments

Comments
 (0)