|
61 | 61 | } while (false)
|
62 | 62 | #endif // OMPTARGET_DEBUG
|
63 | 63 |
|
| 64 | +#define BOOL2TEXT(b) ((b) ? "Yes" : "No") |
| 65 | + |
64 | 66 | #include "elf_common.h"
|
65 | 67 |
|
66 | 68 | /// Keep entries table per device.
|
@@ -1157,6 +1159,178 @@ class DeviceRTLTy {
|
1157 | 1159 | }
|
1158 | 1160 | return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
|
1159 | 1161 | }
|
| 1162 | + |
| 1163 | + void printDeviceInfo(int32_t device_id) { |
| 1164 | + char TmpChar[1000]; |
| 1165 | + std::string TmpStr; |
| 1166 | + size_t TmpSt; |
| 1167 | + int TmpInt, TmpInt2, TmpInt3; |
| 1168 | + |
| 1169 | + CUdevice Device; |
| 1170 | + checkResult(cuDeviceGet(&Device, device_id), |
| 1171 | + "Error returned from cuCtxGetDevice\n"); |
| 1172 | + |
| 1173 | + cuDriverGetVersion(&TmpInt); |
| 1174 | + printf(" CUDA Driver Version: \t\t%d \n", TmpInt); |
| 1175 | + printf(" CUDA Device Number: \t\t%d \n", device_id); |
| 1176 | + checkResult(cuDeviceGetName(TmpChar, 1000, Device), |
| 1177 | + "Error returned from cuDeviceGetName\n"); |
| 1178 | + printf(" Device Name: \t\t\t%s \n", TmpChar); |
| 1179 | + checkResult(cuDeviceTotalMem(&TmpSt, Device), |
| 1180 | + "Error returned from cuDeviceTotalMem\n"); |
| 1181 | + printf(" Global Memory Size: \t\t%zu bytes \n", TmpSt); |
| 1182 | + checkResult(cuDeviceGetAttribute( |
| 1183 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, Device), |
| 1184 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1185 | + printf(" Number of Multiprocessors: \t\t%d \n", TmpInt); |
| 1186 | + checkResult( |
| 1187 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device), |
| 1188 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1189 | + printf(" Concurrent Copy and Execution: \t%s \n", BOOL2TEXT(TmpInt)); |
| 1190 | + checkResult(cuDeviceGetAttribute( |
| 1191 | + &TmpInt, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, Device), |
| 1192 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1193 | + printf(" Total Constant Memory: \t\t%d bytes\n", TmpInt); |
| 1194 | + checkResult( |
| 1195 | + cuDeviceGetAttribute( |
| 1196 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, Device), |
| 1197 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1198 | + printf(" Max Shared Memory per Block: \t%d bytes \n", TmpInt); |
| 1199 | + checkResult(cuDeviceGetAttribute( |
| 1200 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, Device), |
| 1201 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1202 | + printf(" Registers per Block: \t\t%d \n", TmpInt); |
| 1203 | + checkResult( |
| 1204 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_WARP_SIZE, Device), |
| 1205 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1206 | + printf(" Warp Size: \t\t\t\t%d Threads \n", TmpInt); |
| 1207 | + checkResult(cuDeviceGetAttribute( |
| 1208 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Device), |
| 1209 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1210 | + printf(" Maximum Threads per Block: \t\t%d \n", TmpInt); |
| 1211 | + checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, |
| 1212 | + Device), |
| 1213 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1214 | + checkResult(cuDeviceGetAttribute(&TmpInt2, |
| 1215 | + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, Device), |
| 1216 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1217 | + checkResult(cuDeviceGetAttribute(&TmpInt3, |
| 1218 | + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, Device), |
| 1219 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1220 | + printf(" Maximum Block Dimensions: \t\t%d, %d, %d \n", TmpInt, TmpInt2, |
| 1221 | + TmpInt3); |
| 1222 | + checkResult( |
| 1223 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, Device), |
| 1224 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1225 | + checkResult(cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, |
| 1226 | + Device), |
| 1227 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1228 | + checkResult(cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, |
| 1229 | + Device), |
| 1230 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1231 | + printf(" Maximum Grid Dimensions: \t\t%d x %d x %d \n", TmpInt, TmpInt2, |
| 1232 | + TmpInt3); |
| 1233 | + checkResult( |
| 1234 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_PITCH, Device), |
| 1235 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1236 | + printf(" Maximum Memory Pitch: \t\t%d bytes \n", TmpInt); |
| 1237 | + checkResult(cuDeviceGetAttribute( |
| 1238 | + &TmpInt, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, Device), |
| 1239 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1240 | + printf(" Texture Alignment: \t\t\t%d bytes \n", TmpInt); |
| 1241 | + checkResult( |
| 1242 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, Device), |
| 1243 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1244 | + printf(" Clock Rate: \t\t\t%d kHz\n", TmpInt); |
| 1245 | + checkResult(cuDeviceGetAttribute( |
| 1246 | + &TmpInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, Device), |
| 1247 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1248 | + printf(" Execution Timeout: \t\t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1249 | + checkResult( |
| 1250 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, Device), |
| 1251 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1252 | + printf(" Integrated Device: \t\t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1253 | + checkResult(cuDeviceGetAttribute( |
| 1254 | + &TmpInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, Device), |
| 1255 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1256 | + printf(" Can Map Host Memory: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1257 | + checkResult( |
| 1258 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, Device), |
| 1259 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1260 | + if (TmpInt == CU_COMPUTEMODE_DEFAULT) |
| 1261 | + TmpStr = "DEFAULT"; |
| 1262 | + else if (TmpInt == CU_COMPUTEMODE_PROHIBITED) |
| 1263 | + TmpStr = "PROHIBITED"; |
| 1264 | + else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS) |
| 1265 | + TmpStr = "EXCLUSIVE PROCESS"; |
| 1266 | + else |
| 1267 | + TmpStr = "unknown"; |
| 1268 | + printf(" Compute Mode: \t\t\t%s \n", TmpStr.c_str()); |
| 1269 | + checkResult(cuDeviceGetAttribute( |
| 1270 | + &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, Device), |
| 1271 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1272 | + printf(" Concurrent Kernels: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1273 | + checkResult( |
| 1274 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, Device), |
| 1275 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1276 | + printf(" ECC Enabled: \t\t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1277 | + checkResult(cuDeviceGetAttribute( |
| 1278 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, Device), |
| 1279 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1280 | + printf(" Memory Clock Rate: \t\t\t%d kHz\n", TmpInt); |
| 1281 | + checkResult(cuDeviceGetAttribute( |
| 1282 | + &TmpInt, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, Device), |
| 1283 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1284 | + printf(" Memory Bus Width: \t\t\t%d bits\n", TmpInt); |
| 1285 | + checkResult( |
| 1286 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, Device), |
| 1287 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1288 | + printf(" L2 Cache Size: \t\t\t%d bytes \n", TmpInt); |
| 1289 | + checkResult( |
| 1290 | + cuDeviceGetAttribute( |
| 1291 | + &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, Device), |
| 1292 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1293 | + printf(" Max Threads Per SMP: \t\t%d \n", TmpInt); |
| 1294 | + checkResult(cuDeviceGetAttribute( |
| 1295 | + &TmpInt, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, Device), |
| 1296 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1297 | + printf(" Async Engines: \t\t\t%s (%d) \n", BOOL2TEXT(TmpInt), TmpInt); |
| 1298 | + checkResult(cuDeviceGetAttribute( |
| 1299 | + &TmpInt, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, Device), |
| 1300 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1301 | + printf(" Unified Addressing: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1302 | + checkResult( |
| 1303 | + cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, Device), |
| 1304 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1305 | + printf(" Managed Memory: \t\t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1306 | + checkResult( |
| 1307 | + cuDeviceGetAttribute( |
| 1308 | + &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, Device), |
| 1309 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1310 | + printf(" Concurrent Managed Memory: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1311 | + checkResult( |
| 1312 | + cuDeviceGetAttribute( |
| 1313 | + &TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, Device), |
| 1314 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1315 | + printf(" Preemption Supported: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1316 | + checkResult(cuDeviceGetAttribute( |
| 1317 | + &TmpInt, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, Device), |
| 1318 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1319 | + printf(" Cooperative Launch: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1320 | + checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, |
| 1321 | + Device), |
| 1322 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1323 | + printf(" Multi-Device Boars: \t\t%s \n", BOOL2TEXT(TmpInt)); |
| 1324 | + checkResult(cuDeviceGetAttribute(&TmpInt, |
| 1325 | + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, |
| 1326 | + Device), |
| 1327 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1328 | + checkResult(cuDeviceGetAttribute(&TmpInt2, |
| 1329 | + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, |
| 1330 | + Device), |
| 1331 | + "Error returned from cuDeviceGetAttribute\n"); |
| 1332 | + printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2); |
| 1333 | + } |
1160 | 1334 | };
|
1161 | 1335 |
|
1162 | 1336 | DeviceRTLTy DeviceRTL;
|
@@ -1357,6 +1531,11 @@ void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
|
1357 | 1531 | InfoLevel.store(NewInfoLevel);
|
1358 | 1532 | }
|
1359 | 1533 |
|
| 1534 | +void __tgt_rtl_print_device_info(int32_t device_id) { |
| 1535 | + assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); |
| 1536 | + DeviceRTL.printDeviceInfo(device_id); |
| 1537 | +} |
| 1538 | + |
1360 | 1539 | #ifdef __cplusplus
|
1361 | 1540 | }
|
1362 | 1541 | #endif
|
0 commit comments