Skip to content

Commit f24c64f

Browse files
authored
Amd/dev/rlieberm/reland offload (llvm#4327)
2 parents 0e9e394 + 0bb67e0 commit f24c64f

File tree

3 files changed

+262
-32
lines changed

3 files changed

+262
-32
lines changed

offload/tools/deviceinfo/CMakeLists.txt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@ add_openmp_tool(llvm-offload-device-info llvm-offload-device-info.cpp)
44

55
llvm_update_compile_flags(llvm-offload-device-info)
66

7-
target_include_directories(llvm-offload-device-info PRIVATE
8-
${LIBOMPTARGET_INCLUDE_DIR}
9-
)
107
target_link_libraries(llvm-offload-device-info PRIVATE
11-
omp
12-
omptarget
8+
LLVMOffload
139
)
Lines changed: 261 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,277 @@
1-
//===- llvm-offload-device-info.cpp - Device info as seen by LLVM/Offload -===//
1+
//===- llvm-offload-device-info.cpp - Print liboffload properties ---------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This is a command line utility that, by using LLVM/Offload, and the device
10-
// plugins, list devices information as seen by the runtime.
9+
// This is a command line utility that, by using the new liboffload API, prints
10+
// all devices and properties
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "omptarget.h"
15-
#include <cstdio>
14+
#include <OffloadAPI.h>
15+
#include <iostream>
16+
#include <vector>
1617

17-
int main(int argc, char **argv) {
18-
__tgt_bin_desc EmptyDesc = {0, nullptr, nullptr, nullptr};
19-
__tgt_register_lib(&EmptyDesc);
20-
__tgt_init_all_rtls();
18+
// Evaluates the liboffload call X exactly once. If the result converts to
// true (i.e. it is not OL_SUCCESS), that result is returned from the
// enclosing function; otherwise execution continues with the next statement.
//
// The expansion is a complete braced `if` statement, so an invocation is
// valid with or without a trailing semicolon.
#define OFFLOAD_ERR(X)                                                         \
  if (auto Err = (X)) {                                                        \
    return Err;                                                                \
  }
22+
23+
/// Selects which doWrite implementation formats a value.
///  - NORMAL:   the value is formatted by the default doWrite for its type.
///  - FP_FLAGS: the value is a floating point capability bitfield and is
///              formatted by the doWrite specialization that names each bit.
enum class PrintKind { NORMAL, FP_FLAGS };
27+
28+
/// Default formatter: streams \p Val into \p S using operator<<.
///
/// \tparam T  type of the value being printed.
/// \tparam PK formatting selector; unused by this primary template, it exists
///            so that explicit specializations can be keyed on it.
template <typename T, PrintKind PK = PrintKind::NORMAL>
void doWrite(std::ostream &S, T &&Val) {
  S << Val;
}
32+
33+
template <>
34+
void doWrite<ol_platform_backend_t>(std::ostream &S,
35+
ol_platform_backend_t &&Val) {
36+
switch (Val) {
37+
case OL_PLATFORM_BACKEND_UNKNOWN:
38+
S << "UNKNOWN";
39+
break;
40+
case OL_PLATFORM_BACKEND_CUDA:
41+
S << "CUDA";
42+
break;
43+
case OL_PLATFORM_BACKEND_AMDGPU:
44+
S << "AMDGPU";
45+
break;
46+
case OL_PLATFORM_BACKEND_HOST:
47+
S << "HOST";
48+
break;
49+
default:
50+
S << "<< INVALID >>";
51+
break;
52+
}
53+
}
54+
template <>
55+
void doWrite<ol_device_type_t>(std::ostream &S, ol_device_type_t &&Val) {
56+
switch (Val) {
57+
case OL_DEVICE_TYPE_GPU:
58+
S << "GPU";
59+
break;
60+
case OL_DEVICE_TYPE_CPU:
61+
S << "CPU";
62+
break;
63+
case OL_DEVICE_TYPE_HOST:
64+
S << "HOST";
65+
break;
66+
default:
67+
S << "<< INVALID >>";
68+
break;
69+
}
70+
}
71+
template <>
72+
void doWrite<ol_dimensions_t>(std::ostream &S, ol_dimensions_t &&Val) {
73+
S << "{x: " << Val.x << ", y: " << Val.y << ", z: " << Val.z << "}";
74+
}
75+
template <>
76+
void doWrite<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
77+
std::ostream &S, ol_device_fp_capability_flags_t &&Val) {
78+
S << Val << " {";
79+
80+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) {
81+
S << " CORRECTLY_ROUNDED_DIVIDE_SQRT";
82+
}
83+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) {
84+
S << " ROUND_TO_NEAREST";
85+
}
86+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) {
87+
S << " ROUND_TO_ZERO";
88+
}
89+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) {
90+
S << " ROUND_TO_INF";
91+
}
92+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) {
93+
S << " INF_NAN";
94+
}
95+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_DENORM) {
96+
S << " DENORM";
97+
}
98+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_FMA) {
99+
S << " FMA";
100+
}
101+
if (Val & OL_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) {
102+
S << " SOFT_FLOAT";
103+
}
104+
105+
S << " }";
106+
}
21107

22-
printf("Found %d devices:\n", omp_get_num_devices());
23-
for (int Dev = 0; Dev < omp_get_num_devices(); Dev++) {
24-
printf(" Device %d:\n", Dev);
25-
if (!__tgt_print_device_info(Dev))
26-
printf(" print_device_info not implemented\n");
27-
printf("\n");
108+
template <typename T>
109+
ol_result_t printPlatformValue(std::ostream &S, ol_platform_handle_t Plat,
110+
ol_platform_info_t Info, const char *Desc) {
111+
S << Desc << ": ";
112+
113+
if constexpr (std::is_pointer_v<T>) {
114+
std::vector<uint8_t> Val;
115+
size_t Size;
116+
OFFLOAD_ERR(olGetPlatformInfoSize(Plat, Info, &Size));
117+
Val.resize(Size);
118+
OFFLOAD_ERR(olGetPlatformInfo(Plat, Info, sizeof(Val), Val.data()));
119+
doWrite(S, reinterpret_cast<T>(Val.data()));
120+
} else {
121+
T Val;
122+
OFFLOAD_ERR(olGetPlatformInfo(Plat, Info, sizeof(Val), &Val));
123+
doWrite(S, std::move(Val));
124+
}
125+
S << "\n";
126+
return OL_SUCCESS;
127+
}
128+
129+
template <typename T, PrintKind PK = PrintKind::NORMAL>
130+
ol_result_t printDeviceValue(std::ostream &S, ol_device_handle_t Dev,
131+
ol_device_info_t Info, const char *Desc,
132+
const char *Units = nullptr) {
133+
S << Desc << ": ";
134+
135+
if constexpr (std::is_pointer_v<T>) {
136+
std::vector<uint8_t> Val;
137+
size_t Size;
138+
OFFLOAD_ERR(olGetDeviceInfoSize(Dev, Info, &Size));
139+
Val.resize(Size);
140+
OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, sizeof(Val), Val.data()));
141+
doWrite<T, PK>(S, reinterpret_cast<T>(Val.data()));
142+
} else {
143+
T Val;
144+
OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, sizeof(Val), &Val));
145+
doWrite<T, PK>(S, std::move(Val));
146+
}
147+
if (Units)
148+
S << " " << Units;
149+
S << "\n";
150+
return OL_SUCCESS;
151+
}
152+
153+
/// Prints a full report for device \p D to \p S: a "[<product name>]" header,
/// then the properties of the platform the device belongs to, then the
/// device's own properties, one per line.
///
/// \returns OL_SUCCESS, or the first failing result of a liboffload call.
///          Output written before the failure is not rolled back.
ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
  ol_platform_handle_t Platform;
  OFFLOAD_ERR(
      olGetDeviceInfo(D, OL_DEVICE_INFO_PLATFORM, sizeof(Platform), &Platform));

  // Header line: the product name, fetched with the size-then-data pattern.
  std::vector<char> Name;
  size_t NameSize = 0;
  OFFLOAD_ERR(olGetDeviceInfoSize(D, OL_DEVICE_INFO_PRODUCT_NAME, &NameSize));
  Name.resize(NameSize);
  OFFLOAD_ERR(
      olGetDeviceInfo(D, OL_DEVICE_INFO_PRODUCT_NAME, NameSize, Name.data()));
  // The buffer is streamed as a C string below, so make sure it is terminated
  // (and non-empty) whatever the runtime wrote. An extra NUL after an already
  // terminated string does not change the output.
  Name.push_back('\0');
  S << "[" << Name.data() << "]\n";

  // Properties of the owning platform.
  OFFLOAD_ERR(printPlatformValue<const char *>(
      S, Platform, OL_PLATFORM_INFO_NAME, "Platform Name"));
  OFFLOAD_ERR(printPlatformValue<const char *>(
      S, Platform, OL_PLATFORM_INFO_VENDOR_NAME, "Platform Vendor Name"));
  OFFLOAD_ERR(printPlatformValue<const char *>(
      S, Platform, OL_PLATFORM_INFO_VERSION, "Platform Version"));
  OFFLOAD_ERR(printPlatformValue<ol_platform_backend_t>(
      S, Platform, OL_PLATFORM_INFO_BACKEND, "Platform Backend"));

  // Properties of the device itself.
  OFFLOAD_ERR(
      printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_NAME, "Name"));
  OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_PRODUCT_NAME,
                                             "Product Name"));
  OFFLOAD_ERR(
      printDeviceValue<ol_device_type_t>(S, D, OL_DEVICE_INFO_TYPE, "Type"));
  OFFLOAD_ERR(printDeviceValue<const char *>(
      S, D, OL_DEVICE_INFO_DRIVER_VERSION, "Driver Version"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(
      S, D, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, "Max Work Group Size"));
  OFFLOAD_ERR(printDeviceValue<ol_dimensions_t>(
      S, D, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION,
      "Max Work Group Size Per Dimension"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_MAX_WORK_SIZE,
                                         "Max Work Size"));
  OFFLOAD_ERR(printDeviceValue<ol_dimensions_t>(
      S, D, OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION,
      "Max Work Size Per Dimension"));
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_VENDOR_ID, "Vendor ID"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NUM_COMPUTE_UNITS,
                                         "Num Compute Units"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(
      S, D, OL_DEVICE_INFO_MAX_CLOCK_FREQUENCY, "Max Clock Frequency", "MHz"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_MEMORY_CLOCK_RATE,
                                         "Memory Clock Rate", "MHz"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_ADDRESS_BITS,
                                         "Address Bits"));
  OFFLOAD_ERR(printDeviceValue<uint64_t>(
      S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
  OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
                                         "Global Mem Size", "B"));

  // Floating point capabilities, decoded bit by bit via PrintKind::FP_FLAGS.
  // The extra parentheses keep the comma in the template argument list from
  // being parsed as a macro argument separator.
  OFFLOAD_ERR(
      (printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
          S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,
          "Single Precision Floating Point Capability")));
  OFFLOAD_ERR(
      (printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
          S, D, OL_DEVICE_INFO_DOUBLE_FP_CONFIG,
          "Double Precision Floating Point Capability")));
  OFFLOAD_ERR(
      (printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
          S, D, OL_DEVICE_INFO_HALF_FP_CONFIG,
          "Half Precision Floating Point Capability")));

  // Native vector widths per element type.
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR,
                                 "Native Vector Width For Char"));
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT,
                                 "Native Vector Width For Short"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(S, D,
                                         OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT,
                                         "Native Vector Width For Int"));
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG,
                                 "Native Vector Width For Long"));
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT,
                                 "Native Vector Width For Float"));
  OFFLOAD_ERR(printDeviceValue<uint32_t>(
      S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE,
      "Native Vector Width For Double"));
  OFFLOAD_ERR(
      printDeviceValue<uint32_t>(S, D, OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF,
                                 "Native Vector Width For Half"));

  return OL_SUCCESS;
}
243+
244+
/// Initializes liboffload, prints the library version and the number of
/// devices, prints a report for every device, and shuts liboffload down.
///
/// \returns OL_SUCCESS, or the first failing result of a liboffload call. On
///          such an early return the remaining steps, including olShutDown,
///          are not executed.
ol_result_t printRoot(std::ostream &S) {
  using DeviceList = std::vector<ol_device_handle_t>;

  OFFLOAD_ERR(olInit());
  S << "Liboffload Version: " << OL_VERSION_MAJOR << "." << OL_VERSION_MINOR
    << "." << OL_VERSION_PATCH << "\n";

  // Gather every device handle reported by the runtime. The list is handed to
  // the capture-less callback through the opaque user-data pointer.
  DeviceList Found;
  auto Collect = [](ol_device_handle_t Device, void *UserData) {
    static_cast<DeviceList *>(UserData)->push_back(Device);
    return true;
  };
  OFFLOAD_ERR(olIterateDevices(Collect, &Found));

  S << "Num Devices: " << Found.size() << "\n";

  // One report per device, each preceded by a blank line.
  for (ol_device_handle_t Device : Found) {
    S << "\n";
    OFFLOAD_ERR(printDevice(S, Device));
  }

  OFFLOAD_ERR(olShutDown());
  return OL_SUCCESS;
}
267+
268+
int main(int argc, char **argv) {
269+
auto Err = printRoot(std::cout);
270+
271+
if (Err) {
272+
std::cerr << "[Liboffload error " << Err->Code << "]: " << Err->Details
273+
<< "\n";
274+
return 1;
275+
}
31276
return 0;
32277
}

revert_patches.txt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,11 @@
11
---
2-
Revert: breaks build of hipCUB
3-
commit 55783bd0 [HIP] fix host min/max in header (#82956)
4-
Sam
5-
---
6-
Revert "Recommit "[InstCombine] Expand `foldSelectICmpAndOr` -> `foldSelectICmpAndBinOp` to work for more binops" (3rd Try)"
7-
Backup fix for SWDEV-454675
8-
This reverts commit 54ec8bcaf
9-
contact: Selehov
10-
b2659ca44 [InstCombine] Propagate flags in `foldSelectICmpAndBinOp` (#127437)
11-
---
122
revert: breaks rocBLAS build
133
d57230c7 [AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions (#100485)
144
---
155
Revert "[SLP]Support LShr as base for copyable elements"
166
breaks build rocPRIM
177
---
188
needs more offload patches
19-
[Offload] Port llvm-offload-device-info to new offload API (#155626)
209
[Offload] Implement 'olIsValidBinary' in offload
2110
---
2211
needs post merge integration

0 commit comments

Comments
 (0)