Skip to content

Commit 992452c

Browse files
ThorBlronlieb
authored and committed
Extended OffloadArch's device detection algorithm to use HSA as a fallback to detect AMD GPUs whose PCI ID is not yet known.
- Added new command line argument -hsa which enables the HSA detection algorithm. - Removed method getRuntimeCapabilities (no call-sites) - TODO: I will move the method isHomogeneousSystemOf to the plugins. I don't see any reason for this method to be implemented in OffloadArch. The method doesn't work if the PCI ID of a GPU is unknown. Change-Id: Ia0fd44f6d5786eaf513296ac4d731c00d92170d6
1 parent 4f6a577 commit 992452c

File tree

7 files changed

+366
-152
lines changed

7 files changed

+366
-152
lines changed

llvm/include/llvm/OffloadArch/OffloadArch.h

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef __LLVM_OFFLOAD_OFFLOADARCH_H__
1010
#define __LLVM_OFFLOAD_OFFLOADARCH_H__
1111

12+
#include "llvm/ADT/StringRef.h"
1213
#include <string>
1314
#include <vector>
1415
#include <cstdint>
@@ -22,28 +23,35 @@
2223
#define NVIDIA_SEARCH_PHRASE "DRIVER=nvidia"
2324
#define NVIDIA_PCIID_PHRASE "PCI_ID=10DE:"
2425

25-
///
26-
/// Called by libomptarget runtime to get runtime capabilities.
27-
int getRuntimeCapabilities(char *offload_arch_output_buffer,
28-
size_t offload_arch_output_buffer_size);
29-
3026
/// Get the vendor specified software capabilities of the current runtime
3127
/// The input vendor id selects the vendor function to call.
32-
std::string getVendorCapabilities(uint16_t vid, uint16_t devid, std::string oa);
28+
std::string getVendorCapabilities(
29+
std::pair<std::string, std::string> offloadarch);
3330

3431
/// Get the AMD specific software capabilities of the current runtime
3532
std::string getAMDGPUCapabilities(uint16_t vid, uint16_t devid, std::string oa);
33+
std::string getAMDGPUCapabilitiesForOffloadarch(std::string uuid);
34+
3635
/// Get the Nvidia specific software capabilities of the current runtime
3736
std::string getNVPTXCapabilities(uint16_t vid, uint16_t devid, std::string oa);
3837

3938
/// return requirements for each offload image in an application binary
4039
std::vector<std::string> getOffloadArchFromBinary(const std::string &fn);
4140

4241
/// return all offloadable pci-ids found in the system
43-
std::vector<std::string> getAllPCIIds();
42+
std::vector<std::pair<std::string, std::string>> getAllPCIIds(bool hsa_detection);
4443
/// return all offloadable pci-ids for a given vendor
45-
std::vector<std::string> getPCIIds(const char *driver_search_phrase,
46-
const char *pci_id_search_phrase);
44+
std::vector<std::pair<std::string, std::string>>
45+
getPCIIds(const char *driver_search_phrase, const char *pci_id_search_phrase);
46+
47+
/// return vendor specific offloadable GPUs found in the system, detected
48+
/// without using PCI IDs
49+
bool IsAmdDeviceAvailable();
50+
void BindHsaMethodsAndInitHSA();
51+
std::vector<std::pair<std::string, std::string>>
52+
getAmdGpuDevices(const char *driver_search_phrase,
53+
const char *pci_id_search_phrase, bool hsa_detection);
54+
std::vector<std::pair<std::string, std::string>> runHsaDetection();
4755

4856
/// lookup function to return all pci-ids for an input codename
4957
std::vector<std::string> lookupCodename(std::string lookup_codename);
@@ -54,17 +62,17 @@ std::vector<std::string> lookupOffloadArch(std::string lookup_offload_arch);
5462
/// get the offload arch for VendorId-DeviceId
5563
std::string getOffloadArch(uint16_t VendorID, uint16_t DeviceID);
5664

57-
/// get the vendor specified codename VendorId-DeviceId
58-
std::string getCodename(uint16_t VendorID, uint16_t DeviceID);
65+
/// get the vendor specified codename for an offloadarch
66+
std::string getCodename(std::string offloadArch);
5967

60-
/// get the compilation triple for VendorId-DeviceId
61-
std::string getTriple(uint16_t VendorID, uint16_t DeviceID);
68+
/// get the compilation triple for offloadarch
69+
std::string getTriple(std::string offloadarch);
6270

6371
/// Utility to return contents of a file as a string
6472
std::string getFileContents(std::string fname);
6573

6674
/// \return true if the system only has devices with architecture \in arch
6775
/// false otherwise
68-
bool isHomogeneousSystemOf(std::string arch);
76+
[[deprecated]] bool isHomogeneousSystemOf(std::string arch);
6977

7078
#endif

llvm/lib/OffloadArch/OffloadArch.cpp

Lines changed: 82 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,28 @@ std::string getFileContents(std::string fname) {
4040
return file_contents;
4141
}
4242

43-
std::vector<std::string> getPCIIds(const char *driver_search_phrase,
44-
const char *pci_id_search_phrase) {
45-
std::vector<std::string> PCI_IDS;
43+
std::vector<std::pair<std::string, std::string>>
44+
getAmdGpuDevices(const char *driver_search_phrase,
45+
const char *pci_id_search_phrase, bool hsa_detection) {
46+
std::vector<std::pair<std::string, std::string>> offloadArchs;
47+
48+
if (!hsa_detection) {
49+
offloadArchs = getPCIIds(driver_search_phrase, pci_id_search_phrase);
50+
}
51+
52+
if (offloadArchs.empty()) {
53+
if (IsAmdDeviceAvailable()) {
54+
BindHsaMethodsAndInitHSA();
55+
}
56+
offloadArchs = runHsaDetection();
57+
}
58+
59+
return offloadArchs;
60+
}
61+
62+
std::vector<std::pair<std::string, std::string>>
63+
getPCIIds(const char *driver_search_phrase, const char *pci_id_search_phrase) {
64+
std::vector<std::pair<std::string, std::string>> PCI_IDS;
4665
#ifndef _WIN32
4766
char uevent_filename[MAXPATHSIZE];
4867
const char *sys_bus_pci_devices_dir = "/sys/bus/pci/devices";
@@ -61,8 +80,18 @@ std::vector<std::string> getPCIIds(const char *driver_search_phrase,
6180
std::size_t found_loc = file_contents.find(driver_search_phrase);
6281
if (found_loc != std::string::npos) {
6382
found_loc = file_contents.find(pci_id_search_phrase);
64-
if (found_loc != std::string::npos)
65-
PCI_IDS.push_back(file_contents.substr(found_loc + 7, 9));
83+
if (found_loc != std::string::npos) {
84+
std::string pci_id = file_contents.substr(found_loc + 7, 9);
85+
unsigned vid32, devid32;
86+
sscanf(pci_id.c_str(), "%x:%x", &vid32, &devid32);
87+
uint16_t vid = vid32;
88+
uint16_t devid = devid32;
89+
std::string offload_arch = getOffloadArch(vid, devid);
90+
91+
if (!offload_arch.empty()) {
92+
PCI_IDS.emplace_back(offload_arch, pci_id);
93+
}
94+
}
6695
}
6796
}
6897
} // end of foreach subdir
@@ -96,7 +125,7 @@ std::vector<std::string> lookupCodename(std::string lookup_codename) {
96125

97126
std::vector<std::string> lookupOffloadArch(std::string lookup_offload_arch) {
98127
std::vector<std::string> PCI_IDS;
99-
for (auto id2str : AOT_OFFLOADARCHS)
128+
for (auto id2str : AOT_OFFLOADARCHS) {
100129
if (lookup_offload_arch.compare(id2str.offloadarch) == 0)
101130
for (auto aot_table_entry : AOT_TABLE) {
102131
if (id2str.offloadarch_id == aot_table_entry.offloadarch_id) {
@@ -109,19 +138,17 @@ std::vector<std::string> lookupOffloadArch(std::string lookup_offload_arch) {
109138
PCI_IDS.push_back(std::string(&pci_id[0]));
110139
}
111140
}
141+
}
112142
return PCI_IDS;
113143
}
114144

115-
std::string getCodename(uint16_t VendorID, uint16_t DeviceID) {
116-
std::string retval;
117-
for (auto aot_table_entry : AOT_TABLE) {
118-
if ((VendorID == aot_table_entry.vendorid) &&
119-
(DeviceID == aot_table_entry.devid))
120-
for (auto id2str : AOT_CODENAMES)
121-
if (id2str.codename_id == aot_table_entry.codename_id)
122-
return std::string(id2str.codename);
123-
}
124-
return retval;
145+
std::string getCodename(std::string offloadArch) {
146+
147+
for (auto aot_table_entry : AOT_AMD_OFFLOADARCH_TO_CODENAME_TABLE)
148+
if (aot_table_entry.offloadarch == offloadArch)
149+
return std::string(aot_table_entry.codename);
150+
151+
return " ";
125152
}
126153

127154
std::string getOffloadArch(uint16_t VendorID, uint16_t DeviceID) {
@@ -136,71 +163,51 @@ std::string getOffloadArch(uint16_t VendorID, uint16_t DeviceID) {
136163
return retval;
137164
}
138165

139-
std::string getVendorCapabilities(uint16_t vid, uint16_t devid,
140-
std::string oa) {
166+
std::string
167+
getVendorCapabilities(std::pair<std::string, std::string> offloadarch) {
168+
169+
if (llvm::StringRef(offloadarch.first).starts_with_insensitive("gfx") &&
170+
llvm::StringRef(offloadarch.second).starts_with_insensitive("gpu")) {
171+
return getAMDGPUCapabilitiesForOffloadarch(offloadarch.second);
172+
}
173+
174+
std::string pci_id = offloadarch.second;
175+
unsigned vid, devid;
176+
sscanf(pci_id.c_str(), "%x:%x", &vid, &devid);
177+
141178
switch (vid) {
142179
case 0x1002:
143-
return getAMDGPUCapabilities(vid, devid, oa);
180+
return getAMDGPUCapabilities((uint16_t)vid, (uint16_t)devid,
181+
offloadarch.first);
144182
case 0x10de:
145-
return getNVPTXCapabilities(vid, devid, oa);
183+
return getNVPTXCapabilities((uint16_t)vid, (uint16_t)devid,
184+
offloadarch.first);
146185
}
147-
return nullptr;
186+
187+
return "";
148188
}
149189

150-
std::string getTriple(uint16_t VendorID, uint16_t DeviceID) {
151-
std::string retval;
152-
switch (VendorID) {
153-
case 0x1002:
190+
std::string getTriple(std::string offloadarch) {
191+
llvm::StringRef OffloadarchRef(offloadarch);
192+
193+
if (OffloadarchRef.starts_with_insensitive("gfx"))
154194
return (std::string("amdgcn-amd-amdhsa"));
155-
break;
156-
case 0x10de:
195+
196+
if (OffloadarchRef.starts_with_insensitive("sm"))
157197
return (std::string("nvptx64-nvidia-cuda"));
158-
break;
159-
}
160-
return retval;
161-
}
162198

163-
std::vector<std::string> getAllPCIIds() {
164-
std::vector<std::string> PCI_IDS =
165-
getPCIIds(AMDGPU_SEARCH_PHRASE, AMDGPU_PCIID_PHRASE);
166-
for (auto PCI_ID : getPCIIds(NVIDIA_SEARCH_PHRASE, NVIDIA_PCIID_PHRASE))
167-
PCI_IDS.push_back(PCI_ID);
168-
return PCI_IDS;
199+
return "";
169200
}
170201

171-
/// Get runtime capabilities of this system for libomptarget runtime
172-
int getRuntimeCapabilities(char *offload_arch_output_buffer,
173-
size_t offload_arch_output_buffer_size) {
174-
std::vector<std::string> PCI_IDS = getAllPCIIds();
175-
std::string offload_arch;
176-
for (auto PCI_ID : PCI_IDS) {
177-
unsigned vid32, devid32;
178-
sscanf(PCI_ID.c_str(), "%x:%x", &vid32, &devid32);
179-
uint16_t vid = vid32;
180-
uint16_t devid = devid32;
181-
offload_arch = getOffloadArch(vid, devid);
182-
if (offload_arch.empty()) {
183-
fprintf(stderr, "ERROR: offload-arch not found for %x:%x.\n", vid, devid);
184-
return 1;
185-
}
186-
std::string caps = getVendorCapabilities(vid, devid, offload_arch);
187-
std::size_t found_loc = caps.find("NOT-VISIBLE");
188-
if (found_loc == std::string::npos) {
189-
// Found first visible GPU, so append caps and exit loop
190-
offload_arch.clear();
191-
offload_arch = caps;
192-
break;
193-
}
194-
}
195-
size_t out_str_len = offload_arch.size();
196-
if (out_str_len > offload_arch_output_buffer_size) {
197-
fprintf(stderr, "ERROR: strlen %zd exceeds buffer length %zd \n",
198-
out_str_len, offload_arch_output_buffer_size);
199-
return 1;
200-
}
201-
strncpy(offload_arch_output_buffer, offload_arch.c_str(), out_str_len);
202-
offload_arch_output_buffer[out_str_len] = '\0'; // terminate string
203-
return 0;
202+
std::vector<std::pair<std::string, std::string>>
203+
getAllPCIIds(bool hsa_detection) {
204+
std::vector<std::pair<std::string, std::string>> PCI_IDS = getAmdGpuDevices(
205+
AMDGPU_SEARCH_PHRASE, AMDGPU_PCIID_PHRASE, hsa_detection);
206+
std::vector<std::pair<std::string, std::string>> PCI_IDS_NV =
207+
getPCIIds(NVIDIA_SEARCH_PHRASE, NVIDIA_PCIID_PHRASE);
208+
PCI_IDS.insert(std::end(PCI_IDS), std::begin(PCI_IDS_NV),
209+
std::end(PCI_IDS_NV));
210+
return PCI_IDS;
204211
}
205212

206213
[[noreturn]] inline void exitWithError(const Twine &Message,
@@ -222,7 +229,7 @@ int getRuntimeCapabilities(char *offload_arch_output_buffer,
222229
exitWithError(errorToErrorCode(std::move(E)), Whence);
223230
}
224231
template <typename T, typename... Ts>
225-
T unwrapOrError(Expected<T> EO, Ts &&... Args) {
232+
T unwrapOrError(Expected<T> EO, Ts &&...Args) {
226233
if (EO)
227234
return std::move(*EO);
228235
exitWithError(EO.takeError(), std::forward<Ts>(Args)...);
@@ -280,18 +287,19 @@ getOffloadArchFromBinary(const std::string &input_filename) {
280287
}
281288

282289
bool isHomogeneousSystemOf(std::string arch) {
290+
283291
std::vector<std::string> archPCI_IDs = lookupOffloadArch(arch);
284-
std::vector<std::string> allPCI_IDs = getAllPCIIds();
292+
std::vector<std::pair<std::string,std::string>> allPCI_IDs = getAllPCIIds(false);
285293

286294
// arch PCI_IDs could be saved with letters in upper or lower case
287295
// make comparison case insensitive
288296
for (auto it : allPCI_IDs) {
289297
auto find_it = std::find_if(
290298
archPCI_IDs.begin(), archPCI_IDs.end(), [&](std::string &archID) {
291-
if (archID.size() != it.size())
299+
if (archID.size() != it.second.size())
292300
return false;
293301
for (long unsigned int i = 0; i < archID.size(); i++)
294-
if (std::toupper(archID[i]) != std::toupper(it[i]))
302+
if (std::toupper(archID[i]) != std::toupper((it.second.c_str())[i]))
295303
return false;
296304
return true;
297305
});

llvm/lib/OffloadArch/amdgpu/hsa-subset.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,21 @@ typedef enum {
200200
HSA_STATUS_ERROR_FATAL = 0x1026
201201
} hsa_status_t;
202202

203+
/**
204+
* @brief Agent features.
205+
*/
206+
typedef enum {
207+
/**
208+
* The agent supports AQL packets of kernel dispatch type. If this
209+
* feature is enabled, the agent is also a kernel agent.
210+
*/
211+
HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
212+
/**
213+
* The agent supports AQL packets of agent dispatch type.
214+
*/
215+
HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
216+
} hsa_agent_feature_t;
217+
203218
/**
204219
* @brief Instruction set architecture.
205220
*/
@@ -557,6 +572,22 @@ typedef enum {
557572

558573
} hsa_agent_info_t;
559574

575+
/**
576+
* @brief Agent attributes.
577+
*/
578+
typedef enum hsa_amd_agent_info_s {
579+
/**
580+
* Queries UUID of an agent. The value is an Ascii string with a maximum
581+
* of 21 chars including NUL. The string value consists of two parts: header
582+
* and body. The header identifies device type (GPU, CPU, DSP) while body
583+
* encodes UUID as a 16 digit hex string
584+
*
585+
* Agents that do not support UUID will return the string "GPU-XX" or
586+
* "CPU-XX" or "DSP-XX" depending upon their device type ::hsa_device_type_t
587+
*/
588+
HSA_AMD_AGENT_INFO_UUID = 0xA011
589+
} hsa_amd_agent_info_t;
590+
560591
/**
561592
* @brief Hardware device type.
562593
*/

0 commit comments

Comments
 (0)