@@ -40,9 +40,28 @@ std::string getFileContents(std::string fname) {
40
40
return file_contents;
41
41
}
42
42
43
- std::vector<std::string> getPCIIds (const char *driver_search_phrase,
44
- const char *pci_id_search_phrase) {
45
- std::vector<std::string> PCI_IDS;
43
+ std::vector<std::pair<std::string, std::string>>
44
+ getAmdGpuDevices (const char *driver_search_phrase,
45
+ const char *pci_id_search_phrase, bool hsa_detection) {
46
+ std::vector<std::pair<std::string, std::string>> offloadArchs;
47
+
48
+ if (!hsa_detection) {
49
+ offloadArchs = getPCIIds (driver_search_phrase, pci_id_search_phrase);
50
+ }
51
+
52
+ if (offloadArchs.empty ()) {
53
+ if (IsAmdDeviceAvailable ()) {
54
+ BindHsaMethodsAndInitHSA ();
55
+ }
56
+ offloadArchs = runHsaDetection ();
57
+ }
58
+
59
+ return offloadArchs;
60
+ }
61
+
62
+ std::vector<std::pair<std::string, std::string>>
63
+ getPCIIds (const char *driver_search_phrase, const char *pci_id_search_phrase) {
64
+ std::vector<std::pair<std::string, std::string>> PCI_IDS;
46
65
#ifndef _WIN32
47
66
char uevent_filename[MAXPATHSIZE];
48
67
const char *sys_bus_pci_devices_dir = " /sys/bus/pci/devices" ;
@@ -61,8 +80,18 @@ std::vector<std::string> getPCIIds(const char *driver_search_phrase,
61
80
std::size_t found_loc = file_contents.find (driver_search_phrase);
62
81
if (found_loc != std::string::npos) {
63
82
found_loc = file_contents.find (pci_id_search_phrase);
64
- if (found_loc != std::string::npos)
65
- PCI_IDS.push_back (file_contents.substr (found_loc + 7 , 9 ));
83
+ if (found_loc != std::string::npos) {
84
+ std::string pci_id = file_contents.substr (found_loc + 7 , 9 );
85
+ unsigned vid32, devid32;
86
+ sscanf (pci_id.c_str (), " %x:%x" , &vid32, &devid32);
87
+ uint16_t vid = vid32;
88
+ uint16_t devid = devid32;
89
+ std::string offload_arch = getOffloadArch (vid, devid);
90
+
91
+ if (!offload_arch.empty ()) {
92
+ PCI_IDS.emplace_back (offload_arch, pci_id);
93
+ }
94
+ }
66
95
}
67
96
}
68
97
} // end of foreach subdir
@@ -96,7 +125,7 @@ std::vector<std::string> lookupCodename(std::string lookup_codename) {
96
125
97
126
std::vector<std::string> lookupOffloadArch (std::string lookup_offload_arch) {
98
127
std::vector<std::string> PCI_IDS;
99
- for (auto id2str : AOT_OFFLOADARCHS)
128
+ for (auto id2str : AOT_OFFLOADARCHS) {
100
129
if (lookup_offload_arch.compare (id2str.offloadarch ) == 0 )
101
130
for (auto aot_table_entry : AOT_TABLE) {
102
131
if (id2str.offloadarch_id == aot_table_entry.offloadarch_id ) {
@@ -109,19 +138,17 @@ std::vector<std::string> lookupOffloadArch(std::string lookup_offload_arch) {
109
138
PCI_IDS.push_back (std::string (&pci_id[0 ]));
110
139
}
111
140
}
141
+ }
112
142
return PCI_IDS;
113
143
}
114
144
115
- std::string getCodename (uint16_t VendorID, uint16_t DeviceID) {
116
- std::string retval;
117
- for (auto aot_table_entry : AOT_TABLE) {
118
- if ((VendorID == aot_table_entry.vendorid ) &&
119
- (DeviceID == aot_table_entry.devid ))
120
- for (auto id2str : AOT_CODENAMES)
121
- if (id2str.codename_id == aot_table_entry.codename_id )
122
- return std::string (id2str.codename );
123
- }
124
- return retval;
145
+ std::string getCodename (std::string offloadArch) {
146
+
147
+ for (auto aot_table_entry : AOT_AMD_OFFLOADARCH_TO_CODENAME_TABLE)
148
+ if (aot_table_entry.offloadarch == offloadArch)
149
+ return std::string (aot_table_entry.codename );
150
+
151
+ return " " ;
125
152
}
126
153
127
154
std::string getOffloadArch (uint16_t VendorID, uint16_t DeviceID) {
@@ -136,71 +163,51 @@ std::string getOffloadArch(uint16_t VendorID, uint16_t DeviceID) {
136
163
return retval;
137
164
}
138
165
139
- std::string getVendorCapabilities (uint16_t vid, uint16_t devid,
140
- std::string oa) {
166
+ std::string
167
+ getVendorCapabilities (std::pair<std::string, std::string> offloadarch) {
168
+
169
+ if (llvm::StringRef (offloadarch.first ).starts_with_insensitive (" gfx" ) &&
170
+ llvm::StringRef (offloadarch.second ).starts_with_insensitive (" gpu" )) {
171
+ return getAMDGPUCapabilitiesForOffloadarch (offloadarch.second );
172
+ }
173
+
174
+ std::string pci_id = offloadarch.second ;
175
+ unsigned vid, devid;
176
+ sscanf (pci_id.c_str (), " %x:%x" , &vid, &devid);
177
+
141
178
switch (vid) {
142
179
case 0x1002 :
143
- return getAMDGPUCapabilities (vid, devid, oa);
180
+ return getAMDGPUCapabilities ((uint16_t )vid, (uint16_t )devid,
181
+ offloadarch.first );
144
182
case 0x10de :
145
- return getNVPTXCapabilities (vid, devid, oa);
183
+ return getNVPTXCapabilities ((uint16_t )vid, (uint16_t )devid,
184
+ offloadarch.first );
146
185
}
147
- return nullptr ;
186
+
187
+ return " " ;
148
188
}
149
189
150
- std::string getTriple (uint16_t VendorID, uint16_t DeviceID ) {
151
- std::string retval ;
152
- switch (VendorID) {
153
- case 0x1002 :
190
+ std::string getTriple (std::string offloadarch ) {
191
+ llvm::StringRef OffloadarchRef (offloadarch) ;
192
+
193
+ if (OffloadarchRef. starts_with_insensitive ( " gfx " ))
154
194
return (std::string (" amdgcn-amd-amdhsa" ));
155
- break ;
156
- case 0x10de :
195
+
196
+ if (OffloadarchRef. starts_with_insensitive ( " sm " ))
157
197
return (std::string (" nvptx64-nvidia-cuda" ));
158
- break ;
159
- }
160
- return retval;
161
- }
162
198
163
- std::vector<std::string> getAllPCIIds () {
164
- std::vector<std::string> PCI_IDS =
165
- getPCIIds (AMDGPU_SEARCH_PHRASE, AMDGPU_PCIID_PHRASE);
166
- for (auto PCI_ID : getPCIIds (NVIDIA_SEARCH_PHRASE, NVIDIA_PCIID_PHRASE))
167
- PCI_IDS.push_back (PCI_ID);
168
- return PCI_IDS;
199
+ return " " ;
169
200
}
170
201
171
- // / Get runtime capabilities of this system for libomptarget runtime
172
- int getRuntimeCapabilities (char *offload_arch_output_buffer,
173
- size_t offload_arch_output_buffer_size) {
174
- std::vector<std::string> PCI_IDS = getAllPCIIds ();
175
- std::string offload_arch;
176
- for (auto PCI_ID : PCI_IDS) {
177
- unsigned vid32, devid32;
178
- sscanf (PCI_ID.c_str (), " %x:%x" , &vid32, &devid32);
179
- uint16_t vid = vid32;
180
- uint16_t devid = devid32;
181
- offload_arch = getOffloadArch (vid, devid);
182
- if (offload_arch.empty ()) {
183
- fprintf (stderr, " ERROR: offload-arch not found for %x:%x.\n " , vid, devid);
184
- return 1 ;
185
- }
186
- std::string caps = getVendorCapabilities (vid, devid, offload_arch);
187
- std::size_t found_loc = caps.find (" NOT-VISIBLE" );
188
- if (found_loc == std::string::npos) {
189
- // Found first visible GPU, so append caps and exit loop
190
- offload_arch.clear ();
191
- offload_arch = caps;
192
- break ;
193
- }
194
- }
195
- size_t out_str_len = offload_arch.size ();
196
- if (out_str_len > offload_arch_output_buffer_size) {
197
- fprintf (stderr, " ERROR: strlen %zd exceeds buffer length %zd \n " ,
198
- out_str_len, offload_arch_output_buffer_size);
199
- return 1 ;
200
- }
201
- strncpy (offload_arch_output_buffer, offload_arch.c_str (), out_str_len);
202
- offload_arch_output_buffer[out_str_len] = ' \0 ' ; // terminate string
203
- return 0 ;
202
+ std::vector<std::pair<std::string, std::string>>
203
+ getAllPCIIds (bool hsa_detection) {
204
+ std::vector<std::pair<std::string, std::string>> PCI_IDS = getAmdGpuDevices (
205
+ AMDGPU_SEARCH_PHRASE, AMDGPU_PCIID_PHRASE, hsa_detection);
206
+ std::vector<std::pair<std::string, std::string>> PCI_IDS_NV =
207
+ getPCIIds (NVIDIA_SEARCH_PHRASE, NVIDIA_PCIID_PHRASE);
208
+ PCI_IDS.insert (std::end (PCI_IDS), std::begin (PCI_IDS_NV),
209
+ std::end (PCI_IDS_NV));
210
+ return PCI_IDS;
204
211
}
205
212
206
213
[[noreturn]] inline void exitWithError (const Twine &Message,
@@ -222,7 +229,7 @@ int getRuntimeCapabilities(char *offload_arch_output_buffer,
222
229
exitWithError (errorToErrorCode (std::move (E)), Whence);
223
230
}
224
231
template <typename T, typename ... Ts>
225
- T unwrapOrError (Expected<T> EO, Ts &&... Args) {
232
+ T unwrapOrError (Expected<T> EO, Ts &&...Args) {
226
233
if (EO)
227
234
return std::move (*EO);
228
235
exitWithError (EO.takeError (), std::forward<Ts>(Args)...);
@@ -280,18 +287,19 @@ getOffloadArchFromBinary(const std::string &input_filename) {
280
287
}
281
288
282
289
bool isHomogeneousSystemOf (std::string arch) {
290
+
283
291
std::vector<std::string> archPCI_IDs = lookupOffloadArch (arch);
284
- std::vector<std::string> allPCI_IDs = getAllPCIIds ();
292
+ std::vector<std::pair<std:: string,std::string>> allPCI_IDs = getAllPCIIds (false );
285
293
286
294
// arch PCI_IDs could be saved with letters in upper or lower case
287
295
// make comparison case insensitive
288
296
for (auto it : allPCI_IDs) {
289
297
auto find_it = std::find_if (
290
298
archPCI_IDs.begin (), archPCI_IDs.end (), [&](std::string &archID) {
291
- if (archID.size () != it.size ())
299
+ if (archID.size () != it.second . size ())
292
300
return false ;
293
301
for (long unsigned int i = 0 ; i < archID.size (); i++)
294
- if (std::toupper (archID[i]) != std::toupper (it [i]))
302
+ if (std::toupper (archID[i]) != std::toupper ((it. second . c_str ()) [i]))
295
303
return false ;
296
304
return true ;
297
305
});
0 commit comments