Skip to content

Commit afd6a5c

Browse files
[Driver] Enable --offload-arch for SYCL AOT compilation with the new
offload driver.
1 parent de4851c commit afd6a5c

File tree

4 files changed

+207
-5
lines changed

4 files changed

+207
-5
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 96 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10221022

10231023
for (const auto &TripleAndArchs : DerivedArchs)
10241024
OpenMPTriples.insert(TripleAndArchs.first());
1025-
}
1025+
} // end of offload-arch
10261026

10271027
for (StringRef Val : OpenMPTriples) {
10281028
llvm::Triple TT(ToolChain::getOpenMPTriple(Val));
@@ -1109,6 +1109,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11091109
};
11101110

11111111
Arg *SYCLTargets = getArgRequiringSYCLRuntime(options::OPT_fsycl_targets_EQ);
1112+
Arg *SYCLOffloadArch = getArgRequiringSYCLRuntime(options::OPT_offload_arch_EQ);
11121113

11131114
// Check if -fsycl-host-compiler is used in conjunction with -fsycl.
11141115
Arg *SYCLHostCompiler =
@@ -1182,12 +1183,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11821183
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11831184
llvm::StringMap<StringRef> FoundNormalizedTriples;
11841185
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
1186+
//llvm::StringSet<> SYCLTriples;
1187+
std::multiset<StringRef> SYCLTriples;
11851188
if (HasSYCLTargetsOption) {
11861189
// At this point, we know we have a valid combination
11871190
// of -fsycl*target options passed
11881191
Arg *SYCLTargetsValues = SYCLTargets;
11891192
if (SYCLTargetsValues) {
1190-
llvm::StringSet<> SYCLTriples;
1193+
11911194
if (SYCLTargetsValues->getNumValues()) {
11921195

11931196
// Multiple targets are currently not supported when using
@@ -1275,10 +1278,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12751278
SYCLTriples.insert(DeviceTriple.normalize());
12761279
if (!Arch.empty())
12771280
DerivedArchs[DeviceTriple.getTriple()].insert(Arch);
1278-
}
1281+
} // end of SYCLTargetsValues for loop
1282+
12791283
if (!SYCLTriples.empty()) {
12801284
for (const auto &SYCLTriple : SYCLTriples) {
1281-
llvm::Triple Triple(SYCLTriple.getKey());
1285+
//llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1286+
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple));
12821287
UniqueSYCLTriplesVec.push_back(Triple);
12831288
}
12841289
}
@@ -1287,7 +1292,93 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12871292
Diag(clang::diag::warn_drv_empty_joined_argument)
12881293
<< SYCLTargetsValues->getAsString(C.getInputArgs());
12891294
}
1290-
} else {
1295+
} else if(SYCLOffloadArch) {
1296+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1297+
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1298+
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1299+
HostTC->getTriple());
1300+
1301+
// Attempt to deduce the offloading triple from the set of architectures.
1302+
// We can only correctly deduce NVPTX / AMDGPU triples currently. We need
1303+
// to temporarily create these toolchains so that we can access tools for
1304+
// inferring architectures.
1305+
llvm::DenseSet<StringRef> Archs;
1306+
if (NVPTXTriple) {
1307+
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1308+
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1309+
for (StringRef Arch : getOffloadArchs(
1310+
C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1311+
Archs.insert(Arch);
1312+
}
1313+
if (AMDTriple) {
1314+
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1315+
*this, *AMDTriple, *HostTC, C.getInputArgs());
1316+
for (StringRef Arch : getOffloadArchs(
1317+
C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1318+
Archs.insert(Arch);
1319+
}
1320+
if (!AMDTriple && !NVPTXTriple) {
1321+
for (StringRef Arch :
1322+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1323+
Archs.insert(Arch);
1324+
}
1325+
for (StringRef Arch : Archs) {
1326+
if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
1327+
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1328+
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1329+
} else if (AMDTriple &&
1330+
IsAMDOffloadArch(StringToOffloadArch(
1331+
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1332+
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1333+
}
1334+
else if (IsIntelCPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1335+
DerivedArchs["spir64_x86_64"].insert(Arch);
1336+
} else if(IsIntelGPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1337+
DerivedArchs["spir64_gen"].insert(Arch);
1338+
}
1339+
else {
1340+
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
1341+
return;
1342+
}
1343+
}
1344+
// If the set is empty then we failed to find a native architecture.
1345+
if (Archs.empty()) {
1346+
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
1347+
<< "native";
1348+
return;
1349+
}
1350+
1351+
for (const auto &TripleAndArchs : DerivedArchs)
1352+
SYCLTriples.insert(TripleAndArchs.first()); // ["triple -> arch"]
1353+
1354+
1355+
for (StringRef Val : SYCLTriples) {
1356+
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1357+
std::string NormalizedName = TT.normalize();
1358+
1359+
// Make sure we don't have a duplicate triple.
1360+
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
1361+
if (Duplicate != FoundNormalizedTriples.end()) {
1362+
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
1363+
<< Val << Duplicate->second;
1364+
continue;
1365+
}
1366+
1367+
// Store the current triple so that we can check for duplicates in the
1368+
// following iterations.
1369+
FoundNormalizedTriples[NormalizedName] = Val;
1370+
}
1371+
1372+
if (!SYCLTriples.empty()) {
1373+
for (const auto &SYCLTriple : SYCLTriples) {
1374+
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple));
1375+
UniqueSYCLTriplesVec.push_back(Triple);
1376+
}
1377+
}
1378+
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1379+
1380+
} // end of --offload-arch
1381+
else {
12911382
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
12921383
// For -fsycl-device-only, we also setup the implied triple as needed.
12931384
if (HasValidSYCLRuntime) {

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,31 @@ using namespace clang::driver::tools;
2727
using namespace clang;
2828
using namespace llvm::opt;
2929

30+
struct StringToOffloadArchIntelMap {
31+
const char *ArchName;
32+
OffloadArchIntel IntelArch;
33+
};
34+
35+
// Lookup table scanned linearly by StringToOffloadArchIntel. Only the names
// listed here are recognized; every other string yields
// OffloadArchIntel::UNKNOWN.
static const StringToOffloadArchIntelMap StringToArchNamesMap[] = {
36+
// Names mapping to enumerators from the CPU section of OffloadArchIntel.
{"broadwell", OffloadArchIntel::BROADWELL},
37+
{"coffeelake", OffloadArchIntel::COFFEELAKE},
38+
{"icelake-client", OffloadArchIntel::ICELAKECLIENT},
39+
// Names mapping to enumerators from the GPU section of OffloadArchIntel.
{"bdw", OffloadArchIntel::BDW},
40+
{"cfl", OffloadArchIntel::CFL},
41+
{"icl", OffloadArchIntel::ICL}};
42+
43+
/// Translate an architecture name into its OffloadArchIntel enumerator.
///
/// Walks StringToArchNamesMap front to back and returns the enumerator of the
/// first row whose ArchName equals \p ArchNameAsString. Returns
/// OffloadArchIntel::UNKNOWN when no row matches.
OffloadArchIntel
clang::driver::StringToOffloadArchIntel(llvm::StringRef ArchNameAsString) {
  for (const StringToOffloadArchIntelMap &Entry : StringToArchNamesMap)
    if (ArchNameAsString == Entry.ArchName)
      return Entry.IntelArch;
  return OffloadArchIntel::UNKNOWN;
}
54+
3055
SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
3156
: D(D), InstallationCandidates() {
3257
InstallationCandidates.emplace_back(D.Dir + "/..");

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,82 @@
1616
namespace clang {
1717
namespace driver {
1818

19+
/// Intel CPU and GPU architectures that can be named as offload targets.
///
/// Enumerator order is significant: IsIntelCPUOffloadArch and
/// IsIntelGPUOffloadArch classify a value by range comparison, so every CPU
/// must stay in [SKYLAKEAVX512, BDW) and every GPU in [BDW, LNL_M]. Add new
/// entries inside the matching section, never at an arbitrary position.
enum class OffloadArchIntel {
  // Result of StringToOffloadArchIntel for an unrecognized name; deliberately
  // outside both the CPU and the GPU range.
  UNKNOWN,
  // CPUs
  SKYLAKEAVX512,
  COREAVX2,
  COREI7AVX,
  COREI7,
  WESTMERE,
  SANDYBRIDGE,
  IVYBRIDGE,
  BROADWELL,
  COFFEELAKE,
  ALDERLAKE,
  SKYLAKE,
  SKX,
  CASCADELAKE,
  ICELAKECLIENT,
  ICELAKESERVER,
  SAPPHIRERAPIDS,
  GRANITERAPIDS,
  // GPUs
  BDW,
  SKL,
  KBL,
  CFL,
  APL,
  BXT,
  GLK,
  WHL,
  AML,
  CML,
  ICLLP,
  ICL,
  EHL,
  JSL,
  TGLLP,
  TGL,
  RKL,
  ADL_S,
  RPL_S,
  ADL_P,
  ADL_N,
  DG1,
  ACM_G10,
  DG2_G10,
  ACM_G11,
  DG2_G11,
  // Misspelling of DG2_G11 kept as an alias (same value) so that existing
  // references keep compiling; prefer DG2_G11 in new code.
  DG2_GLL = DG2_G11,
  ACM_G12,
  DG2_G12,
  PVC,
  PVC_VG,
  MTL_U,
  MTL_S,
  ARL_U,
  ARL_S,
  MTL_H,
  ARL_H,
  BMG_G21,
  LNL_M
};
81+
82+
// Check if the given Arch value is a valid Intel CPU.
83+
static inline bool IsIntelCPUOffloadArch(OffloadArchIntel Arch) {
84+
return Arch >= OffloadArchIntel::SKYLAKEAVX512 &&
85+
Arch < OffloadArchIntel::BDW;
86+
}
87+
88+
// Check if the given Arch value is a valid Intel GPU.
89+
static inline bool IsIntelGPUOffloadArch(OffloadArchIntel Arch) {
90+
return Arch >= OffloadArchIntel::BDW && Arch <= OffloadArchIntel::LNL_M;
91+
}
92+
93+
/// Map an architecture name to its OffloadArchIntel value. The definition in
/// SYCL.cpp returns OffloadArchIntel::UNKNOWN for any name it does not list.
OffloadArchIntel StringToOffloadArchIntel(llvm::StringRef ArchNameAsString);
94+
1995
class SYCLInstallationDetector {
2096
public:
2197
SYCLInstallationDetector(const Driver &D);
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/// Tests the behavior of -fsycl --offload-new-driver when combined with
2+
/// --offload-arch=<intel-gpu-values>.
3+
4+
// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \
5+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
6+
// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
7+
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
8+
// MACRO: clang{{.*}} "-fsycl-is-host"
9+
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
10+
// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]"

0 commit comments

Comments
 (0)