Skip to content

Commit d5339a0

Browse files
Add initial test.
1 parent afd6a5c commit d5339a0

File tree

3 files changed

+104
-82
lines changed

3 files changed

+104
-82
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
396396
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
397397
// Driver error: option '%0' cannot be used while option '%1' is set in a
// '-fsycl' compilation. Takes two format arguments (the two option spellings).
def err_drv_sycl_thinlto_split_off: Error<
398398
"'%0' is not supported when '%1' is set with '-fsycl'">;
399+
// Driver error emitted from Driver::BuildActions when '--offload-arch' is
// passed in a SYCL compilation but '--offload-new-driver' is not enabled.
// Takes no format arguments.
def err_drv_sycl_offload_arch_new_driver: Error<
400+
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
399401
// Driver warning (diagnostic group SyclTarget): SYCL offloading target '%0'
// is similar to the previously specified target '%1', so '%0' is ignored.
def warn_drv_sycl_offload_target_duplicate : Warning<
400402
"SYCL offloading target '%0' is similar to target '%1' already specified; "
401403
"will be ignored">, InGroup<SyclTarget>;

clang/lib/Driver/Driver.cpp

Lines changed: 89 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11091109
};
11101110

11111111
Arg *SYCLTargets = getArgRequiringSYCLRuntime(options::OPT_fsycl_targets_EQ);
1112-
Arg *SYCLOffloadArch = getArgRequiringSYCLRuntime(options::OPT_offload_arch_EQ);
11131112

11141113
// Check if -fsycl-host-compiler is used in conjunction with -fsycl.
11151114
Arg *SYCLHostCompiler =
@@ -1183,8 +1182,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11831182
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11841183
llvm::StringMap<StringRef> FoundNormalizedTriples;
11851184
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
1186-
//llvm::StringSet<> SYCLTriples;
1187-
std::multiset<StringRef> SYCLTriples;
1185+
llvm::StringSet<> SYCLTriples;
11881186
if (HasSYCLTargetsOption) {
11891187
// At this point, we know we have a valid combination
11901188
// of -fsycl*target options passed
@@ -1278,12 +1276,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12781276
SYCLTriples.insert(DeviceTriple.normalize());
12791277
if (!Arch.empty())
12801278
DerivedArchs[DeviceTriple.getTriple()].insert(Arch);
1281-
} // end of SYCLTargetsValues for loop
1279+
}
12821280

12831281
if (!SYCLTriples.empty()) {
12841282
for (const auto &SYCLTriple : SYCLTriples) {
1285-
//llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1286-
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple));
1283+
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple.getKey()));
12871284
UniqueSYCLTriplesVec.push_back(Triple);
12881285
}
12891286
}
@@ -1292,92 +1289,93 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12921289
Diag(clang::diag::warn_drv_empty_joined_argument)
12931290
<< SYCLTargetsValues->getAsString(C.getInputArgs());
12941291
}
1295-
} else if(SYCLOffloadArch) {
1296-
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1297-
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1298-
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1299-
HostTC->getTriple());
1300-
1301-
// Attempt to deduce the offloading triple from the set of architectures.
1302-
// We can only correctly deduce NVPTX / AMDGPU triples currently. We need
1303-
// to temporarily create these toolchains so that we can access tools for
1304-
// inferring architectures.
1305-
llvm::DenseSet<StringRef> Archs;
1306-
if (NVPTXTriple) {
1307-
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1308-
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1309-
for (StringRef Arch : getOffloadArchs(
1310-
C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1311-
Archs.insert(Arch);
1312-
}
1313-
if (AMDTriple) {
1314-
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1315-
*this, *AMDTriple, *HostTC, C.getInputArgs());
1316-
for (StringRef Arch : getOffloadArchs(
1317-
C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1318-
Archs.insert(Arch);
1319-
}
1320-
if (!AMDTriple && !NVPTXTriple) {
1321-
for (StringRef Arch :
1322-
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1323-
Archs.insert(Arch);
1324-
}
1325-
for (StringRef Arch : Archs) {
1326-
if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
1327-
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1328-
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1329-
} else if (AMDTriple &&
1330-
IsAMDOffloadArch(StringToOffloadArch(
1331-
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1332-
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1333-
}
1334-
else if (IsIntelCPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1335-
DerivedArchs["spir64_x86_64"].insert(Arch);
1336-
} else if(IsIntelGPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1337-
DerivedArchs["spir64_gen"].insert(Arch);
1338-
}
1339-
else {
1340-
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
1341-
return;
1342-
}
1343-
}
1344-
// If the set is empty then we failed to find a native architecture.
1345-
if (Archs.empty()) {
1346-
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
1347-
<< "native";
1292+
}
1293+
// If the user specified --offload-arch, deduce the offloading
1294+
// target triple(s) from the set of architecture(s).
1295+
// Create a toolchain for each valid triple.
1296+
else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
1297+
!IsCuda) {
1298+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1299+
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1300+
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1301+
HostTC->getTriple());
1302+
1303+
// Attempt to deduce the offloading triple from the set of architectures.
1304+
// We need to temporarily create these toolchains so that we can access
1305+
// tools for inferring architectures.
1306+
llvm::DenseSet<StringRef> Archs;
1307+
if (NVPTXTriple) {
1308+
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1309+
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1310+
for (StringRef Arch :
1311+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1312+
Archs.insert(Arch);
1313+
}
1314+
if (AMDTriple) {
1315+
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1316+
*this, *AMDTriple, *HostTC, C.getInputArgs());
1317+
for (StringRef Arch :
1318+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1319+
Archs.insert(Arch);
1320+
}
1321+
if (!AMDTriple && !NVPTXTriple) {
1322+
for (StringRef Arch :
1323+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1324+
Archs.insert(Arch);
1325+
}
1326+
for (StringRef Arch : Archs) {
1327+
if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
1328+
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1329+
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1330+
} else if (AMDTriple &&
1331+
IsAMDOffloadArch(StringToOffloadArch(
1332+
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1333+
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1334+
} else if (IsIntelCPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1335+
DerivedArchs["spir64_x86_64"].insert(Arch);
1336+
} else if (IsIntelGPUOffloadArch(StringToOffloadArchIntel(Arch))) {
1337+
DerivedArchs["spir64_gen"].insert(Arch);
1338+
} else {
1339+
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
13481340
return;
13491341
}
1342+
}
1343+
// If the set is empty then we failed to find a native architecture.
1344+
if (Archs.empty()) {
1345+
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << "native";
1346+
return;
1347+
}
13501348

1351-
for (const auto &TripleAndArchs : DerivedArchs)
1352-
SYCLTriples.insert(TripleAndArchs.first()); // ["triple -> arch"]
1353-
1349+
for (const auto &TripleAndArchs : DerivedArchs)
1350+
SYCLTriples.insert(TripleAndArchs.first());
13541351

1355-
for (StringRef Val : SYCLTriples) {
1356-
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1352+
for (const auto &Val : SYCLTriples) {
1353+
llvm::Triple TT(MakeSYCLDeviceTriple(Val.getKey()));
13571354
std::string NormalizedName = TT.normalize();
13581355

13591356
// Make sure we don't have a duplicate triple.
13601357
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
13611358
if (Duplicate != FoundNormalizedTriples.end()) {
13621359
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
1363-
<< Val << Duplicate->second;
1360+
<< Val.getKey() << Duplicate->second;
13641361
continue;
13651362
}
13661363

13671364
// Store the current triple so that we can check for duplicates in the
13681365
// following iterations.
1369-
FoundNormalizedTriples[NormalizedName] = Val;
1370-
}
1366+
FoundNormalizedTriples[NormalizedName] = Val.getKey();
1367+
}
13711368

1372-
if (!SYCLTriples.empty()) {
1373-
for (const auto &SYCLTriple : SYCLTriples) {
1374-
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple));
1375-
UniqueSYCLTriplesVec.push_back(Triple);
1376-
}
1377-
}
1378-
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1379-
1380-
} // end of --offload-arch
1369+
if (!SYCLTriples.empty()) {
1370+
for (const auto &SYCLTriple : SYCLTriples) {
1371+
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple.getKey()));
1372+
UniqueSYCLTriplesVec.push_back(Triple);
1373+
}
1374+
}
1375+
1376+
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1377+
1378+
} // end of --offload-arch
13811379
else {
13821380
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13831381
// For -fsycl-device-only, we also setup the implied triple as needed.
@@ -7335,6 +7333,21 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
73357333

73367334
handleArguments(C, Args, Inputs, Actions);
73377335

7336+
bool HasValidSYCLRuntime =
7337+
C.getInputArgs().hasFlag(options::OPT_fsycl, options::OPT_fno_sycl,
7338+
false) ||
7339+
hasSYCLDeviceOnly(C.getInputArgs());
7340+
bool IsSYCLOffloadArchEnabled =
7341+
HasValidSYCLRuntime &&
7342+
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ);
7343+
7344+
if (IsSYCLOffloadArchEnabled &&
7345+
!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
7346+
options::OPT_no_offload_new_driver, false)) {
7347+
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
7348+
return;
7349+
}
7350+
73387351
// If '-fintelfpga' is passed, add '-fsycl' to the list of arguments
73397352
const llvm::opt::OptTable &Opts = getOpts();
73407353
Arg *SYCLFpgaArg = C.getInputArgs().getLastArg(options::OPT_fintelfpga);

clang/test/Driver/sycl-offload-arch-intelgpu.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,16 @@
22
// --offload-arch=<intel-gpu-values>.
33

44
// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \
5-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
6-
// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
7-
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
8-
// MACRO: clang{{.*}} "-fsycl-is-host"
9-
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
10-
// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]"
5+
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE,CLANG-OFFLOAD-PACKAGER -DDEV_STR=bdw -DMAC_STR=BDW
6+
7+
// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cfl %s 2>&1 | \
8+
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE,CLANG-OFFLOAD-PACKAGER -DDEV_STR=cfl -DMAC_STR=CFL
9+
10+
11+
/// If Arch is icl, it is mapped to icllp internally to create the -D__SYCL_TARGET_INTEL_GPU_<ARCH>__ macro.
12+
13+
// TARGET-TRIPLE: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
14+
// TARGET-TRIPLE: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
15+
// CLANG-OFFLOAD-PACKAGER: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl"
16+
17+

0 commit comments

Comments
 (0)