Skip to content

Commit c224b81

Browse files
committed
Merge branch 'sycl' into requires-typo
2 parents a1d3bcf + fbd3675 commit c224b81

File tree

133 files changed

+6052
-872
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

133 files changed

+6052
-872
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ sycl/doc/extensions/ @intel/dpcpp-specification-reviewers
3636

3737
# Unified Runtime
3838
sycl/cmake/modules/FetchUnifiedRuntime.cmake @intel/unified-runtime-reviewers
39+
sycl/cmake/modules/UnifiedRuntimeTag.cmake @intel/unified-runtime-reviewers
3940
sycl/include/sycl/detail/ur.hpp @intel/unified-runtime-reviewers
4041
sycl/source/detail/posix_ur.cpp @intel/unified-runtime-reviewers
4142
sycl/source/detail/ur.cpp @intel/unified-runtime-reviewers

.github/workflows/pr-code-format.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
pull_request:
88
branches:
99
- main
10+
- sycl
11+
- sycl-devops-pr/**
12+
- sycl-rel-**
1013
- 'users/**'
1114

1215
jobs:

buildbot/configure.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def do_configure(args):
6464

6565
sycl_enable_xpti_tracing = "ON"
6666
xpti_enable_werror = "OFF"
67+
llvm_enable_zstd = "OFF"
6768

6869
if sys.platform != "darwin":
6970
sycl_enabled_backends.append("level_zero")
@@ -133,6 +134,8 @@ def do_configure(args):
133134

134135
# For clang-format, clang-tidy and code coverage
135136
llvm_enable_projects += ";clang-tools-extra;compiler-rt"
137+
# Build with zstd disabled on CI for now.
138+
llvm_enable_zstd = "OFF"
136139
if sys.platform != "darwin":
137140
# libclc is required for CI validation
138141
libclc_enabled = True
@@ -177,6 +180,8 @@ def do_configure(args):
177180
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
178181
"-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform),
179182
"-DLLVM_BUILD_TOOLS=ON",
183+
"-DLLVM_ENABLE_ZSTD={}".format(llvm_enable_zstd),
184+
"-DLLVM_USE_STATIC_ZSTD=ON",
180185
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
181186
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
182187
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,10 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
398398
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
399399
def err_drv_sycl_thinlto_split_off: Error<
400400
"'%0' is not supported when '%1' is set with '-fsycl'">;
401+
def err_drv_sycl_offload_arch_new_driver: Error<
402+
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
403+
def err_drv_sycl_offload_arch_missing_value : Error<
404+
"must pass in an explicit cpu or gpu architecture to '--offload-arch'">;
401405
def warn_drv_sycl_offload_target_duplicate : Warning<
402406
"SYCL offloading target '%0' is similar to target '%1' already specified; "
403407
"will be ignored">, InGroup<SyclTarget>;

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4658,10 +4658,14 @@ def image__base : Separate<["-"], "image_base">;
46584658
def include_ : JoinedOrSeparate<["-", "--"], "include">, Group<clang_i_Group>, EnumName<"include">,
46594659
MetaVarName<"<file>">, HelpText<"Include file before parsing">,
46604660
Visibility<[ClangOption, CC1Option]>;
4661-
def include_footer : Separate<["-"], "include-footer">, Group<clang_i_Group>,
4661+
def include_internal_footer : Separate<["-"], "include-internal-footer">, Group<clang_i_Group>,
46624662
Visibility<[CC1Option]>,
46634663
HelpText<"Name of the footer integration file">, MetaVarName<"<file>">,
46644664
MarshallingInfoString<PreprocessorOpts<"IncludeFooter">>;
4665+
def include_internal_header : Separate<["-"], "include-internal-header">, Group<clang_i_Group>,
4666+
Visibility<[CC1Option]>,
4667+
HelpText<"Name of the header integration file">, MetaVarName<"<file>">,
4668+
MarshallingInfoString<PreprocessorOpts<"IncludeHeader">>;
46654669
def include_pch : Separate<["-"], "include-pch">, Group<clang_i_Group>,
46664670
Visibility<[ClangOption, CC1Option]>,
46674671
HelpText<"Include precompiled header file">, MetaVarName<"<file>">,

clang/include/clang/Lex/PreprocessorOptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class PreprocessorOptions {
6868
std::vector<std::pair<std::string, bool/*isUndef*/>> Macros;
6969
std::vector<std::string> Includes;
7070
std::string IncludeFooter;
71+
std::string IncludeHeader;
7172
std::vector<std::string> MacroIncludes;
7273

7374
/// Perform extra checks when loading PCM files for mutable file systems.

clang/lib/Driver/Driver.cpp

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1191,12 +1191,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11911191
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11921192
llvm::StringMap<StringRef> FoundNormalizedTriples;
11931193
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
1194+
// StringSet to contain SYCL target triples.
1195+
llvm::StringSet<> SYCLTriples;
11941196
if (HasSYCLTargetsOption) {
11951197
// At this point, we know we have a valid combination
11961198
// of -fsycl*target options passed
11971199
Arg *SYCLTargetsValues = SYCLTargets;
11981200
if (SYCLTargetsValues) {
1199-
llvm::StringSet<> SYCLTriples;
12001201
if (SYCLTargetsValues->getNumValues()) {
12011202

12021203
// Multiple targets are currently not supported when using
@@ -1296,6 +1297,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12961297
Diag(clang::diag::warn_drv_empty_joined_argument)
12971298
<< SYCLTargetsValues->getAsString(C.getInputArgs());
12981299
}
1300+
}
1301+
// If the user specified --offload-arch, deduce the offloading
1302+
// target triple(s) from the set of architecture(s).
1303+
// Create a toolchain for each valid triple.
1304+
// We do not support SYCL offloading if any of the inputs is a
1305+
// .cu (for CUDA type) or .hip (for HIP type) file.
1306+
else if (HasValidSYCLRuntime &&
1307+
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
1308+
!IsCuda) {
1309+
// SYCL offloading to AOT Targets with '--offload-arch'
1310+
// is currently enabled only with '--offload-new-driver' option.
1311+
// Emit a diagnostic if '--offload-arch' is invoked without
1312+
// '--offload-new-driver' option.
1313+
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1314+
options::OPT_no_offload_new_driver, false)) {
1315+
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
1316+
return;
1317+
}
1318+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1319+
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1320+
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1321+
HostTC->getTriple());
1322+
1323+
// Attempt to deduce the offloading triple from the set of architectures.
1324+
// We need to temporarily create these toolchains so that we can access
1325+
// tools for inferring architectures.
1326+
llvm::DenseSet<StringRef> Archs;
1327+
if (NVPTXTriple) {
1328+
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1329+
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1330+
for (StringRef Arch :
1331+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1332+
Archs.insert(Arch);
1333+
}
1334+
if (AMDTriple) {
1335+
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1336+
*this, *AMDTriple, *HostTC, C.getInputArgs());
1337+
for (StringRef Arch :
1338+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1339+
Archs.insert(Arch);
1340+
}
1341+
if (!AMDTriple && !NVPTXTriple) {
1342+
for (StringRef Arch :
1343+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1344+
Archs.insert(Arch);
1345+
}
1346+
for (StringRef Arch : Archs) {
1347+
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
1348+
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1349+
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1350+
} else if (AMDTriple &&
1351+
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
1352+
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1353+
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1354+
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
1355+
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
1356+
Arch);
1357+
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
1358+
StringRef IntelGPUArch;
1359+
// For Intel Graphics AOT target, valid values for '--offload-arch'
1360+
// are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
1361+
// compiler) via the '-device' option. The mapIntelGPUArchName
1362+
// function maps the accepted values for '--offload-arch' to enable SYCL
1363+
// offloading to Intel GPUs and the corresponding '-device' value passed
1364+
// to OCLOC.
1365+
IntelGPUArch = mapIntelGPUArchName(Arch).data();
1366+
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
1367+
IntelGPUArch);
1368+
} else {
1369+
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
1370+
return;
1371+
}
1372+
}
1373+
// Emit an error if architecture value is not provided
1374+
// to --offload-arch.
1375+
if (Archs.empty()) {
1376+
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
1377+
return;
1378+
}
1379+
1380+
for (const auto &TripleAndArchs : DerivedArchs)
1381+
SYCLTriples.insert(TripleAndArchs.first());
1382+
1383+
for (const auto &Val : SYCLTriples) {
1384+
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
1385+
std::string NormalizedName = SYCLTargetTriple.normalize();
1386+
1387+
// Make sure we don't have a duplicate triple.
1388+
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
1389+
if (Duplicate != FoundNormalizedTriples.end()) {
1390+
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
1391+
<< Val.getKey() << Duplicate->second;
1392+
continue;
1393+
}
1394+
1395+
// Store the current triple so that we can check for duplicates in the
1396+
// following iterations.
1397+
FoundNormalizedTriples[NormalizedName] = Val.getKey();
1398+
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
1399+
}
1400+
1401+
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1402+
12991403
} else {
13001404
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13011405
// For -fsycl-device-only, we also setup the implied triple as needed.

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5741,7 +5741,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57415741
// action to determine this.
57425742
if (types::getPreprocessedType(Input.getType()) != types::TY_INVALID &&
57435743
!Header.empty()) {
5744-
CmdArgs.push_back("-include");
5744+
// Add the -include-internal-header option to add the integration header
5745+
CmdArgs.push_back("-include-internal-header");
57455746
CmdArgs.push_back(Args.MakeArgString(Header));
57465747
// When creating dependency information, filter out the generated
57475748
// header file.
@@ -5753,11 +5754,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57535754
CmdArgs.push_back("-fsycl-enable-int-header-diags");
57545755
}
57555756

5756-
// Add the -include-footer option to add the integration footer
57575757
StringRef Footer = D.getIntegrationFooter(Input.getBaseInput());
57585758
if (types::getPreprocessedType(Input.getType()) != types::TY_INVALID &&
57595759
!Args.hasArg(options::OPT_fno_sycl_use_footer) && !Footer.empty()) {
5760-
CmdArgs.push_back("-include-footer");
5760+
// Add the -include-internal-footer option to add the integration footer
5761+
CmdArgs.push_back("-include-internal-footer");
57615762
CmdArgs.push_back(Args.MakeArgString(Footer));
57625763
// When creating dependency information, filter out the generated
57635764
// integration footer file.
@@ -10174,6 +10175,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1017410175
SmallString<128> TargetTripleOpt = TT.getArchName();
1017510176
bool WrapFPGADevice = false;
1017610177
bool FPGAEarly = false;
10178+
10179+
// Validate and propagate CLI options related to device image compression.
10180+
// -offload-compress
10181+
if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) {
10182+
WrapperArgs.push_back(
10183+
C.getArgs().MakeArgString(Twine("-offload-compress")));
10184+
// -offload-compression-level=<>
10185+
if (Arg *A = C.getInputArgs().getLastArg(
10186+
options::OPT_offload_compression_level_EQ))
10187+
WrapperArgs.push_back(C.getArgs().MakeArgString(
10188+
Twine("-offload-compression-level=") + A->getValue()));
10189+
}
10190+
1017710191
if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) {
1017810192
WrapFPGADevice = true;
1017910193
FPGAEarly = (A->getValue() == StringRef("early"));

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,125 @@ using namespace clang::driver::tools;
2727
using namespace clang;
2828
using namespace llvm::opt;
2929

30+
// Struct that relates an AOT target value with
31+
// Intel CPUs and Intel GPUs.
32+
struct StringToOffloadArchSYCLMap {
33+
const char *ArchName;
34+
SYCLSupportedIntelArchs IntelArch;
35+
};
36+
37+
// Mapping of supported SYCL offloading architectures.
38+
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
39+
// Intel CPU mapping.
40+
{"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
41+
{"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
42+
{"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
43+
{"corei7", SYCLSupportedIntelArchs::COREI7},
44+
{"westmere", SYCLSupportedIntelArchs::WESTMERE},
45+
{"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
46+
{"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
47+
{"broadwell", SYCLSupportedIntelArchs::BROADWELL},
48+
{"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
49+
{"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
50+
{"skylake", SYCLSupportedIntelArchs::SKYLAKE},
51+
{"skx", SYCLSupportedIntelArchs::SKX},
52+
{"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
53+
{"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
54+
{"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
55+
{"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
56+
{"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
57+
// Intel GPU mapping.
58+
{"bdw", SYCLSupportedIntelArchs::BDW},
59+
{"skl", SYCLSupportedIntelArchs::SKL},
60+
{"kbl", SYCLSupportedIntelArchs::KBL},
61+
{"cfl", SYCLSupportedIntelArchs::CFL},
62+
{"apl", SYCLSupportedIntelArchs::APL},
63+
{"bxt", SYCLSupportedIntelArchs::BXT},
64+
{"glk", SYCLSupportedIntelArchs::GLK},
65+
{"whl", SYCLSupportedIntelArchs::WHL},
66+
{"aml", SYCLSupportedIntelArchs::AML},
67+
{"cml", SYCLSupportedIntelArchs::CML},
68+
{"icllp", SYCLSupportedIntelArchs::ICLLP},
69+
{"icl", SYCLSupportedIntelArchs::ICL},
70+
{"ehl", SYCLSupportedIntelArchs::EHL},
71+
{"jsl", SYCLSupportedIntelArchs::JSL},
72+
{"tgllp", SYCLSupportedIntelArchs::TGLLP},
73+
{"tgl", SYCLSupportedIntelArchs::TGL},
74+
{"rkl", SYCLSupportedIntelArchs::RKL},
75+
{"adl_s", SYCLSupportedIntelArchs::ADL_S},
76+
{"rpl_s", SYCLSupportedIntelArchs::RPL_S},
77+
{"adl_p", SYCLSupportedIntelArchs::ADL_P},
78+
{"adl_n", SYCLSupportedIntelArchs::ADL_N},
79+
{"dg1", SYCLSupportedIntelArchs::DG1},
80+
{"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
81+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
82+
{"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
83+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
84+
{"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
85+
{"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
86+
{"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
87+
{"pvc", SYCLSupportedIntelArchs::PVC},
88+
{"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
89+
{"mtl_u", SYCLSupportedIntelArchs::MTL_U},
90+
{"mtl_s", SYCLSupportedIntelArchs::MTL_S},
91+
{"arl_u", SYCLSupportedIntelArchs::ARL_U},
92+
{"arl_s", SYCLSupportedIntelArchs::ARL_S},
93+
{"mtl_h", SYCLSupportedIntelArchs::MTL_H},
94+
{"arl_h", SYCLSupportedIntelArchs::ARL_H},
95+
{"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
96+
{"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
97+
98+
// Check if the user provided value for --offload-arch is a valid
99+
// SYCL supported Intel AOT target.
100+
SYCLSupportedIntelArchs
101+
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
102+
auto result = std::find_if(
103+
std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap),
104+
[ArchNameAsString](const StringToOffloadArchSYCLMap &map) {
105+
return ArchNameAsString == map.ArchName;
106+
});
107+
if (result == std::end(StringToArchNamesMap))
108+
return SYCLSupportedIntelArchs::UNKNOWN;
109+
return result->IntelArch;
110+
}
111+
112+
// This is a mapping between the user provided --offload-arch value for Intel
113+
// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
114+
// AOT compiler).
115+
StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
116+
StringRef Arch;
117+
Arch = llvm::StringSwitch<StringRef>(ArchName)
118+
.Case("bdw", "bdw")
119+
.Case("skl", "skl")
120+
.Case("kbl", "kbl")
121+
.Case("cfl", "cfl")
122+
.Cases("apl", "bxt", "apl")
123+
.Case("glk", "glk")
124+
.Case("whl", "whl")
125+
.Case("aml", "aml")
126+
.Case("cml", "cml")
127+
.Cases("icllp", "icl", "icllp")
128+
.Cases("ehl", "jsl", "ehl")
129+
.Cases("tgllp", "tgl", "tgllp")
130+
.Case("rkl", "rkl")
131+
.Cases("adl_s", "rpl_s", "adl_s")
132+
.Case("adl_p", "adl_p")
133+
.Case("adl_n", "adl_n")
134+
.Case("dg1", "dg1")
135+
.Cases("acm_g10", "dg2_g10", "acm_g10")
136+
.Cases("acm_g11", "dg2_g11", "acm_g11")
137+
.Cases("acm_g12", "dg2_g12", "acm_g12")
138+
.Case("pvc", "pvc")
139+
.Case("pvc_vg", "pvc_vg")
140+
.Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
141+
.Case("mtl_h", "mtl_h")
142+
.Case("arl_h", "arl_h")
143+
.Case("bmg_g21", "bmg_g21")
144+
.Case("lnl_m", "lnl_m")
145+
.Default("");
146+
return Arch;
147+
}
148+
30149
SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
31150
: D(D), InstallationCandidates() {
32151
InstallationCandidates.emplace_back(D.Dir + "/..");

0 commit comments

Comments
 (0)