Skip to content

Commit d789703

Browse files
committed
Merge branch 'sycl' into aaron/stopReportingFPExtensions
2 parents 368a9e8 + 2172d9e commit d789703

File tree

210 files changed

+1982
-770
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

210 files changed

+1982
-770
lines changed

.github/workflows/sycl-linux-build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ jobs:
212212
if: always() && !cancelled() && contains(inputs.changes, 'libdevice')
213213
run: |
214214
cmake --build $GITHUB_WORKSPACE/build --target check-libdevice
215+
- name: Check E2E test requirements
216+
if: always() && !cancelled() && !contains(inputs.changes, 'sycl')
217+
run: |
218+
# TODO consider moving this to Dockerfile.
219+
export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
220+
LIT_OPTS="--allow-empty-runs" LIT_FILTER="e2e_test_requirements" cmake --build $GITHUB_WORKSPACE/build --target check-sycl
215221
- name: Install
216222
if: ${{ always() && !cancelled() && steps.build.conclusion == 'success' }}
217223
# TODO replace utility installation with a single CMake target

buildbot/configure.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def do_configure(args):
6464

6565
sycl_enable_xpti_tracing = "ON"
6666
xpti_enable_werror = "OFF"
67-
llvm_enable_zstd = "OFF"
67+
llvm_enable_zstd = "ON"
6868

6969
if sys.platform != "darwin":
7070
sycl_enabled_backends.append("level_zero")
@@ -134,8 +134,6 @@ def do_configure(args):
134134

135135
# For clang-format, clang-tidy and code coverage
136136
llvm_enable_projects += ";clang-tools-extra;compiler-rt"
137-
# Build with zstd enabled on CI.
138-
llvm_enable_zstd = "ON"
139137
if sys.platform != "darwin":
140138
# libclc is required for CI validation
141139
libclc_enabled = True

clang/include/clang/Driver/Action.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,14 @@ class OffloadWrapperJobAction : public JobAction {
698698
// Get the compilation step setting.
699699
bool getCompileStep() const { return CompileStep; }
700700

701+
// Set the individual wrapping setting. This is used to tell the wrapper job
702+
// action that the wrapping (and subsequent compile step) should be done
703+
// with for-each instead of using -batch.
704+
void setWrapIndividualFiles() { WrapIndividualFiles = true; }
705+
706+
// Get the individual wrapping setting.
707+
bool getWrapIndividualFiles() const { return WrapIndividualFiles; }
708+
701709
// Set the offload kind for the current wrapping job action. Default usage
702710
// is to use the kind of the current toolchain.
703711
void setOffloadKind(OffloadKind SetKind) { Kind = SetKind; }
@@ -707,6 +715,7 @@ class OffloadWrapperJobAction : public JobAction {
707715

708716
private:
709717
bool CompileStep = true;
718+
bool WrapIndividualFiles = false;
710719
OffloadKind Kind = OFK_None;
711720
};
712721

clang/lib/Driver/Driver.cpp

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,9 +5536,58 @@ class OffloadingActionBuilder final {
55365536
BundlingActions, types::TY_Object);
55375537
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
55385538
OWA->setOffloadKind(Action::OFK_Host);
5539+
// The Backend compilation step performed here is being done for
5540+
// creating FPGA archives. The possible split binaries after
5541+
// sycl-post-link need to be individually wrapped as opposed to
5542+
// being passed into the clang-offload-wrapper via a table and
5543+
// using the -batch option - effectively creating a single
5544+
// binary. The resulting archive created from -fsycl-link should
5545+
// not contain the singular binary, but should be individual
5546+
// binaries to be consumed later by either the -fsycl-link=image
5547+
// device compilation step or being linked into the final exe.
5548+
//
5549+
// Typical compile flow:
5550+
// .bc
5551+
// |
5552+
// sycl-post-link -split=kernel
5553+
// |
5554+
// +--------+--------+
5555+
// | | |
5556+
// split1 split2 split3
5557+
// | | |
5558+
// llvm-spirv llvm-spirv llvm-spirv
5559+
// | | |
5560+
// ocloc ocloc ocloc
5561+
// | | |
5562+
// +--------+--------+
5563+
// |
5564+
// clang-offload-wrapper -batch
5565+
// |
5566+
// .o
5567+
//
5568+
// Individual wrap compile flow:
5569+
// .bc
5570+
// |
5571+
// sycl-post-link -split=kernel
5572+
// |
5573+
// +--------+--------+
5574+
// | | |
5575+
// split1 split2 split3
5576+
// | | |
5577+
// llvm-spirv llvm-spirv llvm-spirv
5578+
// | | |
5579+
// ocloc ocloc ocloc
5580+
// | | |
5581+
// wrap wrap wrap
5582+
// | | |
5583+
// .o .o .o
5584+
//
55395585
Action *CompiledDeviceAction =
5540-
C.MakeAction<OffloadWrapperJobAction>(WrapperItems,
5541-
types::TY_Object);
5586+
C.MakeAction<OffloadWrapperJobAction>(FPGAAOTAction,
5587+
types::TY_Tempfilelist);
5588+
if (auto *OWA =
5589+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5590+
OWA->setWrapIndividualFiles();
55425591
addDeps(CompiledDeviceAction, TC, BoundArch);
55435592
}
55445593
addDeps(DeviceAction, TC, BoundArch);
@@ -5812,6 +5861,9 @@ class OffloadingActionBuilder final {
58125861
};
58135862

58145863
Action *ExtractIRFilesAction = createExtractIRFilesAction();
5864+
// Device binaries that are individually wrapped when creating an
5865+
// FPGA Archive.
5866+
ActionList FPGAArchiveWrapperInputs;
58155867

58165868
if (IsNVPTX || IsAMDGCN) {
58175869
JobAction *FinAction =
@@ -5897,6 +5949,7 @@ class OffloadingActionBuilder final {
58975949
FileTableTformJobAction::COL_CODE,
58985950
FileTableTformJobAction::COL_CODE);
58995951
WrapperInputs.push_back(ReplaceFilesAction);
5952+
FPGAArchiveWrapperInputs.push_back(BuildCodeAction);
59005953
}
59015954
if (SkipWrapper) {
59025955
// Wrapper step not requested.
@@ -5931,8 +5984,11 @@ class OffloadingActionBuilder final {
59315984
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
59325985
OWA->setOffloadKind(Action::OFK_Host);
59335986
Action *CompiledDeviceAction =
5934-
C.MakeAction<OffloadWrapperJobAction>(WrapperInputs,
5935-
types::TY_Object);
5987+
C.MakeAction<OffloadWrapperJobAction>(
5988+
FPGAArchiveWrapperInputs, types::TY_Tempfilelist);
5989+
if (auto *OWA =
5990+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5991+
OWA->setWrapIndividualFiles();
59365992
addDeps(CompiledDeviceAction, TC, nullptr);
59375993
}
59385994
addDeps(DeviceAction, TC, nullptr);
@@ -6461,7 +6517,7 @@ class OffloadingActionBuilder final {
64616517
if (GpuInitHasErrors)
64626518
return true;
64636519

6464-
int GenIndex = 0;
6520+
size_t GenIndex = 0;
64656521
// Fill SYCLTargetInfoList
64666522
for (auto &TT : SYCLTripleList) {
64676523
auto TCIt = llvm::find_if(

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10263,8 +10263,18 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1026310263
const InputInfo &I = Inputs[0];
1026410264
assert(I.isFilename() && "Invalid input.");
1026510265

10266-
if (I.getType() == types::TY_Tempfiletable ||
10267-
I.getType() == types::TY_Tempfilelist || IsEmbeddedIR)
10266+
// TODO: The embedded compilation step after the wrapping step restricts
10267+
// the ability to control the 'for each' methodology used when performing
10268+
// device code splitting. We set the individual wrap behavior when we know
10269+
// the wrapping and compile step should be done individually. Ideally this
10270+
// would be controlled at the JobAction creation, but we cannot do that
10271+
// until the compilation of the wrap is it's own JobAction.
10272+
bool IndividualWrapCompile = WrapperJob.getWrapIndividualFiles();
10273+
const InputInfo TempOutput(types::TY_LLVM_BC, WrapperFileName,
10274+
WrapperFileName);
10275+
if (!IndividualWrapCompile &&
10276+
(I.getType() == types::TY_Tempfiletable ||
10277+
I.getType() == types::TY_Tempfilelist || IsEmbeddedIR))
1026810278
// Input files are passed via the batch job file table.
1026910279
WrapperArgs.push_back(C.getArgs().MakeArgString("-batch"));
1027010280
WrapperArgs.push_back(C.getArgs().MakeArgString(I.getFilename()));
@@ -10273,7 +10283,17 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1027310283
JA, *this, ResponseFileSupport::None(),
1027410284
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
1027510285
WrapperArgs, std::nullopt);
10276-
C.addCommand(std::move(Cmd));
10286+
10287+
if (IndividualWrapCompile) {
10288+
// When wrapping FPGA device binaries for FPGA archives, create individual
10289+
// wrapped and compiled entries for the archive.
10290+
StringRef ParallelJobs =
10291+
C.getArgs().getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
10292+
clang::driver::tools::SYCL::constructLLVMForeachCommand(
10293+
C, JA, std::move(Cmd), Inputs, TempOutput, this, "", "bc",
10294+
ParallelJobs);
10295+
} else
10296+
C.addCommand(std::move(Cmd));
1027710297

1027810298
if (WrapperCompileEnabled) {
1027910299
// TODO Use TC.SelectTool().
@@ -10296,9 +10316,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1029610316
SmallString<128> ClangPath(C.getDriver().Dir);
1029710317
llvm::sys::path::append(ClangPath, "clang");
1029810318
const char *Clang = C.getArgs().MakeArgString(ClangPath);
10299-
C.addCommand(std::make_unique<Command>(JA, *this,
10300-
ResponseFileSupport::None(), Clang,
10301-
ClangArgs, std::nullopt));
10319+
auto PostWrapCompileCmd =
10320+
std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
10321+
Clang, ClangArgs, std::nullopt);
10322+
if (IndividualWrapCompile) {
10323+
StringRef ParallelJobs = C.getArgs().getLastArgValue(
10324+
options::OPT_fsycl_max_parallel_jobs_EQ);
10325+
InputInfoList Inputs;
10326+
Inputs.push_back(TempOutput);
10327+
clang::driver::tools::SYCL::constructLLVMForeachCommand(
10328+
C, JA, std::move(PostWrapCompileCmd), Inputs, Output, this, "",
10329+
"bc", ParallelJobs);
10330+
} else
10331+
C.addCommand(std::move(PostWrapCompileCmd));
1030210332
}
1030310333
return;
1030410334
} // end of SYCL flavor of offload wrapper command creation

clang/lib/Driver/ToolChains/HIPUtility.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,10 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
324324
Args.MakeArgString(std::string("-output=").append(Output));
325325
BundlerArgs.push_back(BundlerOutputArg);
326326

327-
addOffloadCompressArgs(Args, BundlerArgs);
327+
// For SYCL, the compression is occurring during the wrapping step, so we do
328+
// not want to do additional compression here.
329+
if (!JA.isDeviceOffloading(Action::OFK_SYCL))
330+
addOffloadCompressArgs(Args, BundlerArgs);
328331

329332
const char *Bundler = Args.MakeArgString(
330333
T.getToolChain().GetProgramPath("clang-offload-bundler"));

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,16 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
17801780
ArgStringList TargArgs;
17811781
Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate);
17821782
Args.AddAllArgValues(TargArgs, options::OPT_Xsycl_backend);
1783+
// For -Xsycl-target-backend=<triple> the triple value is used to push
1784+
// specific options to the matching device compilation using that triple.
1785+
// Scrutinize this to make sure we are only checking the values needed
1786+
// for the current device compilation.
1787+
for (auto *A : Args) {
1788+
if (!A->getOption().matches(options::OPT_Xsycl_backend_EQ))
1789+
continue;
1790+
if (getDriver().MakeSYCLDeviceTriple(A->getValue()) == Triple)
1791+
TargArgs.push_back(A->getValue(1));
1792+
}
17831793
// Check for any -device settings.
17841794
std::string DevArg;
17851795
if (IsJIT || Device == "pvc" || hasPVCDevice(TargArgs, DevArg)) {

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3167,7 +3167,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
31673167
// // code
31683168
// }
31693169
//
3170-
// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const
3170+
// [[sycl::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const
31713171
// {
31723172
// // code
31733173
// }

clang/lib/Sema/SemaSYCLDeclAttr.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,13 @@ void SemaSYCL::checkDeprecatedSYCLAttributeSpelling(const ParsedAttr &A,
132132
return;
133133
}
134134

135+
// Additionally, diagnose deprecated [[intel::reqd_sub_group_size]] spelling
136+
if (A.getKind() == ParsedAttr::AT_IntelReqdSubGroupSize && A.getScopeName() &&
137+
A.getScopeName()->isStr("intel")) {
138+
diagnoseDeprecatedAttribute(A, "sycl", "reqd_sub_group_size");
139+
return;
140+
}
141+
135142
// Diagnose SYCL 2020 spellings in later SYCL modes.
136143
if (getLangOpts().getSYCLVersion() >= LangOptions::SYCL_2020) {
137144
// All attributes in the cl vendor namespace are deprecated in favor of a

clang/test/CodeGenSYCL/kernel-op-calls.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class Functor1 {
1111
public:
1212
Functor1(){}
1313

14-
[[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {}
14+
[[sycl::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {}
1515

1616
[[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const {}
1717

0 commit comments

Comments
 (0)