Skip to content

Commit 2f68d8f

Browse files
committed
Merge branch 'sycl' into requires-typo
2 parents c224b81 + 1ed687b commit 2f68d8f

File tree

25 files changed

+332
-190
lines changed

25 files changed

+332
-190
lines changed

.github/workflows/sycl-linux-build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ jobs:
212212
if: always() && !cancelled() && contains(inputs.changes, 'libdevice')
213213
run: |
214214
cmake --build $GITHUB_WORKSPACE/build --target check-libdevice
215+
- name: Check E2E test requirements
216+
if: always() && !cancelled() && !contains(inputs.changes, 'sycl')
217+
run: |
218+
# TODO consider moving this to Dockerfile.
219+
export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
220+
LIT_OPTS="--allow-empty-runs" LIT_FILTER="e2e_test_requirements" cmake --build $GITHUB_WORKSPACE/build --target check-sycl
215221
- name: Install
216222
if: ${{ always() && !cancelled() && steps.build.conclusion == 'success' }}
217223
# TODO replace utility installation with a single CMake target

buildbot/configure.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def do_configure(args):
6464

6565
sycl_enable_xpti_tracing = "ON"
6666
xpti_enable_werror = "OFF"
67-
llvm_enable_zstd = "OFF"
67+
llvm_enable_zstd = "ON"
6868

6969
if sys.platform != "darwin":
7070
sycl_enabled_backends.append("level_zero")
@@ -134,8 +134,6 @@ def do_configure(args):
134134

135135
# For clang-format, clang-tidy and code coverage
136136
llvm_enable_projects += ";clang-tools-extra;compiler-rt"
137-
# Build with zstd disabled on CI for now.
138-
llvm_enable_zstd = "OFF"
139137
if sys.platform != "darwin":
140138
# libclc is required for CI validation
141139
libclc_enabled = True

clang/include/clang/Driver/Action.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,14 @@ class OffloadWrapperJobAction : public JobAction {
698698
// Get the compilation step setting.
699699
bool getCompileStep() const { return CompileStep; }
700700

701+
// Set the individual wrapping setting. This is used to tell the wrapper job
702+
// action that the wrapping (and subsequent compile step) should be done
703+
// with for-each instead of using -batch.
704+
void setWrapIndividualFiles() { WrapIndividualFiles = true; }
705+
706+
// Get the individual wrapping setting.
707+
bool getWrapIndividualFiles() const { return WrapIndividualFiles; }
708+
701709
// Set the offload kind for the current wrapping job action. Default usage
702710
// is to use the kind of the current toolchain.
703711
void setOffloadKind(OffloadKind SetKind) { Kind = SetKind; }
@@ -707,6 +715,7 @@ class OffloadWrapperJobAction : public JobAction {
707715

708716
private:
709717
bool CompileStep = true;
718+
bool WrapIndividualFiles = false;
710719
OffloadKind Kind = OFK_None;
711720
};
712721

clang/lib/Driver/Driver.cpp

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,9 +5536,58 @@ class OffloadingActionBuilder final {
55365536
BundlingActions, types::TY_Object);
55375537
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
55385538
OWA->setOffloadKind(Action::OFK_Host);
5539+
// The Backend compilation step performed here is being done for
5540+
// creating FPGA archives. The possible split binaries after
5541+
// sycl-post-link need to be individually wrapped as opposed to
5542+
// being passed into the clang-offload-wrapper via a table and
5543+
// using the -batch option - effectively creating a single
5544+
// binary. The resulting archive created from -fsycl-link should
5545+
// not contain the singular binary, but should be individual
5546+
// binaries to be consumed later by either the -fsycl-link=image
5547+
// device compilation step or being linked into the final exe.
5548+
//
5549+
// Typical compile flow:
5550+
// .bc
5551+
// |
5552+
// sycl-post-link -split=kernel
5553+
// |
5554+
// +--------+--------+
5555+
// | | |
5556+
// split1 split2 split3
5557+
// | | |
5558+
// llvm-spirv llvm-spirv llvm-spirv
5559+
// | | |
5560+
// ocloc ocloc ocloc
5561+
// | | |
5562+
// +--------+--------+
5563+
// |
5564+
// clang-offload-wrapper -batch
5565+
// |
5566+
// .o
5567+
//
5568+
// Individual wrap compile flow:
5569+
// .bc
5570+
// |
5571+
// sycl-post-link -split=kernel
5572+
// |
5573+
// +--------+--------+
5574+
// | | |
5575+
// split1 split2 split3
5576+
// | | |
5577+
// llvm-spirv llvm-spirv llvm-spirv
5578+
// | | |
5579+
// ocloc ocloc ocloc
5580+
// | | |
5581+
// wrap wrap wrap
5582+
// | | |
5583+
// .o .o .o
5584+
//
55395585
Action *CompiledDeviceAction =
5540-
C.MakeAction<OffloadWrapperJobAction>(WrapperItems,
5541-
types::TY_Object);
5586+
C.MakeAction<OffloadWrapperJobAction>(FPGAAOTAction,
5587+
types::TY_Tempfilelist);
5588+
if (auto *OWA =
5589+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5590+
OWA->setWrapIndividualFiles();
55425591
addDeps(CompiledDeviceAction, TC, BoundArch);
55435592
}
55445593
addDeps(DeviceAction, TC, BoundArch);
@@ -5812,6 +5861,9 @@ class OffloadingActionBuilder final {
58125861
};
58135862

58145863
Action *ExtractIRFilesAction = createExtractIRFilesAction();
5864+
// Device binaries that are individually wrapped when creating an
5865+
// FPGA Archive.
5866+
ActionList FPGAArchiveWrapperInputs;
58155867

58165868
if (IsNVPTX || IsAMDGCN) {
58175869
JobAction *FinAction =
@@ -5897,6 +5949,7 @@ class OffloadingActionBuilder final {
58975949
FileTableTformJobAction::COL_CODE,
58985950
FileTableTformJobAction::COL_CODE);
58995951
WrapperInputs.push_back(ReplaceFilesAction);
5952+
FPGAArchiveWrapperInputs.push_back(BuildCodeAction);
59005953
}
59015954
if (SkipWrapper) {
59025955
// Wrapper step not requested.
@@ -5931,8 +5984,11 @@ class OffloadingActionBuilder final {
59315984
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
59325985
OWA->setOffloadKind(Action::OFK_Host);
59335986
Action *CompiledDeviceAction =
5934-
C.MakeAction<OffloadWrapperJobAction>(WrapperInputs,
5935-
types::TY_Object);
5987+
C.MakeAction<OffloadWrapperJobAction>(
5988+
FPGAArchiveWrapperInputs, types::TY_Tempfilelist);
5989+
if (auto *OWA =
5990+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5991+
OWA->setWrapIndividualFiles();
59365992
addDeps(CompiledDeviceAction, TC, nullptr);
59375993
}
59385994
addDeps(DeviceAction, TC, nullptr);
@@ -6461,7 +6517,7 @@ class OffloadingActionBuilder final {
64616517
if (GpuInitHasErrors)
64626518
return true;
64636519

6464-
int GenIndex = 0;
6520+
size_t GenIndex = 0;
64656521
// Fill SYCLTargetInfoList
64666522
for (auto &TT : SYCLTripleList) {
64676523
auto TCIt = llvm::find_if(

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10263,8 +10263,18 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1026310263
const InputInfo &I = Inputs[0];
1026410264
assert(I.isFilename() && "Invalid input.");
1026510265

10266-
if (I.getType() == types::TY_Tempfiletable ||
10267-
I.getType() == types::TY_Tempfilelist || IsEmbeddedIR)
10266+
// TODO: The embedded compilation step after the wrapping step restricts
10267+
// the ability to control the 'for each' methodology used when performing
10268+
// device code splitting. We set the individual wrap behavior when we know
10269+
// the wrapping and compile step should be done individually. Ideally this
10270+
// would be controlled at the JobAction creation, but we cannot do that
10271+
// until the compilation of the wrap is it's own JobAction.
10272+
bool IndividualWrapCompile = WrapperJob.getWrapIndividualFiles();
10273+
const InputInfo TempOutput(types::TY_LLVM_BC, WrapperFileName,
10274+
WrapperFileName);
10275+
if (!IndividualWrapCompile &&
10276+
(I.getType() == types::TY_Tempfiletable ||
10277+
I.getType() == types::TY_Tempfilelist || IsEmbeddedIR))
1026810278
// Input files are passed via the batch job file table.
1026910279
WrapperArgs.push_back(C.getArgs().MakeArgString("-batch"));
1027010280
WrapperArgs.push_back(C.getArgs().MakeArgString(I.getFilename()));
@@ -10273,7 +10283,17 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1027310283
JA, *this, ResponseFileSupport::None(),
1027410284
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
1027510285
WrapperArgs, std::nullopt);
10276-
C.addCommand(std::move(Cmd));
10286+
10287+
if (IndividualWrapCompile) {
10288+
// When wrapping FPGA device binaries for FPGA archives, create individual
10289+
// wrapped and compiled entries for the archive.
10290+
StringRef ParallelJobs =
10291+
C.getArgs().getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
10292+
clang::driver::tools::SYCL::constructLLVMForeachCommand(
10293+
C, JA, std::move(Cmd), Inputs, TempOutput, this, "", "bc",
10294+
ParallelJobs);
10295+
} else
10296+
C.addCommand(std::move(Cmd));
1027710297

1027810298
if (WrapperCompileEnabled) {
1027910299
// TODO Use TC.SelectTool().
@@ -10296,9 +10316,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1029610316
SmallString<128> ClangPath(C.getDriver().Dir);
1029710317
llvm::sys::path::append(ClangPath, "clang");
1029810318
const char *Clang = C.getArgs().MakeArgString(ClangPath);
10299-
C.addCommand(std::make_unique<Command>(JA, *this,
10300-
ResponseFileSupport::None(), Clang,
10301-
ClangArgs, std::nullopt));
10319+
auto PostWrapCompileCmd =
10320+
std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
10321+
Clang, ClangArgs, std::nullopt);
10322+
if (IndividualWrapCompile) {
10323+
StringRef ParallelJobs = C.getArgs().getLastArgValue(
10324+
options::OPT_fsycl_max_parallel_jobs_EQ);
10325+
InputInfoList Inputs;
10326+
Inputs.push_back(TempOutput);
10327+
clang::driver::tools::SYCL::constructLLVMForeachCommand(
10328+
C, JA, std::move(PostWrapCompileCmd), Inputs, Output, this, "",
10329+
"bc", ParallelJobs);
10330+
} else
10331+
C.addCommand(std::move(PostWrapCompileCmd));
1030210332
}
1030310333
return;
1030410334
} // end of SYCL flavor of offload wrapper command creation

clang/test/Driver/linker-wrapper-sycl-win.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,7 @@
112112
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]].bc
113113
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang.exe"{{.*}} -c -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
114114
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
115+
116+
// Error handling when --linker-path is not provided for clang-linker-wrapper
117+
// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
118+
// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'

clang/test/Driver/linker-wrapper-sycl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,7 @@
190190
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT2:.*]]
191191
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -c -o [[LLCOUT2:.*]] [[WRAPPEROUT2]]
192192
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
193+
194+
// Error handling when --linker-path is not provided for clang-linker-wrapper
195+
// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
196+
// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'

clang/test/Driver/linker-wrapper.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,7 @@ __attribute__((visibility("protected"), used)) int x;
250250
// MLLVM-SAME: -Xlinker -mllvm=-pass-remarks=foo,bar
251251
// OFFLOAD-OPT-NOT: -Xlinker -mllvm=-pass-remarks=foo,bar
252252
// OFFLOAD-OPT-SAME: {{$}}
253+
254+
// Error handling when --linker-path is not provided for clang-linker-wrapper
255+
// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
256+
// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'

0 commit comments

Comments
 (0)