Skip to content

Commit a25c96a

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into steffen/enable_threading_windows
2 parents 97e8f3a + a5161f2 commit a25c96a

File tree

469 files changed

+6898
-2930
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

469 files changed

+6898
-2930
lines changed

.github/workflows/sycl-linux-precommit.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,11 @@ jobs:
117117
env: ${{ matrix.env || '{}' }}
118118

119119
# Do not install drivers on AMD and CUDA runners.
120-
install_igc_driver: |
120+
install_igc_driver: >-
121121
${{ !contains(matrix.target_devices, 'ext_oneapi_cuda') &&
122122
!contains(matrix.target_devices, 'ext_oneapi_hip') &&
123123
contains(needs.detect_changes.outputs.filters, 'drivers') }}
124-
install_dev_igc_driver: |
124+
install_dev_igc_driver: >-
125125
${{ !contains(matrix.target_devices, 'ext_oneapi_cuda') &&
126126
!contains(matrix.target_devices, 'ext_oneapi_hip') &&
127127
matrix.use_igc_dev && contains(needs.detect_changes.outputs.filters, 'devigccfg') ||

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ permissions:
151151

152152
jobs:
153153
run:
154-
if: inputs.skip_run == 'false'
154+
if: github.event_name == 'workflow_dispatch' || inputs.skip_run == 'false'
155155
name: ${{ inputs.name }}
156156
runs-on: ${{ fromJSON(inputs.runner) }}
157157
container:

.github/workflows/sycl-windows-run-tests.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ jobs:
8686
# Run E2E tests.
8787
export LIT_OPTS="-v --no-progress-bar --show-unsupported --show-pass --show-xfail --max-time 3600 --time-tests ${{ inputs.extra_lit_opts }}"
8888
cmake --build build-e2e --target check-sycl-e2e
89+
- name: Detect hung tests
90+
shell: powershell
91+
run: |
92+
$exitCode = 0
93+
$hungTests = Get-Process | Where-Object { ($_.Path -match "llvm\\install") -or ($_.Path -match "llvm\\build-e2e") }
94+
$hungTests | Foreach-Object {
95+
$exitCode = 1
96+
echo "Test $($_.Path) hung!"
97+
Stop-Process -Force $_
98+
}
99+
exit $exitCode
89100
- name: Cleanup
90101
shell: cmd
91102
if: always()

clang/include/clang/Basic/LangOptions.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
307307
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
308308
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
309309
LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point")
310-
LANGOPT(SYCLAllowAllFeaturesInConstexpr, 1, 0, "Allow all C++ features in SYCL device code in manifestly constant-evaluated expressions")
311310
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
312311
LANGOPT(SYCLESIMDBuildHostCode, 1, 1, "Build the host implementation of ESIMD functions")
313312
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")

clang/include/clang/Driver/Options.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8628,11 +8628,6 @@ def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">,
86288628
HelpText<"Perform device compilation for Native CPU.">,
86298629
Visibility<[CC1Option]>,
86308630
MarshallingInfoFlag<LangOpts<"SYCLIsNativeCPU">>;
8631-
defm sycl_allow_all_features_in_constexpr
8632-
: BoolFOption<
8633-
"sycl-allow-all-features-in-constexpr", LangOpts<"SYCLAllowAllFeaturesInConstexpr">,
8634-
DefaultFalse,
8635-
PosFlag<SetTrue, [], [CC1Option], "Allow all C++ features in SYCL device code in manifestly constant-evaluated expressions">, NegFlag<SetFalse>>;
86368631

86378632
} // let Visibility = [CC1Option]
86388633

clang/include/clang/Driver/ToolChain.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,10 @@ class ToolChain {
781781
virtual void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
782782
llvm::opt::ArgStringList &CC1Args) const;
783783

784+
/// Add arguments to use SYCL specific includes.
785+
virtual void AddSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs,
786+
llvm::opt::ArgStringList &CC1Args) const;
787+
784788
/// Add arguments to use MCU GCC toolchain includes.
785789
virtual void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
786790
llvm::opt::ArgStringList &CC1Args) const;

clang/lib/Driver/Driver.cpp

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6786,13 +6786,10 @@ class OffloadingActionBuilder final {
67866786
// Do not use unbundler if the Host does not depend on device action.
67876787
// Now that we have unbundled the object, when doing -fsycl-link we
67886788
// want to continue the host link with the input object.
6789-
// For unbundling of an FPGA AOCX binary, we want to link with the original
6790-
// FPGA device archive.
67916789
if ((OffloadKind == Action::OFK_None && CanUseBundler) ||
67926790
(Args.hasArg(options::OPT_fsycl_link_EQ) && !HasFPGATarget) ||
67936791
(HasFPGATarget && ((Args.hasArg(options::OPT_fsycl_link_EQ) &&
6794-
HostAction->getType() == types::TY_Object) ||
6795-
HostAction->getType() == types::TY_FPGA_AOCX)))
6792+
HostAction->getType() == types::TY_Object))))
67966793
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction))
67976794
HostAction = UA->getInputs().back();
67986795

@@ -7443,16 +7440,36 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
74437440

74447441
// For an FPGA archive, we add the unbundling step above to take care of
74457442
// the device side, but also unbundle here to extract the host side
7446-
bool EarlyLink = false;
7447-
if (const Arg *A = Args.getLastArg(options::OPT_fsycl_link_EQ))
7448-
EarlyLink = A->getValue() == StringRef("early");
74497443
for (auto &LI : LinkerInputs) {
74507444
Action *UnbundlerInput = nullptr;
74517445
auto wrapObject = [&] {
7452-
if (EarlyLink && Args.hasArg(options::OPT_fintelfpga)) {
7453-
// Only wrap the object with -fsycl-link=early
7454-
auto *BC = C.MakeAction<OffloadWrapperJobAction>(LI, types::TY_LLVM_BC);
7455-
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
7446+
if (Args.hasArg(options::OPT_fsycl_link_EQ) &&
7447+
Args.hasArg(options::OPT_fintelfpga)) {
7448+
// Wrap the object when creating an FPGA AOCX or AOCR binary.
7449+
// When the input file is an AOCR (early) archive, the unbundled host
7450+
// binary consists of a list of objects. We cannot directly wrap that
7451+
// binary to be consumed later - this has to go through each listed
7452+
// object.
7453+
bool FPGAEarly = true;
7454+
if (auto *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ))
7455+
FPGAEarly = A->getValue() == StringRef("early");
7456+
7457+
Action *WrapperAction;
7458+
if ((LI->getType() == types::TY_FPGA_AOCR ||
7459+
LI->getType() == types::TY_FPGA_AOCR_EMU) &&
7460+
!FPGAEarly) {
7461+
auto *RenameAction = C.MakeAction<FileTableTformJobAction>(
7462+
LI, types::TY_Tempfilelist, types::TY_Tempfilelist);
7463+
RenameAction->addRenameColumnTform(FileTableTformJobAction::COL_ZERO,
7464+
FileTableTformJobAction::COL_CODE);
7465+
ActionList WrapperItems({RenameAction});
7466+
WrapperAction = C.MakeAction<OffloadWrapperJobAction>(
7467+
WrapperItems, types::TY_LLVM_BC);
7468+
} else
7469+
WrapperAction =
7470+
C.MakeAction<OffloadWrapperJobAction>(LI, types::TY_LLVM_BC);
7471+
auto *ASM =
7472+
C.MakeAction<BackendJobAction>(WrapperAction, types::TY_PP_Asm);
74567473
auto *OBJ = C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
74577474
OffloadAction::HostDependence HDep(
74587475
*OBJ, *C.getSingleOffloadToolChain<Action::OFK_Host>(),

clang/lib/Driver/ToolChain.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,9 @@ void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
15191519
void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
15201520
ArgStringList &CC1Args) const {}
15211521

1522+
void ToolChain::AddSYCLIncludeArgs(const ArgList &DriverArgs,
1523+
ArgStringList &CC1Args) const {}
1524+
15221525
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
15231526
ToolChain::getDeviceLibs(
15241527
const ArgList &DriverArgs,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
11601160
getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
11611161

11621162
if (JA.isOffloading(Action::OFK_SYCL)) {
1163-
toolchains::SYCLToolChain::AddSYCLIncludeArgs(D, Args, CmdArgs);
1163+
getToolChain().AddSYCLIncludeArgs(Args, CmdArgs);
11641164
if (Inputs[0].getType() == types::TY_CUDA) {
11651165
// Include __clang_cuda_runtime_wrapper.h in .cu SYCL compilation.
11661166
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
@@ -10216,7 +10216,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1021610216

1021710217
if (I.getType() == types::TY_Tempfiletable ||
1021810218
I.getType() == types::TY_Tempfilelist || IsEmbeddedIR)
10219-
// wrapper actual input files are passed via the batch job file table:
10219+
// Input files are passed via the batch job file table.
1022010220
WrapperArgs.push_back(C.getArgs().MakeArgString("-batch"));
1022110221
WrapperArgs.push_back(C.getArgs().MakeArgString(I.getFilename()));
1022210222

@@ -10283,6 +10283,11 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1028310283
CmdArgs.push_back(
1028410284
TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple()));
1028510285

10286+
if (Inputs[0].getType() == types::TY_Tempfiletable ||
10287+
Inputs[0].getType() == types::TY_Tempfilelist)
10288+
// Input files are passed via the batch job file table.
10289+
CmdArgs.push_back(C.getArgs().MakeArgString("-batch"));
10290+
1028610291
// Add input.
1028710292
assert(Inputs[0].isFilename() && "Invalid input.");
1028810293
CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename()));
@@ -10320,7 +10325,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1032010325

1032110326
if (Inputs[I].getType() == types::TY_Tempfiletable ||
1032210327
Inputs[I].getType() == types::TY_Tempfilelist)
10323-
// wrapper actual input files are passed via the batch job file table:
10328+
// Input files are passed via the batch job file table.
1032410329
CmdArgs.push_back(C.getArgs().MakeArgString("-batch"));
1032510330

1032610331
// Add input.

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
924924
const ToolChain &HostTC, const ArgList &Args,
925925
const Action::OffloadKind OK)
926926
: NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC),
927-
OK(OK) {}
927+
SYCLInstallation(D), OK(OK) {}
928928

929929
void CudaToolChain::addClangTargetOptions(
930930
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
@@ -941,11 +941,19 @@ void CudaToolChain::addClangTargetOptions(
941941
// If we are compiling SYCL kernels for Nvidia GPUs, we do not support Cuda
942942
// device code compatibility, hence we do not set Cuda mode in that instance.
943943
if (DeviceOffloadingKind == Action::OFK_SYCL) {
944-
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
945-
CC1Args);
944+
SYCLInstallation.AddSYCLIncludeArgs(DriverArgs, CC1Args);
946945

947946
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
948947
CC1Args.push_back("-fcuda-prec-sqrt");
948+
949+
bool FastRelaxedMath = DriverArgs.hasFlag(
950+
options::OPT_ffast_math, options::OPT_fno_fast_math, false);
951+
bool UnsafeMathOpt =
952+
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
953+
options::OPT_fno_unsafe_math_optimizations, false);
954+
if (FastRelaxedMath || UnsafeMathOpt)
955+
CC1Args.append({"-mllvm", "--nvptx-prec-divf32=0", "-mllvm",
956+
"--nvptx-prec-sqrtf32=0"});
949957
} else {
950958
CC1Args.append(
951959
{"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
@@ -1196,8 +1204,7 @@ CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
11961204
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
11971205
ArgStringList &CC1Args) const {
11981206
if (DriverArgs.hasArg(options::OPT_fsycl)) {
1199-
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
1200-
CC1Args);
1207+
SYCLInstallation.AddSYCLIncludeArgs(DriverArgs, CC1Args);
12011208
}
12021209
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
12031210

0 commit comments

Comments
 (0)