Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/include/clang/Driver/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,13 @@ class Driver {
/// @name Helper Methods
/// @{

/// Decide whether SYCL offloading should use the new offload driver, based on
/// the options in \p Args.
///
/// Per the definition in Driver.cpp this returns 'false' when -fsycl is not
/// in effect, when --no-offload-new-driver is the last of the
/// --[no-]offload-new-driver options, when -fintelfpga is given, or when any
/// value of the last -fsycl-targets option resolves to a device triple that is
/// not SPIR/SPIR-V or for which isSPIRAOT() holds. Otherwise it returns
/// 'true', including when no -fsycl-targets option is given at all.
///
/// NOTE(review): earlier wording stated that AOT Intel CPU/GPU and NVidia/AMD
/// targets also yield 'true'; the current definition rejects them. Confirm
/// which behavior is intended.
bool
GetUseNewOffloadDriverForSYCLOffload(Compilation &C,
const llvm::opt::ArgList &Args) const;

/// getSYCLDeviceTriple - Returns the SYCL device triple for the
/// specified subarch
// TODO: Additional Arg input parameter is for diagnostic output information
Expand Down
37 changes: 32 additions & 5 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,32 @@ static void appendOneArg(InputArgList &Args, const Arg *Opt) {
}
}

// Decides whether a SYCL compilation should go through the new offload driver.
//
// Returns 'false' when:
//   * SYCL is not enabled (-fsycl absent, or overridden by -fno-sycl),
//   * --no-offload-new-driver is the last of the --[no-]offload-new-driver
//     options,
//   * -fintelfpga is present, or
//   * any value of the last -fsycl-targets option maps to a device triple
//     that is not SPIR/SPIR-V, or for which isSPIRAOT() holds.
// Returns 'true' otherwise, including when no -fsycl-targets option is given.
//
// NOTE(review): earlier wording stated that AOT Intel CPU/GPU and NVidia/AMD
// targets also yield 'true'; the checks below reject them. Confirm which
// behavior is intended.
bool Driver::GetUseNewOffloadDriverForSYCLOffload(Compilation &C,
                                                  const ArgList &Args) const {
  // The conditions short-circuit left to right, so a later option is only
  // queried when the earlier ones have not already ruled the new driver out.
  if (!Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false) ||
      Args.hasFlag(options::OPT_no_offload_new_driver,
                   options::OPT_offload_new_driver, false) ||
      Args.hasArg(options::OPT_fintelfpga))
    return false;

  // Without an explicit target list there is nothing left to reject.
  const Arg *TargetsArg = Args.getLastArg(options::OPT_fsycl_targets_EQ);
  if (!TargetsArg)
    return true;

  // Every listed target must be a SPIR/SPIR-V triple that is not SPIR AOT;
  // stop at the first one that is not.
  for (const char *TargetName : TargetsArg->getValues()) {
    llvm::Triple DeviceTriple(C.getDriver().getSYCLDeviceTriple(TargetName));
    bool IsNonAOTSpir =
        DeviceTriple.isSPIROrSPIRV() && !DeviceTriple.isSPIRAOT();
    if (!IsNonAOTSpir)
      return false;
  }
  return true;
}

bool Driver::readConfigFile(StringRef FileName,
llvm::cl::ExpansionContext &ExpCtx) {
// Try opening the given file.
Expand Down Expand Up @@ -2195,12 +2221,12 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
// Use new offloading path for OpenMP. This is disabled as the SYCL
// offloading path is not properly setup to use the updated device linking
// scheme.
if ((C->isOffloadingHostKind(Action::OFK_OpenMP) &&
TranslatedArgs->hasFlag(options::OPT_fopenmp_new_driver,
options::OPT_no_offload_new_driver, true)) ||
if (C->isOffloadingHostKind(Action::OFK_OpenMP) ||
TranslatedArgs->hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false))
options::OPT_no_offload_new_driver, false) ||
GetUseNewOffloadDriverForSYCLOffload(*C, *TranslatedArgs)) {
setUseNewOffloadingDriver();
}

// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
Expand Down Expand Up @@ -7085,7 +7111,8 @@ void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
options::OPT_fno_offload_via_llvm, false) ||
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver,
C.isOffloadingHostKind(Action::OFK_Cuda));
C.isOffloadingHostKind(Action::OFK_Cuda)) ||
GetUseNewOffloadDriverForSYCLOffload(C, Args);

bool HIPNoRDC =
C.isOffloadingHostKind(Action::OFK_HIP) &&
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5233,7 +5233,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver,
C.isOffloadingHostKind(Action::OFK_Cuda)));
C.isOffloadingHostKind(Action::OFK_Cuda))) ||
(JA.isHostOffloading(Action::OFK_SYCL) &&
C.getDriver().GetUseNewOffloadDriverForSYCLOffload(C, Args));

bool IsRDCMode =
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, IsSYCL);
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/Compression/compression_separate_compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
////////////////////// Link device images
// RUN: %{run-aux} %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v

// Make sure the clang-offload-wrapper is called with the --offload-compress
// Make sure the clang-offload-wrapper is called with the --offload-compress
// option when using the old offloading model.
// RUN: %{run-aux} %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -### &> %t_driver_opts.txt
// RUN: %{run-aux} %clangxx --no-offload-new-driver --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -### &> %t_driver_opts.txt
// RUN: %{run-aux} FileCheck -input-file=%t_driver_opts.txt %s --check-prefix=CHECK-DRIVER-OPTS

// CHECK-DRIVER-OPTS: clang-offload-wrapper{{.*}} "-offload-compress"
Expand Down
87 changes: 87 additions & 0 deletions sycl/test-e2e/NewOffloadDriver/image_compression.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// End-to-End test for testing device image compression when we
// separately compile and link device images.

// This test is copied from compression_separate_compile.cpp and adapted
// to use the New Offloading Model.

// REQUIRES: zstd, opencl-aot, cpu, linux

// XFAIL: run-mode && preview-mode
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20397

// XFAIL: target-native_cpu
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20397

// CPU AOT targets host isa, so we compile everything on the run system instead.
////////////////////// Compile device images
// RUN: %clangxx --offload-new-driver -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o
// RUN: %clangxx --offload-new-driver -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o

////////////////////// Link device images
// RUN: %clangxx --offload-new-driver --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v

// Make sure the clang-linker-wrapper is called with the --compress option
// when using the new offloading model.
// RUN: %clangxx --offload-new-driver --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -### &> %t_driver_opts.txt
// RUN: FileCheck -input-file=%t_driver_opts.txt %s --check-prefix=CHECK-DRIVER-OPTS

// CHECK-DRIVER-OPTS: clang-linker-wrapper{{.*}} "--compress"

////////////////////// Compile the host program
// RUN: %clangxx --offload-new-driver -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o

////////////////////// Link the host program and compressed device images
// RUN: %clangxx --offload-new-driver -fsycl %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -o %t_compress.out

// RUN: %{run} %t_compress.out

#include <sycl/detail/core.hpp>

using namespace sycl;

// Kernel 1
#ifdef ENABLE_KERNEL1
class test_kernel1;
// Submits the `test_kernel1` single-task kernel to `q`, which multiplies the
// int pointed to by `a` by 3, and blocks until that kernel has finished.
void run_kernel1(int *a, queue q) {
  auto Done = q.single_task<test_kernel1>([=]() { *a *= 3; });
  Done.wait();
}
#endif

// Kernel 2
#ifdef ENABLE_KERNEL2
class test_kernel2;
// Submits the `test_kernel2` single-task kernel to `q`, which adds 42 to the
// int pointed to by `a`, and blocks until that kernel has finished.
void run_kernel2(int *a, queue q) {
  auto Done = q.single_task<test_kernel2>([=]() { *a += 42; });
  Done.wait();
}
#endif

// Main application.
#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2)
#include <sycl/properties/all_properties.hpp>
#include <sycl/usm.hpp>

#include <iostream>

class kernel_init;
void run_kernel1(int *a, queue q);
void run_kernel2(int *a, queue q);
// Host entry point: runs both separately compiled kernels on one shared USM
// int and returns 0 when the final value is the expected one, 1 otherwise.
int main() {
  queue q;

  // Devices without USM shared allocations cannot run this check; treat that
  // as success rather than failure.
  const bool HasSharedUSM =
      q.get_device().get_info<info::device::usm_shared_allocations>();
  if (!HasSharedUSM)
    return 0;

  int *Value = malloc_shared<int>(1, q);
  *Value = 42;

  // Kernel 1 triples the value, kernel 2 then adds 42 to it.
  run_kernel1(Value, q);
  run_kernel2(Value, q);
  q.wait();

  // Read the result before releasing the allocation.
  const int ExitCode = (*Value == (42 * 3 + 42)) ? 0 : 1;

  free(Value, q);
  return ExitCode;
}
#endif