Skip to content

Commit 74ffed8

Browse files
[SYCL][NewOffloadModel] Support optional kernel features and fix ocloc argument parsing (#20470)
This patch adds support for optional kernel features and fixes ocloc command parsing with the following changes: (1) Enable optional kernel features for the new offloading model by passing the device architecture name to the `sycl-post-link` command constructed in `ClangLinkerWrapper.cpp`. This invokes `sycl-post-link` with the device architecture when Intel GPU targets are specified in `-fsycl-targets`. A new test has been added in `aot-gpu.cpp` to verify this functionality. (2) The original implementation in `ClangLinkerWrapper.cpp` incorrectly constructed `ocloc` command line arguments by concatenating all arguments into a single string, causing parsing failures in the executor. This patch fixes the issue by properly splitting arguments on whitespace boundaries and rejoining them into a correctly formatted command string. (3) Add test cases running with the `-O0 -g` flags to `fp64-conv-emu-1.cpp` and `fp64-conv-emu-2.cpp`. The following tests, which failed before, now pass with the new offload model after applying this patch: 1. fp64-conv-emu-1.cpp 2. fp64-conv-emu-2.cpp 3. AOT/double.cpp 4. AOT/half.cpp 5. AOT/reqd-sg-size.cpp 6. BFloat16/bfloat16_example_aot_gpu.cpp 7. DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp 8. AOT/fallback.cpp
1 parent 97efb4f commit 74ffed8

File tree

4 files changed

+30
-3
lines changed

4 files changed

+30
-3
lines changed

clang/test/Driver/clang-linker-wrapper.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,20 @@
6363
// CHK-CMDS-AOT-GEN: spirv-to-ir-wrapper{{.*}} -o [[FIRSTLLVMLINKIN:.*]].bc --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global
6464
// CHK-CMDS-AOT-GEN-NEXT: llvm-link{{.*}} --suppress-warnings [[FIRSTLLVMLINKIN]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc
6565
// CHK-CMDS-AOT-GEN-NEXT: llvm-link{{.*}} -only-needed --suppress-warnings [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc
66-
// CHK-CMDS-AOT-GEN-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
66+
// Check that target specified by -fsycl-targets is passed to sycl-post-link for filtering.
67+
// CHK-CMDS-AOT-GEN-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o intel_gpu_pvc,[[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
6768
// CHK-CMDS-AOT-GEN-NEXT: llvm-spirv{{.*}} LLVM_SPIRV_OPTIONS -o {{.*}}
6869
// CHK-CMDS-AOT-GEN-NEXT: ocloc{{.*}} -output_no_suffix -spirv_input -device pvc{{.*}} -output {{.*}} -file {{.*}}
6970
// CHK-CMDS-AOT-GEN-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: {{.*}}, compile-opts: , link-opts:
7071
// CHK-CMDS-AOT-GEN-NEXT: clang{{.*}} -c -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
7172
// CHK-CMDS-AOT-GEN-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
7273

74+
// Check that when --gpu-tool-arg is specified in clang-linker-wrapper
75+
// (happens when AOT device is specified via -Xsycl-target-backend '-device pvc' in clang),
76+
// the target is not passed to sycl-post-link for filtering.
77+
// RUN: clang-linker-wrapper -sycl-embed-ir -sycl-device-libraries=%t1.devicelib.o -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--gpu-tool-arg=-device pvc" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t1.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-NO-CMDS-AOT-GEN %s
78+
// CHK-NO-CMDS-AOT-GEN-NOT: sycl-post-link{{.*}} -o intel_gpu_pvc,{{.*}}
79+
7380
/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for Intel CPU)
7481
// -------
7582
// Generate .o file as linker wrapper input.

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -727,10 +727,24 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
727727
createOutputFile(sys::path::filename(ExecutableName), "table");
728728
if (!TempFileOrErr)
729729
return TempFileOrErr.takeError();
730+
std::string OutputPathWithArch = TempFileOrErr->str();
731+
732+
// Enable the driver to invoke sycl-post-link with the device architecture
733+
// when Intel GPU targets are passed in -fsycl-targets.
734+
// OPT_gpu_tool_arg_EQ is checked to ensure the device architecture is not
735+
// passed through -Xsycl-target-backend=spir64_gen "-device <arch>" format
736+
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
737+
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
738+
StringRef IsGPUTool = Args.getLastArgValue(OPT_gpu_tool_arg_EQ);
739+
740+
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() &&
741+
IsGPUTool.empty() && Arch != "*")
742+
OutputPathWithArch = "intel_gpu_" + Arch.str() + "," + OutputPathWithArch;
743+
else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
744+
OutputPathWithArch = "spir64_x86_64," + OutputPathWithArch;
730745

731746
SmallVector<StringRef, 8> CmdArgs;
732747
CmdArgs.push_back(*SYCLPostLinkPath);
733-
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
734748
Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ);
735749
if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) {
736750
std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue();
@@ -748,7 +762,7 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
748762
SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
749763
/* KeepEmpty = */ false);
750764
CmdArgs.push_back("-o");
751-
CmdArgs.push_back(*TempFileOrErr);
765+
CmdArgs.push_back(Args.MakeArgString(OutputPathWithArch));
752766
for (auto &File : InputFiles)
753767
CmdArgs.push_back(File);
754768
if (Error Err = executeCommands(*SYCLPostLinkPath, CmdArgs))

sycl/test-e2e/OptionalKernelFeatures/fp64-conv-emu-1.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10,intel_gpu_dg2_g11,intel_gpu_dg2_g12,intel_gpu_pvc,intel_gpu_mtl_h,intel_gpu_mtl_u -fsycl-fp64-conv-emu %O0 %s -o %t.out
2121
// RUN: %{run} %t.out
2222

23+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10,intel_gpu_dg2_g11,intel_gpu_dg2_g12,intel_gpu_pvc,intel_gpu_mtl_h,intel_gpu_mtl_u -fsycl-fp64-conv-emu -g %O0 %s -o %t.debug.out
24+
// RUN: %{run} %t.debug.out
25+
2326
// Tests that aspect::fp64 is correctly not emitted when -fsycl-fp64-conv-emu
2427
// flag is used.
2528

sycl/test-e2e/OptionalKernelFeatures/fp64-conv-emu-2.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10,intel_gpu_dg2_g11,intel_gpu_dg2_g12,intel_gpu_pvc,intel_gpu_mtl_h,intel_gpu_mtl_u -fsycl-fp64-conv-emu %O0 %s -o %t.out
2424
// RUN: %{run} %t.out
2525

26+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10,intel_gpu_dg2_g11,intel_gpu_dg2_g12,intel_gpu_pvc,intel_gpu_mtl_h,intel_gpu_mtl_u -fsycl-fp64-conv-emu -g %O0 %s -o %t.debug.out
27+
// RUN: %{run} %t.debug.out
28+
2629
#include <sycl/detail/core.hpp>
2730
using namespace sycl;
2831

0 commit comments

Comments
 (0)