Skip to content

Commit 68781d6

Browse files
committed
Merge branch 'sycl' into jtodd/update-filecheck-tests
2 parents 1d007b5 + dc181bb commit 68781d6

File tree

23 files changed

+618
-13
lines changed

23 files changed

+618
-13
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ LANGOPT(SYCLExperimentalRangeRounding, 1, 0, "Use experimental parallel for rang
326326
LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the "
327327
"SYCL integration header")
328328
LANGOPT(SYCLIsNativeCPU , 1, 0, "Generate code for SYCL Native CPU")
329+
LANGOPT(SYCLRTCMode, 1, 0, "Compile in RTC mode")
329330

330331
LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
331332
LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6877,6 +6877,11 @@ defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-me
68776877
NegFlag<SetFalse, [], [ClangOption, CLOption], "Do not enforce using "
68786878
"stateless memory accesses.">,
68796879
BothFlags<[], [ClangOption, CLOption, CC1Option], "">>;
6880+
defm sycl_rtc_mode: BoolFOption<"sycl-rtc-mode",
6881+
LangOpts<"SYCLRTCMode">, DefaultFalse,
6882+
PosFlag<SetTrue, [], [ClangOption], "Enable">,
6883+
NegFlag<SetFalse, [], [ClangOption], "Disable">,
6884+
BothFlags<[HelpHidden], [ClangOption, CC1Option], " RTC mode in SYCL.">>;
68806885
// TODO: Remove this option once ESIMD headers are updated to
68816886
// guard vectors to be device only.
68826887
def fno_sycl_esimd_build_host_code : Flag<["-"], "fno-sycl-esimd-build-host-code">,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5589,6 +5589,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
55895589
Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor,
55905590
options::OPT_fno_sycl_decompose_functor);
55915591

5592+
Args.AddLastArg(CmdArgs, options::OPT_fsycl_rtc_mode,
5593+
options::OPT_fno_sycl_rtc_mode);
5594+
55925595
// Forward -fsycl-instrument-device-code option to cc1. This option will
55935596
// only be used for SPIR/SPIR-V based targets.
55945597
if (Triple.isSPIROrSPIRV())

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2040,6 +2040,11 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler {
20402040
}
20412041

20422042
bool handleStructType(ParmVarDecl *PD, QualType ParamTy) final {
2043+
if (SemaSYCLRef.getLangOpts().SYCLRTCMode) {
2044+
// When compiling in RTC mode, the restriction regarding forward
2045+
// declarations doesn't apply, as we don't need the integration header.
2046+
return isValid();
2047+
}
20432048
CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl();
20442049
// For free functions all struct/class kernel arguments are forward declared
20452050
// in integration header, that adds additional restrictions for kernel
@@ -6453,6 +6458,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
64536458
O << "} // namespace _V1\n";
64546459
O << "} // namespace sycl\n";
64556460

6461+
// The rest of this function only applies to free-function kernels. However,
6462+
// in RTC mode, we do not need integration header information for
6463+
// free-function kernels, so we can return early here.
6464+
if (S.getLangOpts().SYCLRTCMode) {
6465+
return;
6466+
}
6467+
64566468
unsigned ShimCounter = 1;
64576469
int FreeFunctionCount = 0;
64586470
for (const KernelDesc &K : KernelDescs) {
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fsycl-rtc-mode -fsycl-int-header=%t.rtc.h %s
2+
// RUN: FileCheck -input-file=%t.rtc.h --check-prefixes=CHECK,CHECK-RTC %s
3+
4+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fno-sycl-rtc-mode -fsycl-int-header=%t.nortc.h %s
5+
// RUN: FileCheck -input-file=%t.nortc.h --check-prefixes=CHECK,CHECK-NORTC %s
6+
7+
// This test checks that free-function kernel information is included or
8+
// excluded from the integration header, depending on the '-fsycl-rtc-mode'
9+
// flag.
10+
11+
#include "sycl.hpp"
12+
13+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 2)]]
14+
void free_function_single(int* ptr, int start, int end){
15+
for(int i = start; i < end; ++i){
16+
ptr[i] = start + 66;
17+
}
18+
}
19+
20+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
21+
void free_function_nd_range(int* ptr, int start, int end){
22+
for(int i = start; i < end; ++i){
23+
ptr[i] = start + 66;
24+
}
25+
}
26+
27+
template<typename KernelName, typename KernelFunc>
28+
__attribute__((sycl_kernel)) void kernel(const KernelFunc &kernelFunc){
29+
kernelFunc();
30+
}
31+
32+
int main(){
33+
sycl::accessor<int, 1, sycl::access::mode::read_write> accessorA;
34+
kernel<class Kernel_Function>(
35+
[=]() {
36+
accessorA.use();
37+
});
38+
return 0;
39+
}
40+
41+
42+
// CHECK: const char* const kernel_names[] = {
43+
// CHECK-NEXT: "{{.*}}__sycl_kernel_free_function_singlePiii",
44+
// CHECK-NEXT: "{{.*}}__sycl_kernel_free_function_nd_rangePiii",
45+
// CHECK-NEXT: "{{.*}}Kernel_Function",
46+
47+
48+
// CHECK: static constexpr const char* getName() { return "{{.*}}__sycl_kernel_free_function_singlePiii"; }
49+
// CHECK: static constexpr const char* getName() { return "{{.*}}__sycl_kernel_free_function_nd_rangePiii"; }
50+
// CHECK: static constexpr const char* getName() { return "{{.*}}Kernel_Function"; }
51+
52+
// CHECK-RTC-NOT: free_function_single_kernel
53+
// CHECK-RTC-NOT: free_function_nd_range
54+
55+
// CHECK-NORTC: void free_function_single(int *ptr, int start, int end);
56+
// CHECK-NORTC: static constexpr auto __sycl_shim[[#FIRST:]]()
57+
// CHECK-NORTC-NEXT: return (void (*)(int *, int, int))free_function_single;
58+
59+
// CHECK-NORTC: struct ext::oneapi::experimental::is_kernel<__sycl_shim[[#FIRST]]()> {
60+
// CHECK-NORTC-NEXT: static constexpr bool value = true;
61+
62+
// CHECK-NORTC: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim[[#FIRST]]()> {
63+
// CHECK-NORTC-NEXT: static constexpr bool value = true;
64+
65+
66+
// CHECK-NORTC: void free_function_nd_range(int *ptr, int start, int end);
67+
// CHECK-NORTC: static constexpr auto __sycl_shim[[#SECOND:]]() {
68+
// CHECK-NORTC-NEXT: return (void (*)(int *, int, int))free_function_nd_range;
69+
70+
// CHECK-NORTC: struct ext::oneapi::experimental::is_kernel<__sycl_shim[[#SECOND]]()> {
71+
// CHECK-NORTC-NEXT: static constexpr bool value = true;
72+
73+
// CHECK-NORTC: struct ext::oneapi::experimental::is_nd_range_kernel<__sycl_shim2(), 2> {
74+
// CHECK-NORTC-NEXT: static constexpr bool value = true;
75+
76+
// CHECK-NORTC: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim[[#FIRST]]()>() {
77+
// CHECK-NORTC-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"{{.*}}__sycl_kernel_free_function_singlePiii"});
78+
79+
// CHECK-NORTC: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim[[#SECOND]]()>() {
80+
// CHECK-NORTC-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"{{.*}}__sycl_kernel_free_function_nd_rangePiii"});
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
///
2+
/// Perform driver test for SYCL RTC mode.
3+
///
4+
5+
/// Check that the '-fsycl-rtc-mode' is correctly forwarded to the device
6+
/// compilation and only to the device compilation.
7+
8+
// RUN: %clangxx -fsycl -fsycl-rtc-mode --no-offload-new-driver %s -### 2>&1 \
9+
// RUN: | FileCheck %s
10+
11+
// RUN: %clangxx -fsycl -fsycl-rtc-mode --offload-new-driver %s -### 2>&1 \
12+
// RUN: | FileCheck %s
13+
14+
// CHECK: clang{{.*}} "-fsycl-is-device"
15+
// CHECK-SAME: -fsycl-rtc-mode
16+
// CHECK: clang{{.*}} "-fsycl-is-host"
17+
// CHECK-NOT: -fsycl-rtc-mode
18+
19+
20+
/// Check that the '-fno-sycl-rtc-mode' is correctly forwarded to the device
21+
/// compilation and only to the device compilation.
22+
23+
// RUN: %clangxx -fsycl -fno-sycl-rtc-mode --no-offload-new-driver %s -### 2>&1 \
24+
// RUN: | FileCheck %s --check-prefix=NEGATIVE
25+
26+
// RUN: %clangxx -fsycl -fno-sycl-rtc-mode --offload-new-driver %s -### 2>&1 \
27+
// RUN: | FileCheck %s --check-prefix=NEGATIVE
28+
29+
// NEGATIVE: clang{{.*}} "-fsycl-is-device"
30+
// NEGATIVE-SAME: -fno-sycl-rtc-mode
31+
// NEGATIVE: clang{{.*}} "-fsycl-is-host"
32+
// NEGATIVE-NOT: -fsycl-rtc-mode

clang/tools/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ add_clang_subdirectory(clang-diff)
77
add_clang_subdirectory(clang-format)
88
add_clang_subdirectory(clang-fuzzer)
99
add_clang_subdirectory(clang-import-test)
10-
add_clang_subdirectory(clang-nvlink-wrapper)
1110
add_clang_subdirectory(clang-linker-wrapper)
1211
add_clang_subdirectory(clang-nvlink-wrapper)
1312
add_clang_subdirectory(clang-offload-packager)

devops/cts_exclude_filter_L0_GPU

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ kernel_bundle
33
marray
44
# fix: https://github.com/KhronosGroup/SYCL-CTS/pull/964
55
accessor_legacy
6+
# CMPLRLLVM-61839
7+
multi_ptr

devops/cts_exclude_filter_OCL_CPU

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@ math_builtin_api
77
hierarchical
88
# fix: https://github.com/KhronosGroup/SYCL-CTS/pull/964
99
accessor_legacy
10+
# CMPLRLLVM-61839
11+
multi_ptr

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,38 @@ void setSyclFixedTargetsMD(const std::vector<Function *> &EntryPoints,
647647
F->setMetadata("sycl_fixed_targets", MDN);
648648
}
649649

650+
void collectVirtualFunctionSetInfo(
651+
Function &F, StringMap<SmallVector<Function *, 4>> &VirtualFunctionSets) {
652+
if (!F.hasFnAttribute("indirectly-callable"))
653+
return;
654+
Attribute IndirectlyCallableAttr = F.getFnAttribute("indirectly-callable");
655+
StringRef SetName = IndirectlyCallableAttr.getValueAsString();
656+
VirtualFunctionSets[SetName].push_back(&F);
657+
}
658+
659+
// For each set S of virtual functions that F declares,
660+
// propagate S through the CG and then add the aspects
661+
// used by S to F.
662+
void processDeclaredVirtualFunctionSets(
663+
Function *F, CallGraphTy &CG, FunctionToAspectsMapTy &AspectsMap,
664+
SmallPtrSet<const Function *, 16> &Visited,
665+
StringMap<SmallVector<Function *, 4>> &VirtualFunctionSets) {
666+
if (!F->hasFnAttribute("calls-indirectly"))
667+
return;
668+
Attribute CallsIndirectlyAttr = F->getFnAttribute("calls-indirectly");
669+
SmallVector<StringRef, 4> DeclaredVirtualFunctionSetNames;
670+
CallsIndirectlyAttr.getValueAsString().split(DeclaredVirtualFunctionSetNames,
671+
",");
672+
auto &AspectsF = AspectsMap[F];
673+
for (auto Name : DeclaredVirtualFunctionSetNames) {
674+
for (auto VFn : VirtualFunctionSets[Name]) {
675+
propagateAspectsThroughCG(VFn, CG, AspectsMap, Visited);
676+
for (auto Aspect : AspectsMap[VFn])
677+
AspectsF.insert(Aspect);
678+
}
679+
}
680+
}
681+
650682
/// Returns a map of functions with corresponding used aspects.
651683
std::pair<FunctionToAspectsMapTy, FunctionToAspectsMapTy>
652684
buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
@@ -655,16 +687,21 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
655687
bool ValidateAspects, bool FP64ConvEmu) {
656688
FunctionToAspectsMapTy FunctionToUsedAspects;
657689
FunctionToAspectsMapTy FunctionToDeclaredAspects;
690+
StringMap<SmallVector<Function *, 4>> VirtualFunctionSets;
658691
CallGraphTy CG;
659692

660693
for (Function &F : M.functions()) {
661694
processFunction(F, FunctionToUsedAspects, FunctionToDeclaredAspects,
662695
TypesWithAspects, CG, AspectValues, FP64ConvEmu);
696+
collectVirtualFunctionSetInfo(F, VirtualFunctionSets);
663697
}
664698

665699
SmallPtrSet<const Function *, 16> Visited;
666-
for (Function *F : EntryPoints)
700+
for (Function *F : EntryPoints) {
667701
propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited);
702+
processDeclaredVirtualFunctionSets(F, CG, FunctionToUsedAspects, Visited,
703+
VirtualFunctionSets);
704+
}
668705

669706
if (ValidateAspects)
670707
validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues,

0 commit comments

Comments
 (0)