Skip to content

Commit 759a3d3

Browse files
Merge branch 'sycl' into update_l0_tag_again
2 parents 3227a78 + 611e245 commit 759a3d3

File tree

295 files changed

+3262
-1464
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

295 files changed

+3262
-1464
lines changed

.github/workflows/sycl-linux-precommit.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,42 @@ jobs:
6262
e2e_binaries_artifact: e2e_bin
6363
e2e_binaries_preview_artifact: e2e_bin_preview
6464

65+
# Build and run native cpu e2e tests separately, as we cannot currently
66+
# build all the e2e tests
67+
build_run_native_cpu_e2e_tests:
68+
if: ${{ always() && !cancelled() && needs.build.outputs.build_conclusion == 'success' }}
69+
runs-on: [Linux, build]
70+
needs: [build]
71+
container:
72+
image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest
73+
options: -u 1001:1001
74+
steps:
75+
- uses: actions/checkout@v4
76+
with:
77+
sparse-checkout: |
78+
devops/
79+
80+
# download build artefact
81+
- name: Download toolchain
82+
uses: actions/download-artifact@v4
83+
with:
84+
name: sycl_linux_default
85+
- name: Extract SYCL toolchain
86+
shell: bash
87+
run: |
88+
mkdir toolchain
89+
tar -xf llvm_sycl.tar.zst -C toolchain
90+
rm llvm_sycl.tar.zst
91+
- name: Build and run E2E tests
92+
uses: ./devops/actions/run-tests/e2e
93+
with:
94+
ref: ${{ inputs.ref || github.sha }}
95+
testing_mode: full
96+
target_devices: native_cpu:cpu
97+
sycl_compiler: $GITHUB_WORKSPACE/toolchain/bin/clang++
98+
extra_lit_opts: --param sycl_build_targets="native_cpu"
99+
extra_cmake_args: -DSYCL_TEST_E2E_TARGETS="native_cpu:cpu" -DSYCL_TEST_E2E_STANDALONE=ON
100+
65101
# If a PR changes CUDA adapter, run the build on Ubuntu 22.04 as well.
66102
# Ubuntu 22.04 container has CUDA 12.1 installed while Ubuntu 24.04 image
67103
# has CUDA 12.6.1 installed.

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ jobs:
235235
- name: Reset Intel GPU
236236
uses: ./devops/actions/reset_gpu
237237
- name: Install drivers
238-
if: inputs.install_igc_driver == 'true' || inputs.install_dev_igc_driver == 'true'
238+
if: inputs.e2e_binaries_artifact != 'in-container' && (inputs.install_igc_driver == 'true' || inputs.install_dev_igc_driver == 'true')
239239
env:
240240
GITHUB_TOKEN: ${{ github.token }}
241241
run: |

clang/include/clang/Sema/Sema.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9183,7 +9183,13 @@ class Sema final : public SemaBase {
91839183
};
91849184

91859185
/// Compute the mangling number context for a lambda expression or
9186-
/// block literal. Also return the extra mangling decl if any.
9186+
/// block literal that appears in the specified declaration context in
9187+
/// consideration of the current expression evaluation and template
9188+
/// instantiation contexts. If the mangling context requires external linkage,
9189+
/// then a mangling number context is returned in the first tuple
9190+
/// element. If the mangling context is non-normal (specialized for
9191+
/// lambda and block types relative to other entities), the overriding
9192+
/// declaration is returned in the second tuple element.
91879193
///
91889194
/// \param DC - The DeclContext containing the lambda expression or
91899195
/// block literal.

clang/lib/AST/ASTContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13356,6 +13356,7 @@ MangleNumberingContext &
1335613356
ASTContext::getManglingNumberContext(const DeclContext *DC) {
  // Returns the mangle numbering context associated with \p DC, creating it
  // on first use.
  assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
  // Normalize to the primary context *before* indexing the map. Doing it
  // after the lookup (as before) is a dead store: the map would still be keyed
  // by whichever redeclaration of the context the caller happened to pass, so
  // separate redeclarations of one context (presumably e.g. a reopened
  // namespace) would each get their own, independently numbered context.
  DC = DC->getPrimaryContext();
  std::unique_ptr<MangleNumberingContext> &MCtx = MangleNumberingContexts[DC];
  // Lazily create the numbering context the first time it is requested.
  if (!MCtx)
    MCtx = createMangleNumberingContext();
  return *MCtx;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10097,8 +10097,13 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1009710097
// clang-offload-wrapper
1009810098
// -o=<outputfile>.bc
1009910099
// -host=x86_64-pc-linux-gnu -kind=sycl
10100+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1010010101
// -format=spirv <inputfile1>.spv <manifest1>(optional)
1010110102
// -format=spirv <inputfile2>.spv <manifest2>(optional)
10103+
#else
10104+
// -format=spirv <inputfile1>.spv
10105+
// -format=spirv <inputfile2>.spv
10106+
#endif
1010210107
// ...
1010310108
ArgStringList WrapperArgs;
1010410109

@@ -10163,6 +10168,12 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1016310168
WrapperArgs.push_back(
1016410169
C.getArgs().MakeArgString(Twine("-kind=") + Twine(Kind)));
1016510170

10171+
// Enable preview breaking changes in clang-offload-wrapper,
10172+
// in case it needs to introduce any ABI breaking changes.
10173+
// For example, changes in offload binary descriptor format.
10174+
if (C.getArgs().hasArg(options::OPT_fpreview_breaking_changes))
10175+
WrapperArgs.push_back("-fpreview-breaking-changes");
10176+
1016610177
assert((Inputs.size() > 0) && "no inputs for clang-offload-wrapper");
1016710178
assert(((Inputs[0].getType() != types::TY_Tempfiletable) ||
1016810179
(Inputs.size() == 1)) &&
@@ -11580,6 +11591,12 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1158011591
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
1158111592
}
1158211593

11594+
// Enable preview breaking changes in clang-linker-wrapper,
11595+
// in case it needs to introduce any ABI breaking changes.
11596+
// For example, changes in offload binary descriptor format.
11597+
if (Args.hasArg(options::OPT_fpreview_breaking_changes))
11598+
CmdArgs.push_back("-fpreview-breaking-changes");
11599+
1158311600
const char *Exec =
1158411601
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
1158511602

clang/lib/Sema/SemaLambda.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,14 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC) {
356356
return std::make_tuple(&Context.getManglingNumberContext(DC), nullptr);
357357
}
358358

359-
return std::make_tuple(nullptr, nullptr);
359+
if (ManglingContextDecl) {
360+
// Lambdas defined in the initializer of a local variable are mangled
361+
// in the enclosing function context.
362+
if (auto *VD = dyn_cast<VarDecl>(ManglingContextDecl);
363+
VD && !VD->hasGlobalStorage())
364+
ManglingContextDecl = nullptr;
365+
}
366+
return std::make_tuple(nullptr, ManglingContextDecl);
360367
}
361368

362369
case NonInlineInModulePurview:

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6626,10 +6626,12 @@ class FreeFunctionPrinter {
66266626
raw_ostream &O;
66276627
PrintingPolicy &Policy;
66286628
bool NSInserted = false;
6629+
ASTContext &Context;
66296630

66306631
public:
6631-
FreeFunctionPrinter(raw_ostream &O, PrintingPolicy &PrintPolicy)
6632-
: O(O), Policy(PrintPolicy) {}
6632+
FreeFunctionPrinter(raw_ostream &O, PrintingPolicy &PrintPolicy,
6633+
ASTContext &Context)
6634+
: O(O), Policy(PrintPolicy), Context(Context) {}
66336635

66346636
/// Emits the function declaration of template free function.
66356637
/// \param FTD The function declaration to print.
@@ -6826,18 +6828,42 @@ class FreeFunctionPrinter {
68266828
CTN.getAsTemplateDecl()->printQualifiedName(ParmListOstream);
68276829
ParmListOstream << "<";
68286830

6829-
auto SpecArgs = TST->template_arguments();
6830-
auto DeclArgs = CTST->template_arguments();
6831+
ArrayRef<TemplateArgument> SpecArgs = TST->template_arguments();
6832+
ArrayRef<TemplateArgument> DeclArgs = CTST->template_arguments();
6833+
6834+
// Prints a single template argument into ParmListOstream.
//
// Non-expression arguments, instantiation-dependent arguments and
// scoped-enumeration expressions are printed as written. Any other
// expression argument is evaluated as a constant expression and its value is
// printed instead of the spelling used in the source.
auto TemplateArgPrinter = [&](const TemplateArgument &Arg) {
  // Shared "print the argument as written" path.
  auto PrintAsWritten = [&] {
    Arg.print(Policy, ParmListOstream, /* IncludeType = */ false);
  };

  if (Arg.getKind() != TemplateArgument::ArgKind::Expression ||
      Arg.isInstantiationDependent()) {
    PrintAsWritten();
    return;
  }

  Expr *E = Arg.getAsExpr();
  assert(E && "Failed to get an Expr for an Expression template arg?");
  if (E->getType()->isScopedEnumeralType()) {
    // Scoped enumerations can't be implicitly cast from integers, so
    // we don't need to evaluate them.
    PrintAsWritten();
    return;
  }

  Expr::EvalResult Res;
  const bool Success = E->EvaluateAsConstantExpr(Res, Context);
  assert(Success && "invalid non-type template argument?");
  assert(!Res.Val.isAbsent() && "couldn't read the evaluation result?");
  if (!Success || Res.Val.isAbsent()) {
    // Only reachable when assertions are compiled out: rather than printing
    // an absent value, fall back to the argument as written.
    PrintAsWritten();
    return;
  }
  Res.Val.printPretty(ParmListOstream, Policy, E->getType(), &Context);
};
68316858

68326859
for (size_t I = 0, E = std::max(DeclArgs.size(), SpecArgs.size()),
68336860
SE = SpecArgs.size();
68346861
I < E; ++I) {
68356862
if (I != 0)
68366863
ParmListOstream << ", ";
6837-
if (I < SE) // A specialized argument exists, use it
6838-
SpecArgs[I].print(Policy, ParmListOstream, false /* IncludeType */);
6839-
else // Print a canonical form of a default argument
6840-
DeclArgs[I].print(Policy, ParmListOstream, false /* IncludeType */);
6864+
// If we have a specialized argument, use it. Otherwise fallback to a
6865+
// default argument.
6866+
TemplateArgPrinter(I < SE ? SpecArgs[I] : DeclArgs[I]);
68416867
}
68426868

68436869
ParmListOstream << ">";
@@ -7236,7 +7262,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
72367262
// template arguments that match default template arguments while printing
72377263
// template-ids, even if the source code doesn't reference them.
72387264
Policy.EnforceDefaultTemplateArgs = true;
7239-
FreeFunctionPrinter FFPrinter(O, Policy);
7265+
FreeFunctionPrinter FFPrinter(O, Policy, S.getASTContext());
72407266
if (FTD) {
72417267
FFPrinter.printFreeFunctionDeclaration(FTD);
72427268
if (const auto kind = K.SyclKernel->getTemplateSpecializationKind();
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// This test checks that lambdas assigned to variables (including inline and
2+
// templated cases) in the same namespace are uniquely mangled and callable via
3+
// template functions. It ensures that the compiler generates distinct symbols
4+
// for each lambda and resolves them correctly in function calls.
5+
6+
// RUN: %clang_cc1 -O0 -triple x86_64-unknown-unknown \
7+
// RUN: -emit-llvm %s -o - | FileCheck %s
8+
9+
// RUN: %clang_cc1 -O0 -triple x86_64-pc-windows-msvc \
10+
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
11+
12+
namespace QL {
13+
auto dg1 = [] { return 1; };
14+
inline auto dg_inline1 = [] { return 1; };
15+
}
16+
17+
namespace QL {
18+
auto dg2 = [] { return 2; };
19+
template<int N>
20+
auto dg_template = [] { return N; };
21+
}
22+
23+
using namespace QL;
24+
template<typename T>
25+
void f(T t) {
26+
t();
27+
}
28+
29+
void g() {
30+
f(dg1);
31+
f(dg2);
32+
f(dg_inline1);
33+
f(dg_template<3>);
34+
}
35+
36+
// CHECK: @_ZN2QL3dg1E = internal global %class.anon undef, align 1
37+
// CHECK: @_ZN2QL3dg2E = internal global %class.anon.0 undef, align 1
38+
// CHECK: @_ZN2QL10dg_inline1E = linkonce_odr global %class.anon.2 undef, comdat, align 1
39+
// CHECK: @_ZN2QL11dg_templateILi3EEE = linkonce_odr global %class.anon.4 undef, comdat, align 1
40+
41+
// MSVC: @"?dg1@QL@@3V<lambda_0>@1@A" = internal global %class.anon undef, align 1
42+
// MSVC: @"?dg2@QL@@3V<lambda_1>@1@A" = internal global %class.anon.0 undef, align 1
43+
// MSVC: @"?dg_inline1@QL@@3V<lambda_1>@01@A" = linkonce_odr dso_local global %class.anon.2 undef, comdat, align 1
44+
// MSVC: @"??$dg_template@$02@QL@@3V<lambda_1>@01@A" = linkonce_odr dso_local global %class.anon.4 undef, comdat, align 1
45+
46+
47+
// CHECK: define internal void @"_Z1fIN2QL3$_0EEvT_"
48+
// CHECK: call noundef i32 @"_ZNK2QL3$_0clEv"
49+
// CHECK: define internal void @"_Z1fIN2QL3$_1EEvT_"
50+
// CHECK: define linkonce_odr void @_Z1fIN2QL10dg_inline1MUlvE_EEvT_
51+
// CHECK: call noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv
52+
// CHECK: define linkonce_odr void @_Z1fIN2QL11dg_templateILi3EEMUlvE_EEvT_
53+
// CHECK: call noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv
54+
// CHECK: define internal noundef i32 @"_ZNK2QL3$_0clEv"
55+
// CHECK: define internal noundef i32 @"_ZNK2QL3$_1clEv"
56+
// CHECK: define linkonce_odr noundef i32 @_ZNK2QL10dg_inline1MUlvE_clEv
57+
// CHECK: define linkonce_odr noundef i32 @_ZNK2QL11dg_templateILi3EEMUlvE_clEv
58+
59+
// MSVC: define linkonce_odr dso_local void @"??$f@V<lambda_1>@dg_inline1@QL@@@@YAXV<lambda_1>@dg_inline1@QL@@@Z"
60+
// MSVC: call noundef i32 @"??R<lambda_1>@dg_inline1@QL@@QEBA?A?<auto>@@XZ"
61+
// MSVC: define linkonce_odr dso_local void @"??$f@V<lambda_1>@?$dg_template@$02@QL@@@@YAXV<lambda_1>@?$dg_template@$02@QL@@@Z"
62+
// MSVC: call noundef i32 @"??R<lambda_1>@?$dg_template@$02@QL@@QEBA?A?<auto>@@XZ"
63+
// MSVC: define internal noundef i32 @"??R<lambda_0>@QL@@QEBA?A?<auto>@@XZ"
64+
// MSVC: define internal noundef i32 @"??R<lambda_1>@QL@@QEBA?A?<auto>@@XZ"
65+
// MSVC: define linkonce_odr dso_local noundef i32 @"??R<lambda_1>@dg_inline1@QL@@QEBA?A?<auto>@@XZ"
66+
// MSVC: define linkonce_odr dso_local noundef i32 @"??R<lambda_1>@?$dg_template@$02@QL@@QEBA?A?<auto>@@XZ"

clang/test/CodeGenCUDA/anon-ns.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626

2727
// HIP-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
2828
// HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
29-
// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
29+
// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_16lambdaMUlvE_EEvT_.intern\.b04fd23c98500190]](
3030
// HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
3131
// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized constant
3232
// HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
3333

3434
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
3535
// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
36-
// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
36+
// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_16lambdaMUlvE_EEvT___intern__b04fd23c98500190]](
3737
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant
3838
// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
3939

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fsycl-int-header=%t.h %s
2+
// RUN: FileCheck -input-file=%t.h %s
3+
//
4+
// The purpose of this test is to ensure that forward declarations of free
5+
// function kernels are emitted properly.
6+
// However, this test checks a specific scenario:
7+
// - free function argument is a template which accepts constant expressions as
8+
// arguments
9+
10+
constexpr int A = 2;
11+
constexpr int B = 3;
12+
13+
namespace ns {
14+
15+
constexpr int C = 4;
16+
17+
struct Foo {
18+
static constexpr int D = 5;
19+
};
20+
21+
enum non_class_enum {
22+
VAL_A,
23+
VAL_B
24+
};
25+
26+
enum class class_enum {
27+
VAL_A,
28+
VAL_B
29+
};
30+
31+
enum non_class_enum_typed : int {
32+
VAL_C,
33+
VAL_D
34+
};
35+
36+
enum class class_enum_typed : int {
37+
VAL_C,
38+
VAL_D
39+
};
40+
41+
constexpr int bar(int arg) {
42+
return arg + 42;
43+
}
44+
45+
} // namespace ns
46+
47+
template<int V>
48+
struct Arg {};
49+
50+
template<ns::non_class_enum V>
51+
struct Arg2 {};
52+
53+
template<ns::non_class_enum_typed V>
54+
struct Arg3 {};
55+
56+
template<ns::class_enum V>
57+
struct Arg4 {};
58+
59+
template<ns::class_enum_typed V>
60+
struct Arg5 {};
61+
62+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
63+
void constant(Arg<1>) {}
64+
65+
// CHECK: void constant(Arg<1> );
66+
67+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
68+
void constexpr_v(Arg<A>) {}
69+
70+
// CHECK: void constexpr_v(Arg<2> );
71+
72+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
73+
void constexpr_expr(Arg<A * B>) {}
74+
75+
// CHECK: void constexpr_expr(Arg<6> );
76+
77+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
78+
void constexpr_ns(Arg<ns::C>) {}
79+
80+
// CHECK: void constexpr_ns(Arg<4> );
81+
82+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
83+
void constexpr_ns2(Arg<ns::Foo::D>) {}
84+
85+
// CHECK: void constexpr_ns2(Arg<5> );
86+
87+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
88+
void constexpr_ns2(Arg2<ns::non_class_enum::VAL_A>) {}
89+
90+
// CHECK: void constexpr_ns2(Arg2<ns::VAL_A> );
91+
92+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
93+
void constexpr_ns2(Arg3<ns::non_class_enum_typed::VAL_C>) {}
94+
95+
// CHECK: void constexpr_ns2(Arg3<ns::VAL_C> );
96+
97+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
98+
void constexpr_ns2(Arg4<ns::class_enum::VAL_A>) {}
99+
100+
// CHECK: void constexpr_ns2(Arg4<ns::class_enum::VAL_A> );
101+
102+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
103+
void constexpr_ns2(Arg5<ns::class_enum_typed::VAL_C>) {}
104+
105+
// CHECK: void constexpr_ns2(Arg5<ns::class_enum_typed::VAL_C> );
106+
107+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
108+
void constexpr_call(Arg<ns::bar(B)>) {}
109+
110+
// CHECK: void constexpr_call(Arg<45> );

0 commit comments

Comments
 (0)