Skip to content

Commit f001374

Browse files
committed
Merge branch 'sycl' into review/yang/improve_statistic_test
2 parents 137ae80 + a1355e8 commit f001374

File tree

29 files changed

+373
-70
lines changed

29 files changed

+373
-70
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,6 +3020,21 @@ namespace {
30203020
};
30213021
}
30223022

3023+
/// Returns true when \p Arg carries a SYCLAddIRAttributesKernelParameterAttr
/// whose name/value pairs (resolved against \p Context) contain an entry named
/// "sycl-restrict". Returns false when the attribute is absent or no pair has
/// that name.
static bool hasSYCLRestrictPropertyIRAttr(const VarDecl *Arg,
3024+
const ASTContext &Context) {
3025+
auto *IRAttr = Arg->getAttr<SYCLAddIRAttributesKernelParameterAttr>();
3026+
// No IR-attribute annotation on this parameter, so nothing to look up.
if (!IRAttr)
3027+
return false;
3028+
3029+
SmallVector<std::pair<std::string, std::string>, 4> NameValuePairs =
3030+
IRAttr->getAttributeNameValuePairs(Context);
3031+
// Only the attribute name is inspected; the paired value is ignored.
return std::any_of(
3032+
NameValuePairs.begin(), NameValuePairs.end(),
3033+
[](const std::pair<std::string, std::string> &NameValuePair) {
3034+
return NameValuePair.first == "sycl-restrict";
3035+
});
3036+
}
3037+
30233038
void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
30243039
llvm::Function *Fn,
30253040
const FunctionArgList &Args) {
@@ -3244,9 +3259,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
32443259

32453260
// Set 'noalias' if an argument type has the `restrict` qualifier.
32463261
if (Arg->getType().isRestrictQualified() ||
3247-
(CurCodeDecl &&
3248-
CurCodeDecl->hasAttr<SYCLIntelKernelArgsRestrictAttr>() &&
3249-
Arg->getType()->isPointerType()) ||
3262+
(Arg->getType()->isPointerType() &&
3263+
((CurCodeDecl &&
3264+
CurCodeDecl->hasAttr<SYCLIntelKernelArgsRestrictAttr>()) ||
3265+
hasSYCLRestrictPropertyIRAttr(Arg, getContext()))) ||
32503266
(Arg->hasAttr<RestrictAttr>() && Arg->getType()->isPointerType()))
32513267
AI->addAttr(llvm::Attribute::NoAlias);
32523268
}

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,8 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
14081408
.Cases("intel_gpu_arl_h", "intel_gpu_12_74_4", "arl_h")
14091409
.Cases("intel_gpu_bmg_g21", "intel_gpu_20_1_4", "bmg_g21")
14101410
.Cases("intel_gpu_lnl_m", "intel_gpu_20_4_4", "lnl_m")
1411+
.Cases("intel_gpu_ptl_h", "intel_gpu_30_0_4", "ptl_h")
1412+
.Cases("intel_gpu_ptl_u", "intel_gpu_30_1_1", "ptl_u")
14111413
.Case("nvidia_gpu_sm_50", "sm_50")
14121414
.Case("nvidia_gpu_sm_52", "sm_52")
14131415
.Case("nvidia_gpu_sm_53", "sm_53")
@@ -1498,6 +1500,8 @@ SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
14981500
.Case("arl_h", "INTEL_GPU_ARL_H")
14991501
.Case("bmg_g21", "INTEL_GPU_BMG_G21")
15001502
.Case("lnl_m", "INTEL_GPU_LNL_M")
1503+
.Case("ptl_h", "INTEL_GPU_PTL_H")
1504+
.Case("ptl_u", "INTEL_GPU_PTL_U")
15011505
.Case("sm_50", "NVIDIA_GPU_SM_50")
15021506
.Case("sm_52", "NVIDIA_GPU_SM_52")
15031507
.Case("sm_53", "NVIDIA_GPU_SM_53")
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: %clang_cc1 -fsycl-is-device %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s
2+
3+
// Minimal special class tagged sycl_type(annotated_arg) that wraps a global
// int pointer. Its __init parameter carries the "sycl-restrict" IR attribute
// (with a nullptr value), which is what the kernels in main() exercise.
struct __attribute__((sycl_special_class))
4+
[[__sycl_detail__::sycl_type(annotated_arg)]]
5+
AnnotatedIntPtr {
6+
// Stores the incoming pointer; the parameter is annotated with
// add_ir_attributes_kernel_parameter("sycl-restrict", nullptr).
void __init([[__sycl_detail__::add_ir_attributes_kernel_parameter(
7+
"sycl-restrict", nullptr)]]
8+
__attribute__((opencl_global)) int* InPtr) {
9+
Ptr = InPtr;
10+
}
11+
12+
// Element access forwarded to the wrapped pointer.
int &operator[](unsigned I) const { return Ptr[I]; }
13+
14+
__attribute__((opencl_global)) int *Ptr;
15+
};
16+
17+
// Kernel entry point used by this test: marked sycl_kernel and does nothing
// but invoke the supplied callable. The `name` template parameter is not used
// in the body; it only distinguishes instantiations.
template <typename name, typename Func>
18+
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
19+
kernelFunc();
20+
}
21+
22+
// Each scope below launches one kernel whose lambda captures a, b and c,
// varying which of the three are plain int* and which are AnnotatedIntPtr
// (none, each single one, each pair, then all three). The FileCheck directive
// following each launch expects `noalias` and "sycl-restrict" on exactly the
// kernel parameters in the AnnotatedIntPtr positions and on no others.
// NOTE(review): the captured pointers are never initialized; this looks
// intentional since the file is only compiled to LLVM IR, not executed.
int main() {
23+
{
24+
int *a;
25+
int *b;
26+
int *c;
27+
kernel<class kernel_norestrict>([a, b, c]() { c[0] = a[0] + b[0]; });
28+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_norestrict(ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}})
29+
}
30+
{
31+
AnnotatedIntPtr a;
32+
int *b;
33+
int *c;
34+
kernel<class kernel_restrict1>([a, b, c]() { c[0] = a[0] + b[0]; });
35+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict1(ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}})
36+
}
37+
{
38+
int *a;
39+
AnnotatedIntPtr b;
40+
int *c;
41+
kernel<class kernel_restrict2>([a, b, c]() { c[0] = a[0] + b[0]; });
42+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict2(ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}})
43+
}
44+
{
45+
int *a;
46+
int *b;
47+
AnnotatedIntPtr c;
48+
kernel<class kernel_restrict3>([a, b, c]() { c[0] = a[0] + b[0]; });
49+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict3(ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}})
50+
}
51+
{
52+
AnnotatedIntPtr a;
53+
AnnotatedIntPtr b;
54+
int *c;
55+
kernel<class kernel_restrict4>([a, b, c]() { c[0] = a[0] + b[0]; });
56+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict4(ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}})
57+
}
58+
{
59+
AnnotatedIntPtr a;
60+
int *b;
61+
AnnotatedIntPtr c;
62+
kernel<class kernel_restrict5>([a, b, c]() { c[0] = a[0] + b[0]; });
63+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict5(ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}})
64+
}
65+
{
66+
int *a;
67+
AnnotatedIntPtr b;
68+
AnnotatedIntPtr c;
69+
kernel<class kernel_restrict6>([a, b, c]() { c[0] = a[0] + b[0]; });
70+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict6(ptr addrspace(1) noundef align 4 %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}})
71+
}
72+
{
73+
AnnotatedIntPtr a;
74+
AnnotatedIntPtr b;
75+
AnnotatedIntPtr c;
76+
kernel<class kernel_restrict7>([a, b, c]() { c[0] = a[0] + b[0]; });
77+
// CHECK-DAG: define {{.*}}spir_kernel {{.*}}kernel_restrict7(ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}}, ptr addrspace(1) noalias noundef align 4 "sycl-restrict" %{{.*}})
78+
}
79+
}

clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,14 @@
152152
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=lnl_m -DMAC_STR=LNL_M
153153
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_20_4_4 -### %s 2>&1 | \
154154
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=lnl_m -DMAC_STR=LNL_M
155+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ptl_h -### %s 2>&1 | \
156+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ptl_h -DMAC_STR=PTL_H
157+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_30_0_4 -### %s 2>&1 | \
158+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ptl_h -DMAC_STR=PTL_H
159+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ptl_u -### %s 2>&1 | \
160+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ptl_u -DMAC_STR=PTL_U
161+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_30_1_1 -### %s 2>&1 | \
162+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ptl_u -DMAC_STR=PTL_U
155163
// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
156164
// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
157165
// MACRO: clang{{.*}} "-fsycl-is-host"

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,14 @@ class IntelTargetInfo<string Name, list<Aspect> Aspects, list<int> subGroupSizes
188188
: TargetInfo<Name, IntelBaseAspects # Aspects, subGroupSizesList>;
189189
// Note: only the "canonical" target names are listed here - see
190190
// SYCL::gen::resolveGenDevice().
191-
def : IntelTargetInfo<"intel_gpu_bmg_g21", Fp16Fp64Atomic64, Sg16_32>;
191+
//
192+
// TODO: document how to obtain the "???" values below. Until then, the device
193+
// architectures listed below with "???" are not fully supported.
194+
//
195+
// def : IntelTargetInfo<"intel_gpu_ptl_u", ???, ???>;
196+
// def : IntelTargetInfo<"intel_gpu_ptl_h", ???, ???>;
192197
def : IntelTargetInfo<"intel_gpu_lnl_m", Fp16Fp64Atomic64, Sg16_32>;
198+
def : IntelTargetInfo<"intel_gpu_bmg_g21", Fp16Fp64Atomic64, Sg16_32>;
193199
def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>;
194200
def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>;
195201
def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>;

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/zhaomaosu/unified-runtime.git")
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120120
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
121121

122122
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit eb076da108a49ef1426f38690547a71905f58015
2-
# Merge: d8d8ee90 46832dfd
3-
# Author: Callum Fare <callum@codeplay.com>
4-
# Date: Fri Nov 29 15:54:31 2024 +0000
5-
# Merge pull request #2396 from kswiecicki/init-results-fix
6-
# [L0] Add nullopt check before init results access
7-
set(UNIFIED_RUNTIME_TAG do-alloc-use-pool)
1+
# commit e1cc9b2cfce7f329f0f411b5b84837b511b86a2d
2+
# Merge: 5a1a81b6 b209eba8
3+
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
4+
# Date: Tue Dec 10 14:29:41 2024 +0000
5+
# Merge pull request #2394 from zhaomaosu/do-alloc-use-pool
6+
# [DevASAN] Do allocation with USM pool to reduce memory overhead
7+
set(UNIFIED_RUNTIME_TAG e1cc9b2cfce7f329f0f411b5b84837b511b86a2d)

sycl/doc/UsersManual.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ and not recommended to use in production environment.
4747
Special target values specific to Intel, NVIDIA and AMD Processor Graphics
4848
support are accepted, providing a streamlined interface for AOT. Only one of
4949
these values at a time is supported.
50+
* intel_gpu_ptl_u, intel_gpu_30_1_1 - Panther Lake U Intel graphics architecture
51+
* intel_gpu_ptl_h, intel_gpu_30_0_4 - Panther Lake H Intel graphics architecture
5052
* intel_gpu_lnl_m, intel_gpu_20_4_4 - Lunar Lake Intel graphics architecture
5153
* intel_gpu_bmg_g21, intel_gpu_20_1_4 - Battlemage G21 Intel graphics architecture
5254
* intel_gpu_arl_h, intel_gpu_12_74_4 - Arrow Lake H Intel graphics architecture

sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,22 @@ intel_gpu_lnl_m
394394
|-
395395
|Lunar Lake Intel graphics architecture.
396396

397+
a|
398+
[source]
399+
----
400+
intel_gpu_ptl_h
401+
----
402+
|-
403+
|Panther Lake H Intel graphics architecture.
404+
405+
a|
406+
[source]
407+
----
408+
intel_gpu_ptl_u
409+
----
410+
|-
411+
|Panther Lake U Intel graphics architecture.
412+
397413
a|
398414
[source]
399415
----
@@ -424,6 +440,8 @@ intel_gpu_12_71_4 = intel_gpu_mtl_h
424440
intel_gpu_12_74_4 = intel_gpu_arl_h
425441
intel_gpu_20_1_4 = intel_gpu_bmg_g21
426442
intel_gpu_20_4_4 = intel_gpu_lnl_m
443+
intel_gpu_30_0_4 = intel_gpu_ptl_h
444+
intel_gpu_30_1_1 = intel_gpu_ptl_u
427445
----
428446
|-
429447
|Aliases for Intel graphics architectures.

sycl/include/sycl/ext/oneapi/experimental/common_annotated_properties/properties.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ struct propagateToPtrAnnotation<property_value<PropKeyT, PropValuesTs...>>
6666
//===----------------------------------------------------------------------===//
6767
// Common properties of annotated_arg/annotated_ptr
6868
//===----------------------------------------------------------------------===//
69+
// Compile-time property key for the `restrict` property
// (detail::PropKind::Restrict). The property takes no template arguments, so
// its value type is simply property_value<restrict_key>.
struct restrict_key
70+
: detail::compile_time_property_key<detail::PropKind::Restrict> {
71+
using value_t = property_value<restrict_key>;
72+
};
73+
74+
// The single value of the restrict property, for use in property lists.
inline constexpr restrict_key::value_t restrict;
75+
6976
struct alignment_key
7077
: detail::compile_time_property_key<detail::PropKind::Alignment> {
7178
template <int K>
@@ -74,10 +81,18 @@ struct alignment_key
7481

7582
template <int K> inline constexpr alignment_key::value_t<K> alignment;
7683

84+
// The restrict property is valid only when T is a pointer type.
template <typename T>
85+
struct is_valid_property<T, restrict_key::value_t>
86+
: std::bool_constant<std::is_pointer<T>::value> {};
87+
7788
// The alignment property (for any width W) is valid only when T is a pointer
// type.
template <typename T, int W>
7889
struct is_valid_property<T, alignment_key::value_t<W>>
7990
: std::bool_constant<std::is_pointer<T>::value> {};
8091

92+
// Registers restrict_key as a property key of annotated_ptr.
template <typename T, typename PropertyListT>
93+
struct is_property_key_of<restrict_key, annotated_ptr<T, PropertyListT>>
94+
: std::true_type {};
95+
8196
// Registers alignment_key as a property key of annotated_ptr.
template <typename T, typename PropertyListT>
8297
struct is_property_key_of<alignment_key, annotated_ptr<T, PropertyListT>>
8398
: std::true_type {};
@@ -86,6 +101,10 @@ template <typename T, typename PropertyListT>
86101
struct is_property_key_of<alignment_key, annotated_arg<T, PropertyListT>>
87102
: std::true_type {};
88103

104+
// Registers restrict_key as a property key of annotated_arg.
template <typename T, typename PropertyListT>
105+
struct is_property_key_of<restrict_key, annotated_arg<T, PropertyListT>>
106+
: std::true_type {};
107+
89108
template <> struct propagateToPtrAnnotation<alignment_key> : std::true_type {};
90109

91110
namespace detail {
@@ -94,6 +113,11 @@ template <int N> struct PropertyMetaInfo<alignment_key::value_t<N>> {
94113
static constexpr int value = N;
95114
};
96115

116+
// Meta information for the restrict property: the name "sycl-restrict" paired
// with a null value. The name is the same string that
// hasSYCLRestrictPropertyIRAttr in clang/lib/CodeGen/CGCall.cpp compares
// against.
template <> struct PropertyMetaInfo<restrict_key::value_t> {
117+
static constexpr const char *name = "sycl-restrict";
118+
static constexpr std::nullptr_t value = nullptr;
119+
};
120+
97121
} // namespace detail
98122

99123
} // namespace experimental

0 commit comments

Comments
 (0)