Skip to content

Commit d41fee7

Browse files
committed
Merge branch 'free_function_kernel_scratch_memory' of https://github.com/lbushi25/llvm into free_function_kernel_scratch_memory
2 parents dc1e3d0 + bd359c6 commit d41fee7

File tree

4 files changed

+135
-66
lines changed

4 files changed

+135
-66
lines changed

llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ sycl::getKernelNamesUsingImplicitLocalMem(const Module &M) {
7171
return -1;
7272
};
7373
llvm::for_each(M.functions(), [&](const Function &F) {
74-
if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
74+
if (F.getCallingConv() == CallingConv::SPIR_KERNEL &&
75+
F.hasFnAttribute(WORK_GROUP_STATIC_ATTR)) {
7576
int ArgPos = GetArgumentPos(F);
76-
if (ArgPos >= 0 || F.hasFnAttribute(WORK_GROUP_STATIC_ATTR))
77-
SPIRKernelNames.emplace_back(F.getName(), ArgPos);
77+
SPIRKernelNames.emplace_back(F.getName(), ArgPos);
7878
}
7979
});
8080
}

llvm/test/SYCLLowerIR/work_group_static.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,29 @@ entry:
2222
ret void
2323
}
2424

25+
; Function Attrs: convergent norecurse
26+
; CHECK: @__sycl_kernel_B{{.*}} #[[ATTRS:[0-9]+]]
27+
define weak_odr dso_local spir_kernel void @__sycl_kernel_B(ptr addrspace(1) %0) local_unnamed_addr #1 !kernel_arg_addr_space !5 {
28+
entry:
29+
%1 = tail call spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64 128) #1
30+
ret void
31+
}
32+
33+
; Function Attrs: convergent norecurse
34+
; CHECK: @__sycl_kernel_C{{.*}} #[[ATTRS]]
35+
define weak_odr dso_local spir_kernel void @__sycl_kernel_C(ptr addrspace(1) %0) local_unnamed_addr #1 !kernel_arg_addr_space !5 {
36+
entry:
37+
%1 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 128, i64 4) #1
38+
ret void
39+
}
40+
41+
; Function Attrs: convergent
42+
declare dso_local spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64, i64) local_unnamed_addr #1
43+
2544
; Function Attrs: convergent
2645
declare dso_local spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64) local_unnamed_addr #1
2746

47+
; CHECK: #[[ATTRS]] = {{.*}} "sycl-work-group-static"
2848
attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" "sycl-work-group-static"="1" }
2949
attributes #1 = { convergent norecurse }
3050

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// UNSUPPORTED: target-amd
3+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
4+
5+
// RUN: %{build} -o %t.out
6+
// RUN: %{run} %t.out
7+
8+
// This test verifies that we can compile, run and get correct results when
9+
// using a free function kernel that allocates shared local memory in a kernel
10+
// either by way of the work group scratch memory extension or the work group
11+
// static memory extension.
12+
13+
#include <sycl/ext/oneapi/work_group_static.hpp>
14+
15+
#include "helpers.hpp"
16+
#include <cassert>
17+
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
18+
#include <sycl/ext/oneapi/free_function_queries.hpp>
19+
#include <sycl/group_barrier.hpp>
20+
#include <sycl/usm.hpp>
21+
22+
namespace syclext = sycl::ext::oneapi;
23+
namespace syclexp = sycl::ext::oneapi::experimental;
24+
25+
constexpr int SIZE = 16;
26+
27+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
28+
void scratch_kernel(float *src, float *dst) {
29+
size_t lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
30+
float *local_mem = (float *)syclexp::get_work_group_scratch_memory();
31+
local_mem[lid] = 2 * src[lid];
32+
dst[lid] = local_mem[lid];
33+
}
34+
35+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
36+
void static_kernel(float *src, float *dst) {
37+
sycl::nd_item<1> item = syclext::this_work_item::get_nd_item<1>();
38+
size_t lid = item.get_local_linear_id();
39+
syclexp::work_group_static<float[SIZE]> local_mem;
40+
local_mem[lid] = src[lid] * src[lid];
41+
sycl::group_barrier(item.get_group());
42+
if (item.get_group().leader()) { // Check that memory is indeed shared between
43+
// the work group
44+
for (int i = 0; i < SIZE; ++i)
45+
assert(local_mem[i] == src[i] * src[i]);
46+
}
47+
dst[lid] = local_mem[lid];
48+
}
49+
50+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
51+
void scratch_static_kernel(float *src, float *dst) {
52+
size_t lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
53+
float *scratch_mem = (float *)syclexp::get_work_group_scratch_memory();
54+
syclexp::work_group_static<float[SIZE]> static_mem;
55+
scratch_mem[lid] = src[lid];
56+
static_mem[lid] = src[lid];
57+
dst[lid] = scratch_mem[lid] + static_mem[lid];
58+
}
59+
60+
int main() {
61+
sycl::queue q;
62+
float *src = sycl::malloc_shared<float>(SIZE, q);
63+
float *dst = sycl::malloc_shared<float>(SIZE, q);
64+
65+
for (int i = 0; i < SIZE; i++) {
66+
src[i] = i;
67+
}
68+
69+
auto scratchbndl = syclexp::get_kernel_bundle<scratch_kernel,
70+
sycl::bundle_state::executable>(
71+
q.get_context());
72+
auto staticbndl =
73+
syclexp::get_kernel_bundle<static_kernel, sycl::bundle_state::executable>(
74+
q.get_context());
75+
auto scratchstaticbndl = syclexp::get_kernel_bundle<
76+
scratch_static_kernel, sycl::bundle_state::executable>(q.get_context());
77+
78+
sycl::kernel ScratchKernel =
79+
scratchbndl.template ext_oneapi_get_kernel<scratch_kernel>();
80+
sycl::kernel StaticKernel =
81+
staticbndl.template ext_oneapi_get_kernel<static_kernel>();
82+
sycl::kernel ScratchStaticKernel =
83+
scratchstaticbndl.template ext_oneapi_get_kernel<scratch_static_kernel>();
84+
syclexp::launch_config ScratchKernelcfg{
85+
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
86+
syclexp::properties{
87+
syclexp::work_group_scratch_size(SIZE * sizeof(float))}};
88+
syclexp::launch_config StaticKernelcfg{
89+
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE))};
90+
91+
syclexp::nd_launch(q, ScratchKernelcfg, ScratchKernel, src, dst);
92+
q.wait();
93+
for (int i = 0; i < SIZE; i++) {
94+
assert(dst[i] == 2 * src[i]);
95+
}
96+
97+
syclexp::nd_launch(q, StaticKernelcfg, StaticKernel, src, dst);
98+
q.wait();
99+
for (int i = 0; i < SIZE; i++) {
100+
assert(dst[i] == src[i] * src[i]);
101+
}
102+
103+
syclexp::nd_launch(q, ScratchKernelcfg, ScratchStaticKernel, src, dst);
104+
q.wait();
105+
for (int i = 0; i < SIZE; i++) {
106+
assert(dst[i] == 2 * src[i]);
107+
}
108+
109+
sycl::free(src, q);
110+
sycl::free(dst, q);
111+
return 0;
112+
}

sycl/test-e2e/FreeFunctionKernels/work_group_scratch_memory.cpp

Lines changed: 0 additions & 63 deletions
This file was deleted.

0 commit comments

Comments
 (0)