Skip to content

Commit 430bd9c

Browse files
author
Victor Lomuller
committed
address last feedbacks
1 parent c163fb1 commit 430bd9c

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

sycl/source/detail/jit_compiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
10211021
NDRDesc, nullptr, nullptr, std::move(KernelBundleImplPtr),
10221022
std::move(CGData), std::move(FusedArgs), FusedOrCachedKernelName, {}, {},
10231023
CGType::Kernel, KernelCacheConfig, false /* KernelIsCooperative */,
1024-
false /* KernelUsesClusterLaunch*/, 0));
1024+
false /* KernelUsesClusterLaunch*/, 0 /* KernelWorkGroupMemorySize */));
10251025
return FusedCG;
10261026
}
10271027

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
//
4+
5+
// UNSUPPORTED: gpu-intel-gen12
6+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
7+
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/oneapi/work_group_scratch_memory.hpp>
10+
#include <sycl/usm.hpp>
11+
12+
constexpr size_t Size = 1024;
13+
using DataType = int;
14+
15+
namespace sycl_ext = sycl::ext::oneapi::experimental;
16+
17+
int main() {
18+
sycl::queue queue;
19+
DataType *a = sycl::malloc_device<DataType>(Size, queue);
20+
DataType *b = sycl::malloc_device<DataType>(Size, queue);
21+
std::vector<DataType> a_host(Size, 1.0);
22+
std::vector<DataType> b_host(Size, -5.0);
23+
24+
queue.copy(a_host.data(), a, Size).wait_and_throw();
25+
26+
queue
27+
.submit([&](sycl::handler &cgh) {
28+
cgh.parallel_for(sycl::nd_range<1>({Size}, {Size}),
29+
sycl_ext::properties{sycl_ext::work_group_scratch_size(
30+
Size * sizeof(DataType))},
31+
[=](sycl::nd_item<1> it) {
32+
b[it.get_local_linear_id()] =
33+
a[it.get_local_linear_id()];
34+
})
35+
.wait_and_throw();
36+
37+
queue.copy(b, b_host.data(), Size).wait_and_throw();
38+
for (size_t i = 0; i < b_host.size(); i++) {
39+
assert(b_host[i] == a_host[i]);
40+
}
41+
sycl::free(a, queue);
42+
sycl::free(b, queue);
43+
}

0 commit comments

Comments
 (0)