Skip to content

Commit 9cb95fc

Browse files
committed
add 1d test
Signed-off-by: dchigarev <[email protected]>
1 parent d4e5039 commit 9cb95fc

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: gc-opt %s --allocs-to-slm | FileCheck %s
2+
3+
// Compute thread offset for SLM: (Xthread_idx * Yblock_sz * Zblock_sz + Ythread_idx * Zblock_sz + Zthread_idx) * Xchunk_size
4+
// CHECK: #map = affine_map<(d0, d1, d2) -> ((d0 * 12 + d1 * 4 + d2) * 256)>
5+
6+
// Test body: one 1-D allocation outside gpu.launch (%0) and two inside it:
// %1 with an explicit memory space and %2 without one. The checks below pin
// what is expected for each of the three after the pass has run.
func.func @entry() {
  // Constants used as the grid (2 x 2 x 1) and block (2 x 3 x 4) sizes below.
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index

  // Memory space wasn't assigned as it's allocated outside of gpu.launch block
  // CHECK: %[[NEW_MEMREF_0:.*]] = memref.alloc() : memref<256xf16>
  %0 = memref.alloc() : memref<256xf16>
  // Capture thread-id variables
  // CHECK: gpu.launch blocks(%[[ARG0:.+]], %[[ARG1:.+]], %[[ARG2:.+]]) in (%[[ARG6:.+]] = %c2, %[[ARG7:.+]] = %c2, %[[ARG8:.+]] = %c1) threads
  // CHECK-SAME: (%[[THREAD_X:.+]], %[[THREAD_Y:.+]], %[[THREAD_Z:.+]]) in
  // CHECK-SAME: (%[[ARG9:.+]] = %c2, %[[ARG10:.+]] = %c3, %[[ARG11:.+]] = %c4) {
  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c2, %sz_by = %c2, %sz_bz = %c1)
             threads(%tx, %ty, %tz) in (%sz_tx = %c2, %sz_ty = %c3, %sz_tz = %c4) {
    // Memory space was left unchanged as it's explicitly specified
    // CHECK: %[[NEW_MEMREF_1:.*]] = memref.alloc() : memref<256xf16, 1>
    %1 = memref.alloc() : memref<256xf16, 1>
    // Added 'shared' memory space and allocated SLM for each thread (2 * 3 * 4 = 24; 24 * 256 = 6144)
    // CHECK: %[[NEW_MEMREF_2:.*]] = memref.alloc() : memref<6144xf16, 3>
    // Each thread is expected to get its own 256-element window of the SLM
    // buffer, at the offset produced by #map from the three thread ids.
    // CHECK: %[[OFF_X:.*]] = affine.apply #map(%[[THREAD_X]], %[[THREAD_Y]], %[[THREAD_Z]])
    // CHECK: %[[NEW_MEMREF_3:.*]] = memref.subview %[[NEW_MEMREF_2]][%[[OFF_X]]] [256] [1]
    // CHECK-SAME: memref<6144xf16, 3> to memref<256xf16, strided<[1], offset: ?>, 3>
    %2 = memref.alloc() : memref<256xf16>

    // The user of %2 must be rewritten to consume the per-thread subview.
    // CHECK: linalg.add ins(%[[NEW_MEMREF_1]], %[[NEW_MEMREF_3]] :
    // CHECK-SAME: memref<256xf16, 1>, memref<256xf16, strided<[1], offset: ?>, 3>) outs(%[[NEW_MEMREF_0]] : memref<256xf16>)
    linalg.add ins(%1, %2 : memref<256xf16, 1>, memref<256xf16>) outs(%0 : memref<256xf16>)
    // CHECK: memref.dealloc %[[NEW_MEMREF_1]] : memref<256xf16, 1>
    // Verify that there are no deallocs for SLM.
    // FileCheck matches text outside of {{...}} literally, so the negative
    // patterns below deliberately stop at the SSA value: a trailing regex-like
    // suffix would be compared verbatim and the checks could never fire.
    // CHECK-NOT: memref.dealloc %[[NEW_MEMREF_2]]
    // CHECK-NOT: memref.dealloc %[[NEW_MEMREF_3]]
    memref.dealloc %1 : memref<256xf16, 1>
    memref.dealloc %2 : memref<256xf16>
    gpu.terminator
  }
  return
}

0 commit comments

Comments
 (0)