Skip to content

Commit 7f0476c

Browse files
authored
More aggressive Buffer elimination via memref load/store (#291)
1 parent 45a10b9 commit 7f0476c

File tree

8 files changed

+567
-346
lines changed

8 files changed

+567
-346
lines changed

lib/polygeist/Ops.cpp

Lines changed: 433 additions & 311 deletions
Large diffs are not rendered by default.

test/polygeist-opt/affbufcopy.mlir

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
// RUN: polygeist-opt --canonicalize --split-input-file %s -allow-unregistered-dialect | FileCheck %s
22

33
module {
4+
func.func private @print3(i32, i32, i32) -> ()
5+
func.func private @print1(i32) -> ()
46
func.func private @run(%c: i32, %rng : index) -> memref<?xi32> {
57
%c0 = arith.constant 0 : index
68
%c1 = arith.constant 1 : index
@@ -14,7 +16,7 @@ module {
1416
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
1517
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
1618
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
17-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
19+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
1820
}
1921
affine.store %c, %prev[0] : memref<?xi32>
2022
return %prev : memref<?xi32>
@@ -26,7 +28,7 @@ module {
2628
// CHECK-NEXT: %[[i1:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 1] : memref<?xi32>
2729
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 4] : memref<?xi32>
2830
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 11] : memref<?xi32>
29-
// CHECK-NEXT: "test.use"(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
31+
// CHECK-NEXT: func.call @print3(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
3032
// CHECK-NEXT: }
3133
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
3234
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -45,11 +47,11 @@ module {
4547
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
4648
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
4749
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
48-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
50+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
4951
}
5052
affine.store %c, %prev[0] : memref<?xi32>
5153
%v = affine.load %tmp[3, 4] : memref<16x16xi32>
52-
"test.use"(%v) : (i32) -> ()
54+
func.call @print1(%v) : (i32) -> ()
5355
return %prev : memref<?xi32>
5456
}
5557

@@ -64,11 +66,11 @@ module {
6466
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 1] : memref<?xi32>
6567
// CHECK-NEXT: %[[i4:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 4] : memref<?xi32>
6668
// CHECK-NEXT: %[[i5:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 11] : memref<?xi32>
67-
// CHECK-NEXT: "test.use"(%[[i3]], %[[i4]], %[[i5]]) : (i32, i32, i32) -> ()
69+
// CHECK-NEXT: func.call @print3(%[[i3]], %[[i4]], %[[i5]]) : (i32, i32, i32) -> ()
6870
// CHECK-NEXT: }
6971
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
7072
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i1]][3, 4] : memref<16x16xi32>
71-
// CHECK-NEXT: "test.use"(%[[i2]]) : (i32) -> ()
73+
// CHECK-NEXT: call @print1(%[[i2]]) : (i32) -> ()
7274
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
7375
// CHECK-NEXT: }
7476

@@ -87,7 +89,7 @@ module {
8789
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
8890
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
8991
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
90-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
92+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
9193
}
9294
affine.store %c, %prev[0] : memref<?xi32>
9395
return %prev : memref<?xi32>
@@ -99,7 +101,7 @@ module {
99101
// CHECK-NEXT: %[[i1:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 1] : memref<?xi32>
100102
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 4] : memref<?xi32>
101103
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i0]][%arg3 * 3 + %arg2 * 10 + 11] : memref<?xi32>
102-
// CHECK-NEXT: "test.use"(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
104+
// CHECK-NEXT: func.call @print3(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
103105
// CHECK-NEXT: }
104106
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
105107
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -120,7 +122,7 @@ module {
120122
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
121123
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
122124
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
123-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
125+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
124126
}
125127
affine.store %c, %prev[0] : memref<?xi32>
126128
return %prev : memref<?xi32>
@@ -139,7 +141,7 @@ module {
139141
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i1:.+]][%arg2, %arg3] : memref<16x16xi32>
140142
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i1:.+]][%arg2, %arg3 + 1] : memref<16x16xi32>
141143
// CHECK-NEXT: %[[i4:.+]] = affine.load %[[i1:.+]][%arg2 + 1, %arg3] : memref<16x16xi32>
142-
// CHECK-NEXT: "test.use"(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
144+
// CHECK-NEXT: func.call @print3(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
143145
// CHECK-NEXT: }
144146
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
145147
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -162,7 +164,7 @@ module {
162164
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
163165
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
164166
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
165-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
167+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
166168
}
167169
affine.store %c, %prev[0] : memref<?xi32>
168170
return %prev : memref<?xi32>
@@ -183,7 +185,7 @@ module {
183185
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i1:.+]][%arg3, %arg4] : memref<16x16xi32>
184186
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i1:.+]][%arg3, %arg4 + 1] : memref<16x16xi32>
185187
// CHECK-NEXT: %[[i4:.+]] = affine.load %[[i1:.+]][%arg3 + 1, %arg4] : memref<16x16xi32>
186-
// CHECK-NEXT: "test.use"(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
188+
// CHECK-NEXT: func.call @print3(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
187189
// CHECK-NEXT: }
188190
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
189191
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -206,7 +208,7 @@ module {
206208
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
207209
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
208210
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
209-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
211+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
210212
}
211213
affine.store %c, %prev[0] : memref<?xi32>
212214
return %prev : memref<?xi32>
@@ -218,7 +220,7 @@ module {
218220
// CHECK-NEXT: %[[i1:.+]] = affine.load %[[i0:.+]][%arg4 * 3 + %arg3 * 10 + 1] : memref<?xi32>
219221
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i0:.+]][%arg4 * 3 + %arg3 * 10 + 4] : memref<?xi32>
220222
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i0:.+]][%arg4 * 3 + %arg3 * 10 + 11] : memref<?xi32>
221-
// CHECK-NEXT: "test.use"(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
223+
// CHECK-NEXT: func.call @print3(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
222224
// CHECK-NEXT: }
223225
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
224226
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -241,7 +243,7 @@ module {
241243
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
242244
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
243245
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
244-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
246+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
245247
}
246248
affine.store %c, %prev[0] : memref<?xi32>
247249
return %prev : memref<?xi32>
@@ -262,7 +264,7 @@ module {
262264
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i1]][%arg3, %arg4] : memref<16x16xi32>
263265
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i1]][%arg3, %arg4 + 1] : memref<16x16xi32>
264266
// CHECK-NEXT: %[[i4:.+]] = affine.load %[[i1]][%arg3 + 1, %arg4] : memref<16x16xi32>
265-
// CHECK-NEXT: "test.use"(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
267+
// CHECK-NEXT: call @print3(%[[i2]], %[[i3]], %[[i4]]) : (i32, i32, i32) -> ()
266268
// CHECK-NEXT: }
267269
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
268270
// CHECK-NEXT: return %[[i0]] : memref<?xi32>
@@ -283,7 +285,7 @@ module {
283285
%v = affine.load %tmp[%arg4, %arg5] : memref<16x16xi32>
284286
%v2 = affine.load %tmp[%arg4, 1 + %arg5] : memref<16x16xi32>
285287
%v3 = affine.load %tmp[1 + %arg4, %arg5] : memref<16x16xi32>
286-
"test.use"(%v, %v2, %v3) : (i32, i32, i32) -> ()
288+
func.call @print3(%v, %v2, %v3) : (i32, i32, i32) -> ()
287289
}
288290
affine.store %c, %prev[0] : memref<?xi32>
289291
return %prev : memref<?xi32>
@@ -295,7 +297,7 @@ module {
295297
// CHECK-NEXT: %[[i1:.+]] = affine.load %[[i0]][%arg5 * 3 + %arg4 * 10 + symbol(%arg3)] : memref<?xi32>
296298
// CHECK-NEXT: %[[i2:.+]] = affine.load %[[i0]][%arg5 * 3 + %arg4 * 10 + symbol(%arg3) + 3] : memref<?xi32>
297299
// CHECK-NEXT: %[[i3:.+]] = affine.load %[[i0]][%arg5 * 3 + %arg4 * 10 + symbol(%arg3) + 10] : memref<?xi32>
298-
// CHECK-NEXT: "test.use"(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
300+
// CHECK-NEXT: func.call @print3(%[[i1]], %[[i2]], %[[i3]]) : (i32, i32, i32) -> ()
299301
// CHECK-NEXT: }
300302
// CHECK-NEXT: affine.store %arg0, %[[i0]][0] : memref<?xi32>
301303
// CHECK-NEXT: return %[[i0]] : memref<?xi32>

test/polygeist-opt/bufcopy.mlir

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -179,24 +179,14 @@ module {
179179

180180
// CHECK: func.func private @runDouble(%[[arg0:.+]]: index, %[[arg1:.+]]: memref<?xi32>) {
181181
// CHECK-NEXT: %[[V0:.+]] = memref.alloca(%[[arg0]]) : memref<?xi32>
182-
// CHECK-NEXT: %[[V1:.+]] = memref.alloc(%[[arg0]]) : memref<?xi32>
183-
// CHECK-NEXT: affine.for %[[arg2:.+]] = 0 to %[[arg0]] {
184-
// CHECK-NEXT: %[[V2:.+]] = affine.load %[[arg1]][%[[arg2]]] : memref<?xi32>
185-
// CHECK-NEXT: affine.store %[[V2]], %[[V1]][%[[arg2]]] : memref<?xi32>
186-
// CHECK-NEXT: }
187182
// CHECK-NEXT: affine.for %[[arg2:.+]] = 0 to 22 {
188183
// CHECK-NEXT: %[[V2:.+]] = affine.load %[[V0]][%[[arg2]]] : memref<?xi32>
189184
// CHECK-NEXT: %[[V3:.+]] = arith.addi %[[V2]], %[[V2]] : i32
190185
// CHECK-NEXT: memref.store %[[V3]], %[[V0]][%[[arg2]]] : memref<?xi32>
191186
// CHECK-NEXT: }
192187
// CHECK-NEXT: affine.for %[[arg2:.+]] = 0 to %[[arg0]] {
193188
// CHECK-NEXT: %[[V2:.+]] = affine.load %[[V0]][%[[arg2]]] : memref<?xi32>
194-
// CHECK-NEXT: affine.store %[[V2]], %[[V1]][%[[arg2]]] : memref<?xi32>
195-
// CHECK-NEXT: }
196-
// CHECK-NEXT: affine.for %[[arg2:.+]] = 0 to %[[arg0]] {
197-
// CHECK-NEXT: %[[V2:.+]] = affine.load %[[V1]][%[[arg2]]] : memref<?xi32>
198189
// CHECK-NEXT: affine.store %[[V2]], %[[arg1]][%[[arg2]]] : memref<?xi32>
199190
// CHECK-NEXT: }
200-
// CHECK-NEXT: memref.dealloc %[[V1]] : memref<?xi32>
201191
// CHECK-NEXT: return
202192
// CHECK-NEXT: }

test/polygeist-opt/multibuf.mlir

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
// RUN: polygeist-opt --canonicalize --split-input-file %s --allow-unregistered-dialect | FileCheck %s
2+
3+
module {
4+
func.func @multi(%arg0: i32, %arg1: memref<?xmemref<?xi8>>, %arg2: index, %arg3: index) -> (i32, i32) {
5+
%c0 = arith.constant 0 : index
6+
%c1 = arith.constant 1 : index
7+
%c8 = arith.constant 8 : index
8+
%c8_i64 = arith.constant 8 : i64
9+
%c2_i32 = arith.constant 2 : i32
10+
%alloca = memref.alloca(%arg3) : memref<?x2xi32>
11+
scf.for %arg4 = %c0 to %arg3 step %c1 {
12+
%a = arith.index_cast %arg4 : index to i32
13+
memref.store %c2_i32, %alloca[%arg4, %c0] : memref<?x2xi32>
14+
memref.store %a, %alloca[%arg4, %c1] : memref<?x2xi32>
15+
}
16+
%a10 = memref.load %alloca[%arg2, %c0] : memref<?x2xi32>
17+
%a11 = memref.load %alloca[%arg2, %c1] : memref<?x2xi32>
18+
return %a10, %a11 : i32, i32
19+
}
20+
}
21+
22+
// CHECK: func.func @multi(%arg0: i32, %arg1: memref<?xmemref<?xi8>>, %arg2: index, %arg3: index)
23+
// CHECK-NEXT: %c2_i32 = arith.constant 2 : i32
24+
// CHECK-NEXT: %[[i0:.+]] = arith.index_cast %arg2 : index to i32
25+
// CHECK-NEXT: return %c2_i32, %[[i0]] : i32, i32
26+
// CHECK-NEXT: }

test/polygeist-opt/scanbuf.mlir

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
// RUN: polygeist-opt --canonicalize --split-input-file %s --allow-unregistered-dialect | FileCheck %s
2+
3+
module {
4+
llvm.mlir.global internal constant @str5("%d \00") {addr_space = 0 : i32}
5+
llvm.func @scanf(!llvm.ptr<i8>, ...) -> i32
6+
func.func @overwrite(%arg: index, %arg2: index) -> (i32) {
7+
%c0 = arith.constant 0 : index
8+
%c1 = arith.constant 1 : index
9+
%c8 = arith.constant 8 : index
10+
%c8_i64 = arith.constant 8 : i64
11+
%c2_i32 = arith.constant 2 : i32
12+
%alloca = memref.alloca(%arg) : memref<?xi32>
13+
%alloca2 = memref.alloca() : memref<i32>
14+
%ptr = "polygeist.memref2pointer"(%alloca2) : (memref<i32>) -> !llvm.ptr<i32>
15+
16+
%6 = llvm.mlir.addressof @str5 : !llvm.ptr<array<6 x i8>>
17+
%7 = llvm.getelementptr %6[0, 0] : (!llvm.ptr<array<6 x i8>>) -> !llvm.ptr<i8>
18+
19+
scf.for %arg4 = %c0 to %arg step %c1 {
20+
%12 = llvm.call @scanf(%7, %ptr) : (!llvm.ptr<i8>, !llvm.ptr<i32>) -> i32
21+
%ld = memref.load %alloca2[] : memref<i32>
22+
memref.store %ld, %alloca[%arg4] : memref<?xi32>
23+
}
24+
%a10 = memref.load %alloca[%arg2] : memref<?xi32>
25+
return %a10 : i32
26+
}
27+
}
28+
29+
// CHECK: func.func @overwrite(%arg0: index, %arg1: index) -> i32 {
30+
// CHECK: scf.for %arg2 = %c0 to %arg0 step %c1 {
31+
// CHECK-NEXT: %[[i4:.+]] = llvm.call @scanf(%[[i2:.+]], %[[i0:.+]]) : (!llvm.ptr<i8>, !llvm.ptr<i32>) -> i32
32+
// CHECK-NEXT: %[[i5:.+]] = memref.load %[[alloca_0:.+]][] : memref<i32>
33+
// CHECK-NEXT: memref.store %[[i5]], %[[alloca:.+]][%arg2] : memref<?xi32>
34+
// CHECK-NEXT: }
35+
// CHECK-NEXT: %[[i3:.+]] = memref.load %[[alloca]][%arg1] : memref<?xi32>
36+
// CHECK-NEXT: return %[[i3]] : i32
37+
// CHECK-NEXT: }

test/polygeist-opt/shmemfwd.mlir

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
// RUN: polygeist-opt --canonicalize --split-input-file %s --allow-unregistered-dialect | FileCheck %s
2+
3+
module {
4+
func.func private @print1(f32)
5+
func.func @main() {
6+
%c-1 = arith.constant -1 : index
7+
%c0 = arith.constant 0 : index
8+
%c1 = arith.constant 1 : index
9+
%c2 = arith.constant 2 : index
10+
%alloca = memref.alloca() : memref<2xf32>
11+
scf.parallel (%arg2) = (%c0) to (%c2) step (%c1) {
12+
%0 = arith.index_cast %arg2 : index to i32
13+
%1 = arith.sitofp %0 : i32 to f32
14+
memref.store %1, %alloca[%arg2] : memref<2xf32>
15+
"polygeist.barrier"(%arg2, %c0) : (index, index) -> ()
16+
%2 = arith.cmpi eq, %arg2, %c1 : index
17+
scf.if %2 {
18+
%3 = arith.addi %arg2, %c-1 : index
19+
%4 = memref.load %alloca[%3] : memref<2xf32>
20+
func.call @print1(%4) : (f32) -> ()
21+
}
22+
scf.yield
23+
}
24+
return
25+
}
26+
}
27+
28+
// CHECK: func.func @main() {
29+
// CHECK-NEXT: %[[cm1:.+]] = arith.constant -1 : index
30+
// CHECK-NEXT: %[[c0:.+]] = arith.constant 0 : index
31+
// CHECK-NEXT: %[[c1:.+]] = arith.constant 1 : index
32+
// CHECK-NEXT: %[[c2:.+]] = arith.constant 2 : index
33+
// CHECK-NEXT: scf.parallel (%[[arg0:.+]]) = (%[[c0]]) to (%[[c2]]) step (%[[c1]]) {
34+
// CHECK-NEXT: %[[i0:.+]] = arith.cmpi eq, %arg0, %[[c1]] : index
35+
// CHECK-NEXT: scf.if %[[i0]] {
36+
// CHECK-NEXT: %[[i1:.+]] = arith.addi %[[arg0]], %[[cm1]] : index
37+
// CHECK-NEXT: %[[i2:.+]] = arith.index_cast %[[i1]] : index to i32
38+
// CHECK-NEXT: %[[i3:.+]] = arith.sitofp %[[i2]] : i32 to f32
39+
// CHECK-NEXT: func.call @print1(%[[i3]]) : (f32) -> ()
40+
// CHECK-NEXT: }
41+
// CHECK-NEXT: scf.yield
42+
// CHECK-NEXT: }
43+
// CHECK-NEXT: return
44+
// CHECK-NEXT: }

tools/cgeist/Test/Verification/stream.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ void run(cudaStream_t stream1, int *array, int n) {
1414
}
1515

1616
// CHECK: func.func @_Z3runP10cudaStreamPii(%[[arg0:.+]]: memref<?x!llvm.struct<()>>, %[[arg1:.+]]: memref<?xi32>, %[[arg2:.+]]: i32) attributes {llvm.linkage = #llvm.linkage<external>} {
17-
// CHECK-NEXT: %[[c10:.+]] = arith.constant 10 : index
18-
// CHECK-NEXT: %[[c1:.+]] = arith.constant 1 : index
19-
// CHECK-NEXT: %[[c20:.+]] = arith.constant 20 : index
17+
// CHECK-DAG: %[[c10:.+]] = arith.constant 10 : index
18+
// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index
19+
// CHECK-DAG: %[[c20:.+]] = arith.constant 20 : index
2020
// CHECK-NEXT: %[[V0:.+]] = "polygeist.stream2token"(%[[arg0]]) : (memref<?x!llvm.struct<()>>) -> !gpu.async.token
2121
// CHECK-NEXT: %[[V1:.+]] = gpu.launch async [%[[V0:.+]]] blocks(%[[arg3:.+]], %[[arg4:.+]], %[[arg5:.+]]) in (%[[arg9:.+]] = %[[c10]], %[[arg10:.+]] = %[[c1]], %[[arg11:.+]] = %[[c1]]) threads(%[[arg6:.+]], %[[arg7:.+]], %[[arg8:.+]]) in (%[[arg12:.+]] = %[[c20]], %[[arg13:.+]] = %[[c1]], %[[arg14:.+]] = %[[c1]]) {
2222
// CHECK-NEXT: func.call @_Z21__device_stub__squarePii(%[[arg1:.+]], %[[arg2:.+]]) : (memref<?xi32>, i32) -> ()

tools/cgeist/Test/elaborated-init.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@ void testArrayInitExpr()
1111
}
1212

1313
// CHECK: func.func @_Z17testArrayInitExprv()
14-
// CHECK-NEXT: %[[c4_i32:.+]] = arith.constant 4 : i32
15-
// CHECK-NEXT: %[[c3_i32:.+]] = arith.constant 3 : i32
16-
// CHECK-NEXT: %[[c2_i32:.+]] = arith.constant 2 : i32
17-
// CHECK-NEXT: %[[c1_i32:.+]] = arith.constant 1 : i32
14+
// CHECK-DAG: %[[c4_i32:.+]] = arith.constant 4 : i32
15+
// CHECK-DAG: %[[c3_i32:.+]] = arith.constant 3 : i32
16+
// CHECK-DAG: %[[c2_i32:.+]] = arith.constant 2 : i32
17+
// CHECK-DAG: %[[c1_i32:.+]] = arith.constant 1 : i32
1818
// CHECK-NEXT: %[[V0:.+]] = memref.alloca() : memref<1x!llvm.struct<(array<4 x i32>)>>
1919
// CHECK-NEXT: %[[V1:.+]] = "polygeist.memref2pointer"(%[[V0]]) : (memref<1x!llvm.struct<(array<4 x i32>)>>) -> !llvm.ptr<struct<(array<4 x i32>)>>
2020
// CHECK-NEXT: %[[V2:.+]] = llvm.getelementptr %[[V1]][0, 0] : (!llvm.ptr<struct<(array<4 x i32>)>>) -> !llvm.ptr<array<4 x i32>>

0 commit comments

Comments
 (0)