Skip to content

Commit 4cbf3c2

Browse files
authored
[Analysis] Use verify-diagnostics for print-based tests (NFC) (#5970)
This is more robust because the diagnostics are attached to op locations, so each expected output is checked against the specific op that produced it.
1 parent e1e287a commit 4cbf3c2

File tree

4 files changed

+349
-396
lines changed

4 files changed

+349
-396
lines changed

test/Analysis/test-alias.mlir

Lines changed: 51 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s --mlir-disable-threading -test-print-alias -split-input-file 2>&1 | FileCheck %s
1+
// RUN: triton-opt %s -mlir-disable-threading -test-print-alias -verify-diagnostics -o /dev/null
22

33
#AL = #ttg.blocked<{sizePerThread = [1, 4], threadsPerWarp = [4, 8], warpsPerCTA = [4, 1], order = [1, 0]}>
44
#BL = #ttg.blocked<{sizePerThread = [1, 4], threadsPerWarp = [1, 32], warpsPerCTA = [4, 1], order = [1, 0]}>
@@ -11,8 +11,6 @@
1111

1212
module attributes {"ttg.num-warps" = 4 : i32, "ttg.target" = "cuda:80"} {
1313

14-
// CHECK-LABEL: matmul_loop
15-
// CHECK-NOT: ->
1614
// There shouldn't be any aliasing with the dot op encoding.
1715
tt.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
1816
%a_ptr_init = tt.splat %A : !tt.ptr<f16> -> tensor<128x32x!tt.ptr<f16>, #AL>
@@ -38,47 +36,42 @@ tt.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
3836
tt.return
3937
}
4038

41-
// CHECK-LABEL: alloc
4239
tt.func @alloc(%A : !tt.ptr<f16>) {
43-
// CHECK: %0 -> %0
40+
// expected-remark @below {{%0 -> %0}}
4441
%cst2 = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
4542
tt.return
4643
}
4744

48-
// CHECK-LABEL: alloc_init
4945
tt.func @alloc_init(%A : !tt.ptr<f16>) {
5046
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
51-
// CHECK: %0 -> %0
47+
// expected-remark @below {{%0 -> %0}}
5248
%cst1 = ttg.local_alloc %cst0 : (tensor<16x16xf16, #AL>) -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory>
5349
tt.return
5450
}
5551

56-
// CHECK-LABEL: trans
5752
tt.func @trans(%A : !tt.ptr<f16>) {
58-
// CHECK: %0 -> %0
53+
// expected-remark @below {{%0 -> %0}}
5954
%tensor = ttg.local_alloc : () -> !ttg.memdesc<16x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
60-
// CHECK: %1 -> %0
55+
// expected-remark @below {{%1 -> %0}}
6156
%b = ttg.memdesc_trans %tensor {order=array<i32: 1,0>} : !ttg.memdesc<16x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<32x16xf16, #A_SHARED_T, #ttg.shared_memory, mutable>
6257
tt.return
6358
}
6459

65-
// CHECK-LABEL: subview
6660
tt.func @subview(%A : !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory>) {
6761
%index = arith.constant 0 : i32
68-
// CHECK: %0 -> %0
62+
// expected-remark @below {{%0 -> %0}}
6963
%a = ttg.local_alloc : () -> !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
70-
// CHECK-NEXT: %1 -> %0
64+
// expected-remark @below {{%1 -> %0}}
7165
%cst1 = ttg.memdesc_subview %a[%index, %index, %index] : !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
7266
tt.return
7367
}
7468

75-
// CHECK-LABEL: if_alias
7669
tt.func @if_alias(%i1 : i1) {
77-
// CHECK: %0 -> %0
70+
// expected-remark @below {{%0 -> %0}}
7871
%a = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
79-
// CHECK: %1 -> %1
72+
// expected-remark @below {{%1 -> %1}}
8073
%b = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
81-
// CHECK-NEXT: %2 -> %0,%1
74+
// expected-remark @below {{%2 -> %0,%1}}
8275
%cst2 = scf.if %i1 -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable> {
8376
scf.yield %a : !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
8477
} else {
@@ -87,46 +80,44 @@ tt.func @if_alias(%i1 : i1) {
8780
tt.return
8881
}
8982

90-
// CHECK-LABEL: for
9183
tt.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
92-
// CHECK: %0 -> %0
84+
// expected-remark @below {{%0 -> %0}}
9385
%a = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
94-
// CHECK: %1 -> %1
86+
// expected-remark @below {{%1 -> %1}}
9587
%b = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
96-
// CHECK: %2 -> %2
88+
// expected-remark @below {{%2 -> %2}}
9789
%c = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
98-
// CHECK-NEXT: %arg6 -> %0
99-
// CHECK-NEXT: %arg7 -> %1
100-
// CHECK-NEXT: %arg8 -> %2
101-
// CHECK-NEXT: %3#0 -> %0,%1
102-
// CHECK-NEXT: %3#1 -> %0,%1
103-
// CHECK-NEXT: %3#2 -> %0,%1,%2
90+
// expected-remark @below {{%arg6 -> %0}}
91+
// expected-remark @below {{%arg7 -> %1}}
92+
// expected-remark @below {{%arg8 -> %2}}
93+
// expected-remark @below {{%3#0 -> %0,%1}}
94+
// expected-remark @below {{%3#1 -> %0,%1}}
95+
// expected-remark @below {{%3#2 -> %0,%1,%2}}
10496
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a, %b_shared = %b, %c_shared = %c) ->
10597
(!ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
10698
scf.yield %b_shared, %a_shared, %a_shared : !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
10799
}
108100
tt.return
109101
}
110102

111-
// CHECK-LABEL: for_if
112103
tt.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
113-
// CHECK: %0 -> %0
104+
// expected-remark @below {{%0 -> %0}}
114105
%a_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
115-
// CHECK-NEXT: %1 -> %1
106+
// expected-remark @below {{%1 -> %1}}
116107
%b_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
117-
// CHECK-NEXT: %2 -> %2
108+
// expected-remark @below {{%2 -> %2}}
118109
%c_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
119-
// CHECK-NEXT: %arg7 -> %0
120-
// CHECK-NEXT: %arg8 -> %1
121-
// CHECK-NEXT: %arg9 -> %2
122-
// CHECK-NEXT: %3#0 -> %0,%1
123-
// CHECK-NEXT: %3#1 -> %0,%1
124-
// CHECK-NEXT: %3#2 -> %0,%1,%2
110+
// expected-remark @below {{%arg7 -> %0}}
111+
// expected-remark @below {{%arg8 -> %1}}
112+
// expected-remark @below {{%arg9 -> %2}}
113+
// expected-remark @below {{%3#0 -> %0,%1}}
114+
// expected-remark @below {{%3#1 -> %0,%1}}
115+
// expected-remark @below {{%3#2 -> %0,%1,%2}}
125116
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a_shared_init, %b_shared = %b_shared_init, %c_shared = %c_shared_init) ->
126117
(!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
127118
scf.if %i1 {
128119
%index = arith.constant 8 : i32
129-
// CHECK-NEXT: %4 -> %0,%1
120+
// expected-remark @below {{%4 -> %0,%1}}
130121
%cst0 = ttg.memdesc_subview %a_shared[%index, %index] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<32xf16, #A_SHARED, #ttg.shared_memory, mutable>
131122
scf.yield
132123
}
@@ -135,32 +126,31 @@ tt.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
135126
tt.return
136127
}
137128

138-
// CHECK-LABEL: for_for_if
139129
tt.func @for_for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
140-
// CHECK: %0 -> %0
130+
// expected-remark @below {{%0 -> %0}}
141131
%a_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
142-
// CHECK-NEXT: %1 -> %1
132+
// expected-remark @below {{%1 -> %1}}
143133
%b_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
144-
// CHECK-NEXT: %2 -> %2
134+
// expected-remark @below {{%2 -> %2}}
145135
%c_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
146-
// CHECK-NEXT: %arg7 -> %0
147-
// CHECK-NEXT: %arg8 -> %1
148-
// CHECK-NEXT: %arg9 -> %2
149-
// CHECK-NEXT: %3#0 -> %0
150-
// CHECK-NEXT: %3#1 -> %1
151-
// CHECK-NEXT: %3#2 -> %2,%6,%6
136+
// expected-remark @below {{%arg7 -> %0}}
137+
// expected-remark @below {{%arg8 -> %1}}
138+
// expected-remark @below {{%arg9 -> %2}}
139+
// expected-remark @below {{%3#0 -> %0}}
140+
// expected-remark @below {{%3#1 -> %1}}
141+
// expected-remark @below {{%3#2 -> %2,%6,%6}}
152142
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a_shared_init, %b_shared = %b_shared_init, %c_shared = %c_shared_init) ->
153143
(!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
154-
// CHECK-NEXT: %arg11 -> %2,%6,%6
155-
// CHECK-NEXT: %4 -> %2,%6,%6
144+
// expected-remark @below {{%arg11 -> %2,%6,%6}}
145+
// expected-remark @below {{%4 -> %2,%6,%6}}
156146
%c_shared_next = scf.for %jv = %lb to %ub step %step iter_args(%c_shared_next = %c_shared) -> (!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
157-
// CHECK-NEXT: %5 -> %6,%6
147+
// expected-remark @below {{%5 -> %6,%6}}
158148
%c_shared_next_next = scf.if %i1 -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> {
159-
// CHECK-NEXT: %6 -> %6
149+
// expected-remark @below {{%6 -> %6}}
160150
%cst0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
161151
scf.yield %cst0 : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
162152
} else {
163-
// CHECK-NEXT: %6 -> %6
153+
// expected-remark @below {{%6 -> %6}}
164154
%cst0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
165155
scf.yield %cst0 : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
166156
}
@@ -171,32 +161,31 @@ tt.func @for_for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
171161
tt.return
172162
}
173163

174-
// CHECK-LABEL: cf_for
175164
tt.func @cf_for(%arg0: index, %arg1: index, %arg2: index, %arg3: !tt.ptr<f16>, %arg4: !tt.ptr<f16>) {
176165
%idx = arith.constant 0 : i32
177-
// CHECK: %0 -> %0
166+
// expected-remark @below {{%0 -> %0}}
178167
%cst = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
179-
// CHECK-NEXT: %1 -> %1
168+
// expected-remark @below {{%1 -> %1}}
180169
%cst_0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
181-
// CHECK-NEXT: %2 -> %0
170+
// expected-remark @below {{%2 -> %0}}
182171
%0 = ttg.memdesc_subview %cst[%idx, %idx] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
183172
gpu.barrier
184-
// CHECK-NEXT: %3 -> %3
173+
// expected-remark @below {{%3 -> %3}}
185174
%cst_1 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
186-
// CHECK-NEXT: %5 -> %0,%1,%3
187-
// CHECK-NEXT: %6 -> %0,%1,%3
188-
// CHECK-NEXT: %7 -> %0,%1,%3
189175
cf.br ^bb1(%arg0, %cst, %cst_0, %cst_1 : index, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>)
190176
^bb1(%1: index, %2: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, %3: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, %4: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>): // 2 preds: ^bb0, ^bb2
191177
%5 = arith.cmpi slt, %1, %arg1 : index
178+
// expected-remark @below {{%5 -> %0,%1,%3}}
179+
// expected-remark @below {{%6 -> %0,%1,%3}}
180+
// expected-remark @below {{%7 -> %0,%1,%3}}
192181
cf.cond_br %5, ^bb2, ^bb3
193182
^bb2: // pred: ^bb1
194183
gpu.barrier
195184
%8 = arith.addi %1, %arg2 : index
196185
cf.br ^bb1(%8, %4, %2, %3 : index, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>)
197186
^bb3: // pred: ^bb1
198187
gpu.barrier
199-
// CHECK-NEXT: %10 -> %0
188+
// expected-remark @below {{%10 -> %0}}
200189
%9 = ttg.memdesc_subview %0[%idx, %idx] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
201190
tt.return
202191
}

0 commit comments

Comments
 (0)