1- // RUN: triton-opt %s -mlir-disable-threading -test-print-alias -split-input-file 2>&1 | FileCheck %s
1+ // RUN: triton-opt %s -mlir-disable-threading -test-print-alias -verify-diagnostics -o /dev/null
22
33#AL = #ttg.blocked <{sizePerThread = [1 , 4 ], threadsPerWarp = [4 , 8 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
44#BL = #ttg.blocked <{sizePerThread = [1 , 4 ], threadsPerWarp = [1 , 32 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
1111
1212module attributes {" ttg.num-warps" = 4 : i32 , " ttg.target" = " cuda:80" } {
1313
14- // CHECK-LABEL: matmul_loop
15- // CHECK-NOT: ->
1614// There shouldn't be any aliasing with the dot op encoding, so this function is expected to produce no alias remarks.
1715tt.func @matmul_loop (%lb : index , %ub : index , %step : index , %A : !tt.ptr <f16 >, %B : !tt.ptr <f16 >) {
1816 %a_ptr_init = tt.splat %A : !tt.ptr <f16 > -> tensor <128 x32 x!tt.ptr <f16 >, #AL >
@@ -38,47 +36,42 @@ tt.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
3836 tt.return
3937}
4038
41- // CHECK-LABEL: alloc
4239tt.func @alloc (%A : !tt.ptr <f16 >) {
43- // CHECK: %0 -> %0
40+ // expected-remark @below {{ %0 -> %0}}
4441 %cst2 = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
4542 tt.return
4643}
4744
48- // CHECK-LABEL: alloc_init
4945tt.func @alloc_init (%A : !tt.ptr <f16 >) {
5046 %cst0 = arith.constant dense <0.000000e+00 > : tensor <16 x16 xf16 , #AL >
51- // CHECK: %0 -> %0
47+ // expected-remark @below {{ %0 -> %0}}
5248 %cst1 = ttg.local_alloc %cst0 : (tensor <16 x16 xf16 , #AL >) -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory >
5349 tt.return
5450}
5551
56- // CHECK-LABEL: trans
5752tt.func @trans (%A : !tt.ptr <f16 >) {
58- // CHECK: %0 -> %0
53+ // expected-remark @below {{ %0 -> %0}}
5954 %tensor = ttg.local_alloc : () -> !ttg.memdesc <16 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
60- // CHECK: %1 -> %0
55+ // expected-remark @below {{ %1 -> %0}}
6156 %b = ttg.memdesc_trans %tensor {order =array<i32 : 1 ,0 >} : !ttg.memdesc <16 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> !ttg.memdesc <32 x16 xf16 , #A_SHARED_T , #ttg.shared_memory , mutable >
6257 tt.return
6358}
6459
65- // CHECK-LABEL: subview
6660tt.func @subview (%A : !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory >) {
6761 %index = arith.constant 0 : i32
68- // CHECK: %0 -> %0
62+ // expected-remark @below {{ %0 -> %0}}
6963 %a = ttg.local_alloc : () -> !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
70- // CHECK-NEXT: %1 -> %0
64+ // expected-remark @below {{ %1 -> %0}}
7165 %cst1 = ttg.memdesc_subview %a [%index , %index , %index ] : !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
7266 tt.return
7367}
7468
75- // CHECK-LABEL: if_alias
7669tt.func @if_alias (%i1 : i1 ) {
77- // CHECK: %0 -> %0
70+ // expected-remark @below {{ %0 -> %0}}
7871 %a = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
79- // CHECK: %1 -> %1
72+ // expected-remark @below {{ %1 -> %1}}
8073 %b = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
81- // CHECK-NEXT: %2 -> %0,%1
74+ // expected-remark @below {{ %2 -> %0,%1}}
8275 %cst2 = scf.if %i1 -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable > {
8376 scf.yield %a : !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
8477 } else {
@@ -87,46 +80,44 @@ tt.func @if_alias(%i1 : i1) {
8780 tt.return
8881}
8982
90- // CHECK-LABEL: for
9183tt.func @for (%lb : index , %ub : index , %step : index , %A : !tt.ptr <f16 >, %B : !tt.ptr <f16 >) {
92- // CHECK: %0 -> %0
84+ // expected-remark @below {{ %0 -> %0}}
9385 %a = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
94- // CHECK: %1 -> %1
86+ // expected-remark @below {{ %1 -> %1}}
9587 %b = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
96- // CHECK: %2 -> %2
88+ // expected-remark @below {{ %2 -> %2}}
9789 %c = ttg.local_alloc : () -> !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
98- // CHECK-NEXT: %arg6 -> %0
99- // CHECK-NEXT: %arg7 -> %1
100- // CHECK-NEXT: %arg8 -> %2
101- // CHECK-NEXT: %3#0 -> %0,%1
102- // CHECK-NEXT: %3#1 -> %0,%1
103- // CHECK-NEXT: %3#2 -> %0,%1,%2
90+ // expected-remark @below {{ %arg6 -> %0}}
91+ // expected-remark @below {{ %arg7 -> %1}}
92+ // expected-remark @below {{ %arg8 -> %2}}
93+ // expected-remark @below {{ %3#0 -> %0,%1}}
94+ // expected-remark @below {{ %3#1 -> %0,%1}}
95+ // expected-remark @below {{ %3#2 -> %0,%1,%2}}
10496 %a_shared , %b_shared , %c_shared = scf.for %iv = %lb to %ub step %step iter_args (%a_shared = %a , %b_shared = %b , %c_shared = %c ) ->
10597 (!ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >) {
10698 scf.yield %b_shared , %a_shared , %a_shared : !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
10799 }
108100 tt.return
109101}
110102
111- // CHECK-LABEL: for_if
112103tt.func @for_if (%lb : index , %ub : index , %step : index , %A : !tt.ptr <f16 >, %B : !tt.ptr <f16 >, %i1 : i1 ) {
113- // CHECK: %0 -> %0
104+ // expected-remark @below {{ %0 -> %0}}
114105 %a_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
115- // CHECK-NEXT: %1 -> %1
106+ // expected-remark @below {{ %1 -> %1}}
116107 %b_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
117- // CHECK-NEXT: %2 -> %2
108+ // expected-remark @below {{ %2 -> %2}}
118109 %c_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
119- // CHECK-NEXT: %arg7 -> %0
120- // CHECK-NEXT: %arg8 -> %1
121- // CHECK-NEXT: %arg9 -> %2
122- // CHECK-NEXT: %3#0 -> %0,%1
123- // CHECK-NEXT: %3#1 -> %0,%1
124- // CHECK-NEXT: %3#2 -> %0,%1,%2
110+ // expected-remark @below {{ %arg7 -> %0}}
111+ // expected-remark @below {{ %arg8 -> %1}}
112+ // expected-remark @below {{ %arg9 -> %2}}
113+ // expected-remark @below {{ %3#0 -> %0,%1}}
114+ // expected-remark @below {{ %3#1 -> %0,%1}}
115+ // expected-remark @below {{ %3#2 -> %0,%1,%2}}
125116 %a_shared , %b_shared , %c_shared = scf.for %iv = %lb to %ub step %step iter_args (%a_shared = %a_shared_init , %b_shared = %b_shared_init , %c_shared = %c_shared_init ) ->
126117 (!ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >) {
127118 scf.if %i1 {
128119 %index = arith.constant 8 : i32
129- // CHECK-NEXT: %4 -> %0,%1
120+ // expected-remark @below {{ %4 -> %0,%1}}
130121 %cst0 = ttg.memdesc_subview %a_shared [%index , %index ] : !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> !ttg.memdesc <32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
131122 scf.yield
132123 }
@@ -135,32 +126,31 @@ tt.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
135126 tt.return
136127}
137128
138- // CHECK-LABEL: for_for_if
139129tt.func @for_for_if (%lb : index , %ub : index , %step : index , %A : !tt.ptr <f16 >, %B : !tt.ptr <f16 >, %i1 : i1 ) {
140- // CHECK: %0 -> %0
130+ // expected-remark @below {{ %0 -> %0}}
141131 %a_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
142- // CHECK-NEXT: %1 -> %1
132+ // expected-remark @below {{ %1 -> %1}}
143133 %b_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
144- // CHECK-NEXT: %2 -> %2
134+ // expected-remark @below {{ %2 -> %2}}
145135 %c_shared_init = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
146- // CHECK-NEXT: %arg7 -> %0
147- // CHECK-NEXT: %arg8 -> %1
148- // CHECK-NEXT: %arg9 -> %2
149- // CHECK-NEXT: %3#0 -> %0
150- // CHECK-NEXT: %3#1 -> %1
151- // CHECK-NEXT: %3#2 -> %2,%6,%6
136+ // expected-remark @below {{ %arg7 -> %0}}
137+ // expected-remark @below {{ %arg8 -> %1}}
138+ // expected-remark @below {{ %arg9 -> %2}}
139+ // expected-remark @below {{ %3#0 -> %0}}
140+ // expected-remark @below {{ %3#1 -> %1}}
141+ // expected-remark @below {{ %3#2 -> %2,%6,%6}}
152142 %a_shared , %b_shared , %c_shared = scf.for %iv = %lb to %ub step %step iter_args (%a_shared = %a_shared_init , %b_shared = %b_shared_init , %c_shared = %c_shared_init ) ->
153143 (!ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >) {
154- // CHECK-NEXT: %arg11 -> %2,%6,%6
155- // CHECK-NEXT: %4 -> %2,%6,%6
144+ // expected-remark @below {{ %arg11 -> %2,%6,%6}}
145+ // expected-remark @below {{ %4 -> %2,%6,%6}}
156146 %c_shared_next = scf.for %jv = %lb to %ub step %step iter_args (%c_shared_next = %c_shared ) -> (!ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >) {
157- // CHECK-NEXT: %5 -> %6,%6
147+ // expected-remark @below {{ %5 -> %6,%6}}
158148 %c_shared_next_next = scf.if %i1 -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > {
159- // CHECK-NEXT: %6 -> %6
149+ // expected-remark @below {{ %6 -> %6}}
160150 %cst0 = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
161151 scf.yield %cst0 : !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
162152 } else {
163- // CHECK-NEXT: %6 -> %6
153+ // expected-remark @below {{ %6 -> %6}}
164154 %cst0 = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
165155 scf.yield %cst0 : !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
166156 }
@@ -171,32 +161,31 @@ tt.func @for_for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
171161 tt.return
172162}
173163
174- // CHECK-LABEL: cf_for
175164tt.func @cf_for (%arg0: index , %arg1: index , %arg2: index , %arg3: !tt.ptr <f16 >, %arg4: !tt.ptr <f16 >) {
176165 %idx = arith.constant 0 : i32
177- // CHECK: %0 -> %0
166+ // expected-remark @below {{ %0 -> %0}}
178167 %cst = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
179- // CHECK-NEXT: %1 -> %1
168+ // expected-remark @below {{ %1 -> %1}}
180169 %cst_0 = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
181- // CHECK-NEXT: %2 -> %0
170+ // expected-remark @below {{ %2 -> %0}}
182171 %0 = ttg.memdesc_subview %cst [%idx , %idx ] : !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
183172 gpu.barrier
184- // CHECK-NEXT: %3 -> %3
173+ // expected-remark @below {{ %3 -> %3}}
185174 %cst_1 = ttg.local_alloc : () -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
186- // CHECK-NEXT: %5 -> %0,%1,%3
187- // CHECK-NEXT: %6 -> %0,%1,%3
188- // CHECK-NEXT: %7 -> %0,%1,%3
189175 cf.br ^bb1 (%arg0 , %cst , %cst_0 , %cst_1 : index , !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >)
190176^bb1 (%1: index , %2: !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, %3: !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, %4: !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >): // 2 preds: ^bb0, ^bb2
191177 %5 = arith.cmpi slt , %1 , %arg1 : index
178+ // expected-remark @below {{%5 -> %0,%1,%3}}
179+ // expected-remark @below {{%6 -> %0,%1,%3}}
180+ // expected-remark @below {{%7 -> %0,%1,%3}}
192181 cf.cond_br %5 , ^bb2 , ^bb3
193182^bb2 : // pred: ^bb1
194183 gpu.barrier
195184 %8 = arith.addi %1 , %arg2 : index
196185 cf.br ^bb1 (%8 , %4 , %2 , %3 : index , !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >, !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >)
197186^bb3 : // pred: ^bb1
198187 gpu.barrier
199- // CHECK-NEXT: %10 -> %0
188+ // expected-remark @below {{ %10 -> %0}}
200189 %9 = ttg.memdesc_subview %0 [%idx , %idx ] : !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> !ttg.memdesc <128 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
201190 tt.return
202191}
0 commit comments