@@ -201,22 +201,22 @@ tt.func @longlive(%A : !tt.ptr<f16>) {
201
201
202
202
// This example triggers graph coloring with more than one color.
203
203
// expected-remark @below {{multi_color}}
204
- // expected-remark @below {{size = 1376 }}
204
+ // expected-remark @below {{size = 1504 }}
205
205
tt.func @multi_color (%A : !tt.ptr <f16 >) {
206
- // expected-remark @below {{offset = 1024 , size = 64}}
206
+ // expected-remark @below {{offset = 1152 , size = 64}}
207
207
%cst = ttg.local_alloc : () -> !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
208
- // expected-remark @below {{offset = 1344 , size = 32}}
208
+ // expected-remark @below {{offset = 1472 , size = 32}}
209
209
%cst_0 = ttg.local_alloc : () -> !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
210
- // expected-remark @below {{offset = 1088 , size = 128}}
210
+ // expected-remark @below {{offset = 1216 , size = 128}}
211
211
%cst_1 = ttg.local_alloc : () -> !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
212
212
%cst_2 = arith.constant dense <0.000000e+00 > : tensor <16 x32 xf16 , #AL >
213
- // expected-remark @below {{scratch offset = 0, size = 1024 }}
213
+ // expected-remark @below {{scratch offset = 0, size = 1152 }}
214
214
%0 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
215
215
%1 = ttg.local_load %cst : !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x8 xf16 , #AL >
216
216
// expected-remark @below {{offset = 0, size = 128}}
217
217
%cst_3 = ttg.local_alloc : () -> !ttg.memdesc <4 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
218
218
%2 = ttg.local_load %cst_0 : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
219
- // expected-remark @below {{scratch offset = 0, size = 1024 }}
219
+ // expected-remark @below {{scratch offset = 0, size = 1152 }}
220
220
%3 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
221
221
// expected-remark @below {{offset = 512, size = 256}}
222
222
%cst_4 = ttg.local_alloc : () -> !ttg.memdesc <4 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
@@ -226,7 +226,7 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
226
226
%5 = ttg.local_load %cst_5 : !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x8 xf16 , #AL >
227
227
// expected-remark @below {{offset = 0, size = 512}}
228
228
%cst_6 = ttg.local_alloc : () -> !ttg.memdesc <8 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
229
- // expected-remark @below {{offset = 1216 , size = 128}}
229
+ // expected-remark @below {{offset = 1344 , size = 128}}
230
230
%cst_7 = ttg.local_alloc : () -> !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
231
231
%6 = ttg.local_load %cst_0 : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
232
232
// expected-remark @below {{offset = 0, size = 512}}
@@ -237,7 +237,7 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
237
237
%cst_10 = ttg.local_alloc : () -> !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
238
238
%7 = ttg.local_load %cst_1 : !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <16 x4 xf16 , #AL >
239
239
%8 = ttg.local_load %cst_4 : !ttg.memdesc <4 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x32 xf16 , #AL >
240
- // expected-remark @below {{scratch offset = 0, size = 1024 }}
240
+ // expected-remark @below {{scratch offset = 0, size = 1152 }}
241
241
%9 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
242
242
%cst_11 = arith.constant dense <0.000000e+00 > : tensor <4 x4 xf16 , #AL >
243
243
%10 = ttg.local_load %cst_7 : !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <2 x32 xf16 , #AL >
@@ -248,16 +248,16 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
248
248
249
249
// This example triggers graph coloring with multiple rounds.
250
250
// expected-remark @below {{multi_color_multi_rounds}}
251
- // expected-remark @below {{size = 9376 }}
251
+ // expected-remark @below {{size = 9504 }}
252
252
tt.func @multi_color_multi_rounds (%arg0: !tt.ptr <f16 >) {
253
- // expected-remark @below {{offset = 9344 , size = 32}}
253
+ // expected-remark @below {{offset = 9472 , size = 32}}
254
254
%cst = ttg.local_alloc : () -> !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
255
- // expected-remark @below {{offset = 9216 , size = 128}}
255
+ // expected-remark @below {{offset = 9344 , size = 128}}
256
256
%cst_0 = ttg.local_alloc : () -> !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
257
257
// expected-remark @below {{offset = 0, size = 8192}}
258
258
%cst_1 = ttg.local_alloc : () -> !ttg.memdesc <1024 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
259
259
%cst_2 = arith.constant dense <0.000000e+00 > : tensor <16 x32 xf16 , #AL >
260
- // expected-remark @below {{scratch offset = 8192, size = 1024 }}
260
+ // expected-remark @below {{scratch offset = 8192, size = 1152 }}
261
261
%0 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
262
262
%1 = ttg.local_load %cst : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
263
263
// expected-remark @below {{offset = 8704, size = 128}}
@@ -267,7 +267,7 @@ tt.func @multi_color_multi_rounds(%arg0: !tt.ptr<f16>) {
267
267
%cst_4 = ttg.local_alloc : () -> !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
268
268
%3 = ttg.local_load %cst_0 : !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <16 x4 xf16 , #AL >
269
269
%4 = ttg.local_load %cst_1 : !ttg.memdesc <1024 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <1024 x4 xf16 , #AL >
270
- // expected-remark @below {{scratch offset = 0, size = 1024 }}
270
+ // expected-remark @below {{scratch offset = 0, size = 1152 }}
271
271
%5 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
272
272
%6 = ttg.local_load %cst_3 : !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <2 x32 xf16 , #AL >
273
273
tt.return
0 commit comments