@@ -201,22 +201,22 @@ tt.func @longlive(%A : !tt.ptr<f16>) {
201201
202202// This example triggers graph coloring with > 1 colors.
203203// expected-remark @below {{multi_color}}
204- // expected-remark @below {{size = 1504 }}
204+ // expected-remark @below {{size = 1376 }}
205205tt.func @multi_color (%A : !tt.ptr <f16 >) {
206- // expected-remark @below {{offset = 1152 , size = 64}}
206+ // expected-remark @below {{offset = 1024 , size = 64}}
207207 %cst = ttg.local_alloc : () -> !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
208- // expected-remark @below {{offset = 1472 , size = 32}}
208+ // expected-remark @below {{offset = 1344 , size = 32}}
209209 %cst_0 = ttg.local_alloc : () -> !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
210- // expected-remark @below {{offset = 1216 , size = 128}}
210+ // expected-remark @below {{offset = 1088 , size = 128}}
211211 %cst_1 = ttg.local_alloc : () -> !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
212212 %cst_2 = arith.constant dense <0.000000e+00 > : tensor <16 x32 xf16 , #AL >
213- // expected-remark @below {{scratch offset = 0, size = 1152 }}
213+ // expected-remark @below {{scratch offset = 0, size = 1024 }}
214214 %0 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
215215 %1 = ttg.local_load %cst : !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x8 xf16 , #AL >
216216 // expected-remark @below {{offset = 0, size = 128}}
217217 %cst_3 = ttg.local_alloc : () -> !ttg.memdesc <4 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
218218 %2 = ttg.local_load %cst_0 : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
219- // expected-remark @below {{scratch offset = 0, size = 1152 }}
219+ // expected-remark @below {{scratch offset = 0, size = 1024 }}
220220 %3 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
221221 // expected-remark @below {{offset = 512, size = 256}}
222222 %cst_4 = ttg.local_alloc : () -> !ttg.memdesc <4 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
@@ -226,7 +226,7 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
226226 %5 = ttg.local_load %cst_5 : !ttg.memdesc <4 x8 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x8 xf16 , #AL >
227227 // expected-remark @below {{offset = 0, size = 512}}
228228 %cst_6 = ttg.local_alloc : () -> !ttg.memdesc <8 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
229- // expected-remark @below {{offset = 1344 , size = 128}}
229+ // expected-remark @below {{offset = 1216 , size = 128}}
230230 %cst_7 = ttg.local_alloc : () -> !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
231231 %6 = ttg.local_load %cst_0 : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
232232 // expected-remark @below {{offset = 0, size = 512}}
@@ -237,7 +237,7 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
237237 %cst_10 = ttg.local_alloc : () -> !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
238238 %7 = ttg.local_load %cst_1 : !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <16 x4 xf16 , #AL >
239239 %8 = ttg.local_load %cst_4 : !ttg.memdesc <4 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x32 xf16 , #AL >
240- // expected-remark @below {{scratch offset = 0, size = 1152 }}
240+ // expected-remark @below {{scratch offset = 0, size = 1024 }}
241241 %9 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
242242 %cst_11 = arith.constant dense <0.000000e+00 > : tensor <4 x4 xf16 , #AL >
243243 %10 = ttg.local_load %cst_7 : !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <2 x32 xf16 , #AL >
@@ -248,16 +248,16 @@ tt.func @multi_color(%A : !tt.ptr<f16>) {
248248
249249// This example triggers graph coloring with multiple rounds
250250// expected-remark @below {{multi_color_multi_rounds}}
251- // expected-remark @below {{size = 9504 }}
251+ // expected-remark @below {{size = 9376 }}
252252tt.func @multi_color_multi_rounds (%arg0: !tt.ptr <f16 >) {
253- // expected-remark @below {{offset = 9472 , size = 32}}
253+ // expected-remark @below {{offset = 9344 , size = 32}}
254254 %cst = ttg.local_alloc : () -> !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
255- // expected-remark @below {{offset = 9344 , size = 128}}
255+ // expected-remark @below {{offset = 9216 , size = 128}}
256256 %cst_0 = ttg.local_alloc : () -> !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
257257 // expected-remark @below {{offset = 0, size = 8192}}
258258 %cst_1 = ttg.local_alloc : () -> !ttg.memdesc <1024 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
259259 %cst_2 = arith.constant dense <0.000000e+00 > : tensor <16 x32 xf16 , #AL >
260- // expected-remark @below {{scratch offset = 8192, size = 1152 }}
260+ // expected-remark @below {{scratch offset = 8192, size = 1024 }}
261261 %0 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
262262 %1 = ttg.local_load %cst : !ttg.memdesc <4 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <4 x4 xf16 , #AL >
263263 // expected-remark @below {{offset = 8704, size = 128}}
@@ -267,7 +267,7 @@ tt.func @multi_color_multi_rounds(%arg0: !tt.ptr<f16>) {
267267 %cst_4 = ttg.local_alloc : () -> !ttg.memdesc <1 x16 x16 xf16 , #A_SHARED , #ttg.shared_memory , mutable >
268268 %3 = ttg.local_load %cst_0 : !ttg.memdesc <16 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <16 x4 xf16 , #AL >
269269 %4 = ttg.local_load %cst_1 : !ttg.memdesc <1024 x4 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <1024 x4 xf16 , #AL >
270- // expected-remark @below {{scratch offset = 0, size = 1152 }}
270+ // expected-remark @below {{scratch offset = 0, size = 1024 }}
271271 %5 = ttg.convert_layout %cst_2 : tensor <16 x32 xf16 , #AL > -> tensor <16 x32 xf16 , #BL >
272272 %6 = ttg.local_load %cst_3 : !ttg.memdesc <2 x32 xf16 , #A_SHARED , #ttg.shared_memory , mutable > -> tensor <2 x32 xf16 , #AL >
273273 tt.return
0 commit comments