@@ -8,7 +8,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
88 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
99 %0 = tt.get_program_id x : i32
1010 %M_i64 = arith.constant 16 : i64
11- %N_i64 = arith.constant 16 : i64
11+ %N_i64 = arith.constant 64 : i64
1212 %c1_i64 = arith.constant 1 : i64
1313 %c0_i32 = arith.constant 0 : i32
1414
@@ -29,7 +29,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
2929 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
3030 %0 = tt.get_program_id x : i32
3131 %M_i64 = arith.constant 16 : i64
32- %N_i64 = arith.constant 16 : i64
32+ %N_i64 = arith.constant 64 : i64
3333 %c1_i64 = arith.constant 1 : i64
3434 %c0_i32 = arith.constant 0 : i32
3535
@@ -50,7 +50,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
5050 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
5151 %0 = tt.get_program_id x : i32
5252 %M_i64 = arith.constant 16 : i64
53- %N_i64 = arith.constant 16 : i64
53+ %N_i64 = arith.constant 64 : i64
5454 %c1_i64 = arith.constant 1 : i64
5555 %c0_i32 = arith.constant 0 : i32
5656
@@ -71,7 +71,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
7171 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
7272 %0 = tt.get_program_id x : i32
7373 %M_i64 = arith.constant 16 : i64
74- %N_i64 = arith.constant 16 : i64
74+ %N_i64 = arith.constant 64 : i64
7575 %c1_i64 = arith.constant 1 : i64
7676 %c0_i32 = arith.constant 0 : i32
7777
@@ -92,7 +92,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
9292 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
9393 %0 = tt.get_program_id x : i32
9494 %M_i64 = arith.constant 32 : i64
95- %N_i64 = arith.constant 16 : i64
95+ %N_i64 = arith.constant 64 : i64
9696 %c1_i64 = arith.constant 1 : i64
9797 %c0_i32 = arith.constant 0 : i32
9898
@@ -113,7 +113,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
113113 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
114114 %0 = tt.get_program_id x : i32
115115 %M_i64 = arith.constant 32 : i64
116- %N_i64 = arith.constant 16 : i64
116+ %N_i64 = arith.constant 64 : i64
117117 %c1_i64 = arith.constant 1 : i64
118118 %c0_i32 = arith.constant 0 : i32
119119
@@ -134,7 +134,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
134134 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
135135 %0 = tt.get_program_id x : i32
136136 %M_i64 = arith.constant 32 : i64
137- %N_i64 = arith.constant 16 : i64
137+ %N_i64 = arith.constant 64 : i64
138138 %c1_i64 = arith.constant 1 : i64
139139 %c0_i32 = arith.constant 0 : i32
140140
@@ -155,7 +155,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
155155 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
156156 %0 = tt.get_program_id x : i32
157157 %M_i64 = arith.constant 32 : i64
158- %N_i64 = arith.constant 16 : i64
158+ %N_i64 = arith.constant 64 : i64
159159 %c1_i64 = arith.constant 1 : i64
160160 %c0_i32 = arith.constant 0 : i32
161161
@@ -176,7 +176,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
176176 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
177177 %0 = tt.get_program_id x : i32
178178 %M_i64 = arith.constant 64 : i64
179- %N_i64 = arith.constant 16 : i64
179+ %N_i64 = arith.constant 64 : i64
180180 %c1_i64 = arith.constant 1 : i64
181181 %c0_i32 = arith.constant 0 : i32
182182
@@ -197,7 +197,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
197197 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
198198 %0 = tt.get_program_id x : i32
199199 %M_i64 = arith.constant 64 : i64
200- %N_i64 = arith.constant 16 : i64
200+ %N_i64 = arith.constant 64 : i64
201201 %c1_i64 = arith.constant 1 : i64
202202 %c0_i32 = arith.constant 0 : i32
203203
@@ -218,7 +218,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
218218 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
219219 %0 = tt.get_program_id x : i32
220220 %M_i64 = arith.constant 64 : i64
221- %N_i64 = arith.constant 16 : i64
221+ %N_i64 = arith.constant 64 : i64
222222 %c1_i64 = arith.constant 1 : i64
223223 %c0_i32 = arith.constant 0 : i32
224224
@@ -239,7 +239,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
239239 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
240240 %0 = tt.get_program_id x : i32
241241 %M_i64 = arith.constant 64 : i64
242- %N_i64 = arith.constant 32 : i64
242+ %N_i64 = arith.constant 64 : i64
243243 %c1_i64 = arith.constant 1 : i64
244244 %c0_i32 = arith.constant 0 : i32
245245
@@ -260,7 +260,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
260260 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
261261 %0 = tt.get_program_id x : i32
262262 %M_i64 = arith.constant 64 : i64
263- %N_i64 = arith.constant 32 : i64
263+ %N_i64 = arith.constant 64 : i64
264264 %c1_i64 = arith.constant 1 : i64
265265 %c0_i32 = arith.constant 0 : i32
266266
@@ -281,7 +281,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
281281 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
282282 %0 = tt.get_program_id x : i32
283283 %M_i64 = arith.constant 64 : i64
284- %N_i64 = arith.constant 32 : i64
284+ %N_i64 = arith.constant 64 : i64
285285 %c1_i64 = arith.constant 1 : i64
286286 %c0_i32 = arith.constant 0 : i32
287287
@@ -302,7 +302,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
302302 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
303303 %0 = tt.get_program_id x : i32
304304 %M_i64 = arith.constant 128 : i64
305- %N_i64 = arith.constant 32 : i64
305+ %N_i64 = arith.constant 64 : i64
306306 %c1_i64 = arith.constant 1 : i64
307307 %c0_i32 = arith.constant 0 : i32
308308
@@ -323,7 +323,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
323323 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
324324 %0 = tt.get_program_id x : i32
325325 %M_i64 = arith.constant 256 : i64
326- %N_i64 = arith.constant 32 : i64
326+ %N_i64 = arith.constant 64 : i64
327327 %c1_i64 = arith.constant 1 : i64
328328 %c0_i32 = arith.constant 0 : i32
329329
@@ -344,7 +344,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
344344 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
345345 %0 = tt.get_program_id x : i32
346346 %M_i64 = arith.constant 256 : i64
347- %N_i64 = arith.constant 32 : i64
347+ %N_i64 = arith.constant 64 : i64
348348 %c1_i64 = arith.constant 1 : i64
349349 %c0_i32 = arith.constant 0 : i32
350350
@@ -365,7 +365,7 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
365365 tt.func public @subgroup_2d_block_load (%arg0: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }, %arg3: !tt.ptr <f16 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
366366 %0 = tt.get_program_id x : i32
367367 %M_i64 = arith.constant 256 : i64
368- %N_i64 = arith.constant 32 : i64
368+ %N_i64 = arith.constant 64 : i64
369369 %c1_i64 = arith.constant 1 : i64
370370 %c0_i32 = arith.constant 0 : i32
371371
0 commit comments