@@ -81,7 +81,6 @@ module attributes {"ttg.num-ctas" = 8 : i32, "ttg.num-warps" = 4 : i32, ttg.shar
8181 // CHECK-LABEL: async_load_multicast_to_half_ctas
8282 tt.func public @async_load_multicast_to_half_ctas (%arg0: tensor <32 x32 x!tt.ptr <f32 >, #blocked > {tt.divisibility = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.contiguity = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.constancy = dense <[1 , 1 ]> : tensor <2 xi32 >},
8383 %arg1: !ttg.memdesc <32 x32 xf32 , #shared , #smem , mutable >) {
84- // CHECK: llvm.amdgcn.cluster.workgroup.id.x
8584 // CHECK: %[[CTA_ID:.*]] = {{.*}}llvm.amdgcn.cluster.workgroup.id.x
8685 // CHECK: %[[NON_FREE_BITS:.*]] = llvm.mlir.constant(-7 : i32) : i32
8786 // CHECK: %[[SHIFT_AMOUNT:.*]] = llvm.and %[[CTA_ID]], %[[NON_FREE_BITS]]
@@ -104,7 +103,6 @@ module attributes {"ttg.num-ctas" = 16 : i32, "ttg.num-warps" = 4 : i32, ttg.sha
104103 tt.func public @async_load_multicast_group_of_2_strided_by_8 (%arg0: tensor <32 x32 x!tt.ptr <f32 >, #blocked > {tt.divisibility = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.contiguity = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.constancy = dense <[1 , 1 ]> : tensor <2 xi32 >},
105104 %arg1: !ttg.memdesc <32 x32 xf32 , #shared , #smem , mutable >) {
106105 // No leading cluster id is skipped: the first llvm.amdgcn.cluster.workgroup.id.x is captured as CTA_ID and and-ed with NON_FREE_BITS
107- // CHECK: llvm.amdgcn.cluster.workgroup.id.x
108106 // CHECK: %[[CTA_ID:.*]] = {{.*}}llvm.amdgcn.cluster.workgroup.id.x
109107 // CHECK: %[[NON_FREE_BITS:.*]] = llvm.mlir.constant(-9 : i32) : i32
110108 // CHECK: %[[SHIFT_AMOUNT:.*]] = llvm.and %[[CTA_ID]], %[[NON_FREE_BITS]]
@@ -146,7 +144,6 @@ module attributes {"ttg.num-ctas" = 16 : i32, "ttg.num-warps" = 4 : i32, ttg.sha
146144 tt.func public @async_load_multi_cta_linear_layout (%arg0: tensor <32 x32 x!tt.ptr <f32 >, #linear > {tt.divisibility = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.contiguity = dense <[16 , 16 ]> : tensor <2 xi32 >, tt.constancy = dense <[1 , 1 ]> : tensor <2 xi32 >},
147145 %arg1: !ttg.memdesc <32 x32 xf32 , #shared , #smem , mutable >) {
148146 // No leading cluster id is skipped: the first llvm.amdgcn.cluster.workgroup.id.x is captured as CTA_ID and and-ed with NON_FREE_BITS
149- // CHECK: llvm.amdgcn.cluster.workgroup.id.x
150147 // CHECK: %[[CTA_ID:.*]] = {{.*}}llvm.amdgcn.cluster.workgroup.id.x
151148 // CHECK: %[[NON_FREE_BITS:.*]] = llvm.mlir.constant(-9 : i32) : i32
152149 // CHECK: %[[SHIFT_AMOUNT:.*]] = llvm.and %[[CTA_ID]], %[[NON_FREE_BITS]]
0 commit comments