@@ -193,25 +193,13 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 16 : i32, ttg.sha
193193 // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_ca]]
194194 %2 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = ca : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
195195 // CHECK: llvm.getelementptr
196- // CHECK: %[[aux_cg:.*]] = llvm.mlir.constant(0 : i32) : i32
196+ // CHECK: %[[aux_cg:.*]] = llvm.mlir.constant(3 : i32) : i32
197197 // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_cg]]
198198 %3 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = cg : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
199199 // CHECK: llvm.getelementptr
200- // CHECK: %[[aux_cs:.*]] = llvm.mlir.constant(3 : i32) : i32
201- // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_cs]]
202- %5 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = cs : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
203- // CHECK: llvm.getelementptr
204- // CHECK: %[[aux_cv:.*]] = llvm.mlir.constant(9 : i32) : i32
200+ // CHECK: %[[aux_cv:.*]] = llvm.mlir.constant(11 : i32) : i32
205201 // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_cv]]
206- %6 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = cv : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
207- // CHECK: llvm.getelementptr
208- // CHECK: %[[aux_wb:.*]] = llvm.mlir.constant(0 : i32) : i32
209- // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_wb]]
210- %7 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = wb : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
211- // CHECK: llvm.getelementptr
212- // CHECK: %[[aux_wt:.*]] = llvm.mlir.constant(8 : i32) : i32
213- // CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_wt]]
214- %8 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = wt : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
202+ %4 = ttg.async_copy_global_to_local %1 , %arg2 cacheModifier = cv : tensor <32 x32 x!tt.ptr <f16 >, #blocked > -> <32 x32 xf16 , #shared , #smem , mutable >
215203 tt.return
216204 }
217205}
0 commit comments