@@ -24,7 +24,7 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
2424 // COM: Start of ttg.local_load. Load the value from SLM into registers.
2525 // CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
2626 // CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
27- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
27+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
2828 %AA_DOT = ttg.local_load %AA : !ttg.memdesc <128 x64 xf16 , #shared , #ttg.shared_memory > -> tensor <128 x64 xf16 , #dot_operand_a >
2929
3030 %cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
@@ -62,7 +62,7 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
6262 // COM: Start of ttg.local_load. Load the value from SLM into registers.
6363 // CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
6464 // CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
65- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
65+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
6666 %AA_DOT = ttg.local_load %AA : !ttg.memdesc <128 x64 xf16 , #shared , #ttg.shared_memory > -> tensor <128 x64 xf16 , #dot_operand_a >
6767
6868 %cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
@@ -87,23 +87,21 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
8787 // CHECK-SAME: %[[PTR_1:.*]]: !llvm.ptr<1>)
8888 // CHECK-SAME: attributes {intel_reqd_sub_group_size = 16 : i32, {{.*}}} {
8989 tt.func @convert_dot (%B: tensor <64 x256 xf16 , #blocked1 >) {
90- // CHECK-DAG: %[[CST_128:.*]] = llvm.mlir.constant(128 : i32) : i32
91- // CHECK-DAG: %[[CST_256:.*]] = llvm.mlir.constant(256 : i32) : i32
92- // CHECK-DAG: %[[CST_32:.*]] = llvm.mlir.constant(32 : i32) : i32
93- // CHECK-DAG: %[[CST_4:.*]] = llvm.mlir.constant(4 : i32) : i32
94- // CHECK-DAG: %[[CST_2:.*]] = llvm.mlir.constant(2 : i32) : i32
90+ // CHECK-DAG: %[[CST_14:.*]] = llvm.mlir.constant(14 : i32) : i32
91+ // CHECK-DAG: %[[CST_13:.*]] = llvm.mlir.constant(13 : i32) : i32
92+ // CHECK-DAG: %[[CST_12:.*]] = llvm.mlir.constant(12 : i32) : i32
93+ // CHECK-DAG: %[[CST_11:.*]] = llvm.mlir.constant(11 : i32) : i32
94+ // CHECK-DAG: %[[CST_10:.*]] = llvm.mlir.constant(10 : i32) : i32
95+ // CHECK-DAG: %[[CST_9:.*]] = llvm.mlir.constant(9 : i32) : i32
9596 // CHECK-DAG: %[[CST_8:.*]] = llvm.mlir.constant(8 : i32) : i32
96- // CHECK-DAG: %[[CST_16:.*]] = llvm.mlir.constant(16 : i32) : i32
97- // CHECK-DAG: %[[CST_64:.*]] = llvm.mlir.constant(64 : i32) : i32
98- // CHECK-DAG: %[[CST_1:.*]] = llvm.mlir.constant(1 : i32) : i32
9997 // CHECK-DAG: %[[CST_0:.*]] = llvm.mlir.constant(0 : i32) : i32
10098 %BB = ttg.local_alloc %B : (tensor <64 x256 xf16 , #blocked1 >) -> !ttg.memdesc <64 x256 xf16 , #shared , #ttg.shared_memory >
10199
102100 // CHECK: llvm.call spir_funccc @_Z7barrierj
103101 // COM: Start of ttg.local_load. Load the value from SLM into registers.
104102 // CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
105103 // CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
106- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
104+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
107105 %BB_DOT = ttg.local_load %BB : !ttg.memdesc <64 x256 xf16 , #shared , #ttg.shared_memory > -> tensor <64 x256 xf16 , #dot_operand_b >
108106 %cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
109107 %cst1 = arith.constant dense <0.000000e+00 > : tensor <128 x64 xf16 , #dot_operand_a >
0 commit comments