@@ -24,7 +24,7 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
24
24
// COM: Start of ttg.local_load. Load the values from SLM (shared local memory) into registers.
25
25
// CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
26
26
// CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
27
- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
27
+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
28
28
%AA_DOT = ttg.local_load %AA : !ttg.memdesc <128 x64 xf16 , #shared , #ttg.shared_memory > -> tensor <128 x64 xf16 , #dot_operand_a >
29
29
30
30
%cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
@@ -62,7 +62,7 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
62
62
// COM: Start of ttg.local_load. Load the values from SLM (shared local memory) into registers.
63
63
// CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
64
64
// CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
65
- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
65
+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
66
66
%AA_DOT = ttg.local_load %AA : !ttg.memdesc <128 x64 xf16 , #shared , #ttg.shared_memory > -> tensor <128 x64 xf16 , #dot_operand_a >
67
67
68
68
%cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
@@ -87,23 +87,21 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
87
87
// CHECK-SAME: %[[PTR_1:.*]]: !llvm.ptr<1>)
88
88
// CHECK-SAME: attributes {intel_reqd_sub_group_size = 16 : i32, {{.*}}} {
89
89
tt.func @convert_dot (%B: tensor <64 x256 xf16 , #blocked1 >) {
90
- // CHECK-DAG: %[[CST_128:.*]] = llvm.mlir.constant(128 : i32) : i32
91
- // CHECK-DAG: %[[CST_256:.*]] = llvm.mlir.constant(256 : i32) : i32
92
- // CHECK-DAG: %[[CST_32:.*]] = llvm.mlir.constant(32 : i32) : i32
93
- // CHECK-DAG: %[[CST_4:.*]] = llvm.mlir.constant(4 : i32) : i32
94
- // CHECK-DAG: %[[CST_2:.*]] = llvm.mlir.constant(2 : i32) : i32
90
+ // CHECK-DAG: %[[CST_14:.*]] = llvm.mlir.constant(14 : i32) : i32
91
+ // CHECK-DAG: %[[CST_13:.*]] = llvm.mlir.constant(13 : i32) : i32
92
+ // CHECK-DAG: %[[CST_12:.*]] = llvm.mlir.constant(12 : i32) : i32
93
+ // CHECK-DAG: %[[CST_11:.*]] = llvm.mlir.constant(11 : i32) : i32
94
+ // CHECK-DAG: %[[CST_10:.*]] = llvm.mlir.constant(10 : i32) : i32
95
+ // CHECK-DAG: %[[CST_9:.*]] = llvm.mlir.constant(9 : i32) : i32
95
96
// CHECK-DAG: %[[CST_8:.*]] = llvm.mlir.constant(8 : i32) : i32
96
- // CHECK-DAG: %[[CST_16:.*]] = llvm.mlir.constant(16 : i32) : i32
97
- // CHECK-DAG: %[[CST_64:.*]] = llvm.mlir.constant(64 : i32) : i32
98
- // CHECK-DAG: %[[CST_1:.*]] = llvm.mlir.constant(1 : i32) : i32
99
97
// CHECK-DAG: %[[CST_0:.*]] = llvm.mlir.constant(0 : i32) : i32
100
98
%BB = ttg.local_alloc %B : (tensor <64 x256 xf16 , #blocked1 >) -> !ttg.memdesc <64 x256 xf16 , #shared , #ttg.shared_memory >
101
99
102
100
// CHECK: llvm.call spir_funccc @_Z7barrierj
103
101
// COM: Start of ttg.local_load. Load the values from SLM (shared local memory) into registers.
104
102
// CHECK: %[[WORK_ITEM_ID_:.*]] = llvm.call spir_funccc @_Z12get_local_idj(%[[CST_0]])
105
103
// CHECK: %[[WORK_ITEM_ID:.*]] = llvm.trunc %[[WORK_ITEM_ID_]] : i64 to i32
106
- // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} {alignment = 2 : i64} : !llvm.ptr<3> -> vector<1xf16>
104
+ // CHECK-COUNT-128: %[[LD_RES:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> vector<1xf16>
107
105
%BB_DOT = ttg.local_load %BB : !ttg.memdesc <64 x256 xf16 , #shared , #ttg.shared_memory > -> tensor <64 x256 xf16 , #dot_operand_b >
108
106
%cst0 = arith.constant dense <0.000000e+00 > : tensor <128 x256 xf32 , #dpas >
109
107
%cst1 = arith.constant dense <0.000000e+00 > : tensor <128 x64 xf16 , #dot_operand_a >
0 commit comments