@@ -19,6 +19,7 @@ func.func @zero_za_b() {
1919func.func @zero_za_h () {
2020 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}> : () -> ()
2121 %zero_za0h = arm_sme.zero : vector <[8 ]x[8 ]xi16 >
22+ " test.prevent_zero_merge" () : () -> ()
2223 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 170 : i32}> : () -> ()
2324 %zero_za1h = arm_sme.zero : vector <[8 ]x[8 ]xf16 >
2425 " test.some_use" (%zero_za0h ) : (vector <[8 ]x[8 ]xi16 >) -> ()
@@ -32,10 +33,13 @@ func.func @zero_za_h() {
3233func.func @zero_za_s () {
3334 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}> : () -> ()
3435 %zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
36+ " test.prevent_zero_merge" () : () -> ()
3537 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}> : () -> ()
3638 %zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
39+ " test.prevent_zero_merge" () : () -> ()
3740 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 68 : i32}> : () -> ()
3841 %zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
42+ " test.prevent_zero_merge" () : () -> ()
3943 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 136 : i32}> : () -> ()
4044 %zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
4145 " test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
@@ -51,18 +55,25 @@ func.func @zero_za_s() {
5155func.func @zero_za_d () {
5256 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 1 : i32}> : () -> ()
5357 %zero_za0d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
58+ " test.prevent_zero_merge" () : () -> ()
5459 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 2 : i32}> : () -> ()
5560 %zero_za1d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
61+ " test.prevent_zero_merge" () : () -> ()
5662 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 4 : i32}> : () -> ()
5763 %zero_za2d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
64+ " test.prevent_zero_merge" () : () -> ()
5865 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 8 : i32}> : () -> ()
5966 %zero_za3d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
67+ " test.prevent_zero_merge" () : () -> ()
6068 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 16 : i32}> : () -> ()
6169 %zero_za4d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
70+ " test.prevent_zero_merge" () : () -> ()
6271 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 32 : i32}> : () -> ()
6372 %zero_za5d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
73+ " test.prevent_zero_merge" () : () -> ()
6474 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 64 : i32}> : () -> ()
6575 %zero_za6d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
76+ " test.prevent_zero_merge" () : () -> ()
6677 // CHECK: "arm_sme.intr.zero"() <{tile_mask = 128 : i32}> : () -> ()
6778 %zero_za7d = arm_sme.zero : vector <[2 ]x[2 ]xf64 >
6879 " test.some_use" (%zero_za0d ) : (vector <[2 ]x[2 ]xi64 >) -> ()
@@ -75,3 +86,45 @@ func.func @zero_za_d() {
7586 " test.some_use" (%zero_za7d ) : (vector <[2 ]x[2 ]xf64 >) -> ()
7687 return
7788}
89+
90+ // -----
91+
92+ // CHECK-LABEL: merge_consecutive_tile_zero_ops
93+ func.func @merge_consecutive_tile_zero_ops () {
    // Four arm_sme.zero ops on 32-bit element tiles appear back to back, with
    // no other op between them. The directives below require exactly one zero
    // intrinsic in the output, carrying tile_mask = 255, and none before or
    // after it. 255 equals the bitwise OR of the four per-tile masks (17, 34,
    // 68, 136) that @zero_za_s expects when the same ops are kept separate.
94+ // CHECK-NOT: arm_sme.intr.zero
95+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
96+ // CHECK-NOT: arm_sme.intr.zero
97+ %zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
98+ %zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
99+ %zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
100+ %zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
    // Each zeroed value gets one consumer. NOTE(review): presumably this keeps
    // the four tiles live so they are not removed before lowering - confirm.
101+ " test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
102+ " test.some_use" (%zero_za1s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
103+ " test.some_use" (%zero_za2s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
104+ " test.some_use" (%zero_za3s ) : (vector <[4 ]x[4 ]xf32 >) -> ()
105+ return
106+ }
107+
108+ // -----
109+
110+ /// arm_sme.intr.zero intrinsics are not merged when there is an op other than
111+ /// arm_sme.intr.zero between them.
112+
113+ // CHECK-LABEL: merge_consecutive_tile_zero_ops_with_barrier
114+ func.func @merge_consecutive_tile_zero_ops_with_barrier () {
    // Two pairs of arm_sme.zero ops are separated by a single unrelated op
    // (test.prevent_zero_merge). The directives require exactly two zero
    // intrinsics in the output: one per pair, never a single combined one.
115+ // CHECK-NOT: arm_sme.intr.zero
    // First pair: 51 equals 17 | 34, the masks @zero_za_s expects for its
    // first two 32-bit tile zeros.
116+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 51 : i32}> : () -> ()
117+ // CHECK-NOT: arm_sme.intr.zero
118+ %zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
119+ %zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
    // The op that sits between the two pairs and stops them from being merged.
120+ " test.prevent_zero_merge" () : () -> ()
    // Second pair: 204 equals 68 | 136, the masks @zero_za_s expects for its
    // last two 32-bit tile zeros.
121+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 204 : i32}> : () -> ()
122+ // CHECK-NOT: arm_sme.intr.zero
123+ %zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
124+ %zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
    // Each zeroed value gets one consumer. NOTE(review): presumably this keeps
    // the four tiles live so they are not removed before lowering - confirm.
125+ " test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
126+ " test.some_use" (%zero_za1s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
127+ " test.some_use" (%zero_za2s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
128+ " test.some_use" (%zero_za3s ) : (vector <[4 ]x[4 ]xf32 >) -> ()
129+ return
130+ }
0 commit comments