@@ -44,20 +44,6 @@ define <vscale x 2 x i64> @mul_neg_fold_i64(<vscale x 2 x i1> %pg, <vscale x 2 x
   ret <vscale x 2 x i64> %2
 }
 
-define <vscale x 8 x i16> @mul_neg_fold_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; Edge case -- make sure that the case where we're multiplying two dups
-; together is sane.
-; CHECK-LABEL: mul_neg_fold_two_dups:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg z0.h, p0/m, z0.h
-; CHECK-NEXT: ret
-  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
-  %3 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2)
-  ret <vscale x 8 x i16> %3
-}
-
 define <vscale x 16 x i8> @mul_neg_fold_u_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: mul_neg_fold_u_i8:
 ; CHECK: // %bb.0:
@@ -98,136 +84,54 @@ define <vscale x 2 x i64> @mul_neg_fold_u_i64(<vscale x 2 x i1> %pg, <vscale x 2
   ret <vscale x 2 x i64> %2
 }
 
-define <vscale x 8 x i16> @mul_neg_fold_u_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: mul_neg_fold_u_two_dups:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg z0.h, p0/m, z0.h
-; CHECK-NEXT: ret
-  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
-  %3 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2)
-  ret <vscale x 8 x i16> %3
-}
-
-; Undefined mul is commutative
-define <vscale x 16 x i8> @mul_neg_fold_u_different_argument_order_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: mul_neg_fold_u_different_argument_order_i8:
+define <vscale x 16 x i8> @mul_neg_fold_different_argument_order_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg z0.b, p0/m, z0.b
+; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg z1.b, p0/m, z0.b
+; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 -1)
-  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %1, <vscale x 16 x i8> %a)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %1, <vscale x 16 x i8> %a)
   ret <vscale x 16 x i8> %2
 }
 
-define <vscale x 8 x i16> @mul_neg_fold_u_different_argument_order_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: mul_neg_fold_u_different_argument_order_i16:
+define <vscale x 8 x i16> @mul_neg_fold_different_argument_order_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg z0.h, p0/m, z0.h
+; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg z1.h, p0/m, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %a)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %a)
   ret <vscale x 8 x i16> %2
 }
 
-define <vscale x 4 x i32> @mul_neg_fold_u_different_argument_order_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: mul_neg_fold_u_different_argument_order_i32:
+define <vscale x 4 x i32> @mul_neg_fold_different_argument_order_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg z0.s, p0/m, z0.s
+; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg z1.s, p0/m, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 -1)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 2 x i64> @mul_neg_fold_u_different_argument_order_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: mul_neg_fold_u_different_argument_order_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: neg z0.d, p0/m, z0.d
-; CHECK-NEXT: ret
-  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 -1)
-  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %1, <vscale x 2 x i64> %a)
-  ret <vscale x 2 x i64> %2
-}
-
-; Non foldable muls -- we don't expect these to be optimised out.
-define <vscale x 8 x i16> @no_mul_neg_fold_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: no_mul_neg_fold_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.h, #-2 // =0xfffffffffffffffe
-; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -2)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
-  ret <vscale x 8 x i16> %2
-}
-
-define <vscale x 4 x i32> @no_mul_neg_fold_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: no_mul_neg_fold_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #-2 // =0xfffffffffffffffe
-; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 -2)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
-  ret <vscale x 4 x i32> %2
-}
-
-define <vscale x 2 x i64> @no_mul_neg_fold_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: no_mul_neg_fold_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, #-2 // =0xfffffffffffffffe
-; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 -2)
-  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
-  ret <vscale x 2 x i64> %2
-}
-
-; Merge mul is non commutative
-define <vscale x 2 x i64> @no_mul_neg_fold_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: no_mul_neg_fold_different_argument_order:
+define <vscale x 2 x i64> @mul_neg_fold_different_argument_order_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_neg_fold_different_argument_order_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: neg z1.d, p0/m, z0.d
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 -1)
   %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %1, <vscale x 2 x i64> %a)
   ret <vscale x 2 x i64> %2
 }
 
-define <vscale x 8 x i16> @no_mul_neg_fold_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: no_mul_neg_fold_u_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul z0.h, z0.h, #-2
-; CHECK-NEXT: ret
-  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -2)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
-  ret <vscale x 8 x i16> %2
-}
-
-define <vscale x 4 x i32> @no_mul_neg_fold_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: no_mul_neg_fold_u_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul z0.s, z0.s, #-2
-; CHECK-NEXT: ret
-  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 -2)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
-  ret <vscale x 4 x i32> %2
-}
-
-define <vscale x 2 x i64> @no_mul_neg_fold_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: no_mul_neg_fold_u_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul z0.d, z0.d, #-2
-; CHECK-NEXT: ret
-  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 -2)
-  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
-  ret <vscale x 2 x i64> %2
-}
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)