The MVE VSBCI instruction should be selected when the carry-in is set,
unlike the VADCI instruction, which should be selected when the carry-in
is zero. This was already implemented in the code, but the selection
function was always called with Add=1, even for the subtract instructions.
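For reference, the carry-in operand of these intrinsics is an FPSCR-shaped value whose carry flag sits in bit 29, so `i32 536870912` (`0x20000000`) means "carry set" and `i32 0` means "carry clear"; that is the bit the tests extract with `ubfx r1, r1, #29, #1`. The standalone sketch below illustrates the selection rule being restored; the enum and function names are illustrative stand-ins, not the actual backend identifiers, and the real selector also keeps the VADC/VSBC forms when the carry-in is not a compile-time constant.

```cpp
#include <cassert>
#include <cstdint>

// Illustrative opcode names; the real selector works on ARM::MVE_* opcodes.
enum class MveOpcode { VADC, VADCI, VSBC, VSBCI };

// Bit 29 of the FPSCR-shaped carry operand holds the carry flag,
// which is why the tests check `ubfx r1, r1, #29, #1`.
constexpr uint32_t CarryMask = 1u << 29; // 0x20000000 == 536870912

// Pick the carry-less form (VADCI/VSBCI) only when the constant carry-in
// already matches what that form implies: VADCI implies carry clear,
// VSBCI implies carry set. Otherwise keep VADC/VSBC, which read the carry
// from FPSCR. `Add` distinguishes the add intrinsics from the subtract
// ones; the bug was passing Add=1 for the subtract intrinsics as well.
MveOpcode selectCarryOpcode(bool Add, uint32_t ConstantCarryIn) {
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if ((ConstantCarryIn & CarryMask) == CarryExpected)
    return Add ? MveOpcode::VADCI : MveOpcode::VSBCI;
  return Add ? MveOpcode::VADC : MveOpcode::VSBC;
}

int main() {
  // vsbciq-style call: carry-in set -> VSBCI.
  assert(selectCarryOpcode(/*Add=*/false, 536870912u) == MveOpcode::VSBCI);
  // vsbcq with an explicit zero carry-in must keep the VSBC form.
  assert(selectCarryOpcode(/*Add=*/false, 0u) == MveOpcode::VSBC);
  // vadciq-style call: carry-in zero -> VADCI.
  assert(selectCarryOpcode(/*Add=*/true, 0u) == MveOpcode::VADCI);
  return 0;
}
```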
llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll (90 additions, 2 deletions)
@@ -108,7 +108,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, pt
 ; CHECK-NEXT:    str r1, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
+  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
   %1 = extractvalue { <4 x i32>, i32 } %0, 1
   %2 = lshr i32 %1, 29
   %3 = and i32 %2, 1
@@ -125,6 +125,46 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, pt
 ; CHECK-NEXT:    ubfx r1, r1, #29, #1
 ; CHECK-NEXT:    str r1, [r0]
 ; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
+  %1 = extractvalue { <4 x i32>, i32 } %0, 1
+  %2 = lshr i32 %1, 29
+  %3 = and i32 %2, 1
+  store i32 %3, ptr %carry_out, align 4
+  %4 = extractvalue { <4 x i32>, i32 } %0, 0
+  ret <4 x i32> %4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
+; CHECK-LABEL: test_vsbcq_s32_carry_in_zero:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
+; CHECK-NEXT:    vsbc.i32 q0, q0, q1
+; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
+; CHECK-NEXT:    ubfx r1, r1, #29, #1
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
+  %1 = extractvalue { <4 x i32>, i32 } %0, 1
+  %2 = lshr i32 %1, 29
+  %3 = and i32 %2, 1
+  store i32 %3, ptr %carry_out, align 4
+  %4 = extractvalue { <4 x i32>, i32 } %0, 0
+  ret <4 x i32> %4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
+; CHECK-LABEL: test_vsbcq_u32_carry_in_zero:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
+; CHECK-NEXT:    vsbc.i32 q0, q0, q1
+; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
+; CHECK-NEXT:    ubfx r1, r1, #29, #1
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
   %1 = extractvalue { <4 x i32>, i32 } %0, 1
@@ -196,7 +236,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i3
 entry:
   %0 = zext i16 %p to i32
   %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
   %3 = extractvalue { <4 x i32>, i32 } %2, 1
   %4 = lshr i32 %3, 29
   %5 = and i32 %4, 1
@@ -215,6 +255,54 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i3
 ; CHECK-NEXT:    ubfx r1, r1, #29, #1
 ; CHECK-NEXT:    str r1, [r0]
 ; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
+  %3 = extractvalue { <4 x i32>, i32 } %2, 1
+  %4 = lshr i32 %3, 29
+  %5 = and i32 %4, 1
+  store i32 %5, ptr %carry_out, align 4
+  %6 = extractvalue { <4 x i32>, i32 } %2, 0
+  ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
+; CHECK-LABEL: test_vsbcq_m_s32_carry_in_zero:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    vmsr p0, r1
+; CHECK-NEXT:    vmsr fpscr_nzcvqc, r2
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vsbct.i32 q0, q1, q2
+; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
+; CHECK-NEXT:    ubfx r1, r1, #29, #1
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+  %3 = extractvalue { <4 x i32>, i32 } %2, 1
+  %4 = lshr i32 %3, 29
+  %5 = and i32 %4, 1
+  store i32 %5, ptr %carry_out, align 4
+  %6 = extractvalue { <4 x i32>, i32 } %2, 0
+  ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
+; CHECK-LABEL: test_vsbcq_m_u32_carry_in_zero:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    vmsr p0, r1
+; CHECK-NEXT:    vmsr fpscr_nzcvqc, r2
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vsbct.i32 q0, q1, q2
+; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
+; CHECK-NEXT:    ubfx r1, r1, #29, #1
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    bx lr
 entry:
   %0 = zext i16 %p to i32
   %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)