@@ -9,10 +9,11 @@ target triple = "aarch64-unknown-linux-gnu"
99define <4 x i32 > @sdiv_v4i32_negative_pow2_divisor_packed (<4 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
1010; CHECK-LABEL: sdiv_v4i32_negative_pow2_divisor_packed:
1111; CHECK: // %bb.0:
12- ; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
13- ; CHECK-NEXT: usra v0.4s, v1.4s, #29
14- ; CHECK-NEXT: sshr v0.4s, v0.4s, #3
15- ; CHECK-NEXT: neg v0.4s, v0.4s
12+ ; CHECK-NEXT: ptrue p0.s, vl4
13+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
14+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
15+ ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
16+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1617; CHECK-NEXT: ret
1718 %res = sdiv <4 x i32 > %op1 , splat (i32 -8 )
1819 ret <4 x i32 > %res
@@ -21,10 +22,11 @@ define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale
2122define <2 x i32 > @sdiv_v2i32_negative_pow2_divisor_unpacked (<2 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
2223; CHECK-LABEL: sdiv_v2i32_negative_pow2_divisor_unpacked:
2324; CHECK: // %bb.0:
24- ; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
25- ; CHECK-NEXT: usra v0.2s, v1.2s, #29
26- ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
27- ; CHECK-NEXT: neg v0.2s, v0.2s
25+ ; CHECK-NEXT: ptrue p0.s, vl2
26+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
27+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
28+ ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
29+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2830; CHECK-NEXT: ret
2931 %res = sdiv <2 x i32 > %op1 , splat (i32 -8 )
3032 ret <2 x i32 > %res
@@ -33,9 +35,10 @@ define <2 x i32> @sdiv_v2i32_negative_pow2_divisor_unpacked(<2 x i32> %op1) vsca
3335define <4 x i32 > @sdiv_v4i32_positive_pow2_divisor_packed (<4 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
3436; CHECK-LABEL: sdiv_v4i32_positive_pow2_divisor_packed:
3537; CHECK: // %bb.0:
36- ; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
37- ; CHECK-NEXT: usra v0.4s, v1.4s, #29
38- ; CHECK-NEXT: sshr v0.4s, v0.4s, #3
38+ ; CHECK-NEXT: ptrue p0.s, vl4
39+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
40+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
41+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3942; CHECK-NEXT: ret
4043 %res = sdiv <4 x i32 > %op1 , splat (i32 8 )
4144 ret <4 x i32 > %res
@@ -44,9 +47,10 @@ define <4 x i32> @sdiv_v4i32_positive_pow2_divisor_packed(<4 x i32> %op1) vscale
4447define <2 x i32 > @sdiv_v2i32_positive_pow2_divisor_unpacked (<2 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
4548; CHECK-LABEL: sdiv_v2i32_positive_pow2_divisor_unpacked:
4649; CHECK: // %bb.0:
47- ; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
48- ; CHECK-NEXT: usra v0.2s, v1.2s, #29
49- ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
50+ ; CHECK-NEXT: ptrue p0.s, vl2
51+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
52+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
53+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
5054; CHECK-NEXT: ret
5155 %res = sdiv <2 x i32 > %op1 , splat (i32 8 )
5256 ret <2 x i32 > %res
@@ -95,19 +99,12 @@ define void @sdiv_v64i8(ptr %a) #0 {
9599; VBITS_GE_128-LABEL: sdiv_v64i8:
96100; VBITS_GE_128: // %bb.0:
97101; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
98- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
99- ; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
100- ; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
101- ; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
102- ; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
103- ; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
104- ; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
105- ; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
106- ; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
107- ; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
108- ; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
109- ; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
110- ; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
102+ ; VBITS_GE_128-NEXT: ptrue p0.b, vl16
103+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
104+ ; VBITS_GE_128-NEXT: asrd z0.b, p0/m, z0.b, #5
105+ ; VBITS_GE_128-NEXT: asrd z1.b, p0/m, z1.b, #5
106+ ; VBITS_GE_128-NEXT: asrd z2.b, p0/m, z2.b, #5
107+ ; VBITS_GE_128-NEXT: asrd z3.b, p0/m, z3.b, #5
111108; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
112109; VBITS_GE_128-NEXT: stp q2, q3, [x0]
113110; VBITS_GE_128-NEXT: ret
@@ -209,19 +206,12 @@ define void @sdiv_v32i16(ptr %a) #0 {
209206; VBITS_GE_128-LABEL: sdiv_v32i16:
210207; VBITS_GE_128: // %bb.0:
211208; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
212- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
213- ; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
214- ; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
215- ; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
216- ; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
217- ; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
218- ; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
219- ; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
220- ; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
221- ; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
222- ; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
223- ; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
224- ; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
209+ ; VBITS_GE_128-NEXT: ptrue p0.h, vl8
210+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
211+ ; VBITS_GE_128-NEXT: asrd z0.h, p0/m, z0.h, #5
212+ ; VBITS_GE_128-NEXT: asrd z1.h, p0/m, z1.h, #5
213+ ; VBITS_GE_128-NEXT: asrd z2.h, p0/m, z2.h, #5
214+ ; VBITS_GE_128-NEXT: asrd z3.h, p0/m, z3.h, #5
225215; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
226216; VBITS_GE_128-NEXT: stp q2, q3, [x0]
227217; VBITS_GE_128-NEXT: ret
@@ -324,19 +314,12 @@ define void @sdiv_v16i32(ptr %a) #0 {
324314; VBITS_GE_128-LABEL: sdiv_v16i32:
325315; VBITS_GE_128: // %bb.0:
326316; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
327- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
328- ; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
329- ; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
330- ; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
331- ; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
332- ; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
333- ; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
334- ; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
335- ; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
336- ; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
337- ; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
338- ; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
339- ; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
317+ ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
318+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
319+ ; VBITS_GE_128-NEXT: asrd z0.s, p0/m, z0.s, #5
320+ ; VBITS_GE_128-NEXT: asrd z1.s, p0/m, z1.s, #5
321+ ; VBITS_GE_128-NEXT: asrd z2.s, p0/m, z2.s, #5
322+ ; VBITS_GE_128-NEXT: asrd z3.s, p0/m, z3.s, #5
340323; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
341324; VBITS_GE_128-NEXT: stp q2, q3, [x0]
342325; VBITS_GE_128-NEXT: ret
@@ -439,19 +422,12 @@ define void @sdiv_v8i64(ptr %a) #0 {
439422; VBITS_GE_128-LABEL: sdiv_v8i64:
440423; VBITS_GE_128: // %bb.0:
441424; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
442- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
443- ; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
444- ; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
445- ; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
446- ; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
447- ; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
448- ; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
449- ; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
450- ; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
451- ; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
452- ; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
453- ; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
454- ; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
425+ ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
426+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
427+ ; VBITS_GE_128-NEXT: asrd z0.d, p0/m, z0.d, #5
428+ ; VBITS_GE_128-NEXT: asrd z1.d, p0/m, z1.d, #5
429+ ; VBITS_GE_128-NEXT: asrd z2.d, p0/m, z2.d, #5
430+ ; VBITS_GE_128-NEXT: asrd z3.d, p0/m, z3.d, #5
455431; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
456432; VBITS_GE_128-NEXT: stp q2, q3, [x0]
457433; VBITS_GE_128-NEXT: ret
0 commit comments