Skip to content

Commit d282cc8

Browse files
committed
address comments
1 parent c2506b5 commit d282cc8

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18421,9 +18421,10 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
1842118421

1842218422
EVT VT = N->getValueType(0);
1842318423

18424-
// For negative divisor, this yields (ptrue + asrd + subr) which is not
18425-
// profitable as compared to Neon sequence (cmlt + usra + sshr).
18426-
if (Subtarget->hasSVE() && !Divisor.isNegatedPowerOf2())
18424+
// If SVE is available, we can generate
18425+
// sdiv(x,y) -> ptrue + asrd, where 'y' is a positive power-of-2 divisor.
18426+
// sdiv(x,y) -> ptrue + asrd + subr, where 'y' is a negative power-of-2 divisor.
18427+
if (Subtarget->hasSVE() && N->getValueType(0).isVector())
1842718428
return SDValue(N, 0);
1842818429

1842918430
// For scalable and fixed types, mark them as cheap so we can handle it much

llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ target triple = "aarch64-unknown-linux-gnu"
99
define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
1010
; CHECK-LABEL: sdiv_v4i32_negative_pow2_divisor_packed:
1111
; CHECK: // %bb.0:
12-
; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
13-
; CHECK-NEXT: usra v0.4s, v1.4s, #29
14-
; CHECK-NEXT: sshr v0.4s, v0.4s, #3
15-
; CHECK-NEXT: neg v0.4s, v0.4s
12+
; CHECK-NEXT: ptrue p0.s, vl4
13+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
14+
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
15+
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
16+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1617
; CHECK-NEXT: ret
1718
%res = sdiv <4 x i32> %op1, splat (i32 -8)
1819
ret <4 x i32> %res
@@ -21,10 +22,11 @@ define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale
2122
define <2 x i32> @sdiv_v2i32_negative_pow2_divisor_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
2223
; CHECK-LABEL: sdiv_v2i32_negative_pow2_divisor_unpacked:
2324
; CHECK: // %bb.0:
24-
; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
25-
; CHECK-NEXT: usra v0.2s, v1.2s, #29
26-
; CHECK-NEXT: sshr v0.2s, v0.2s, #3
27-
; CHECK-NEXT: neg v0.2s, v0.2s
25+
; CHECK-NEXT: ptrue p0.s, vl2
26+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
27+
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
28+
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
29+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2830
; CHECK-NEXT: ret
2931
%res = sdiv <2 x i32> %op1, splat (i32 -8)
3032
ret <2 x i32> %res

0 commit comments

Comments
 (0)