Skip to content

Commit 1c5bba1

Browse files
[LLVM][CodeGen][SVE] ASRD cannot represent sdiv-by-one. (#162708)
We lower signed divides by a power-of-two to ASRD. However, ASRD's immediate is the shift amount, i.e. log2(divisor), and must lie in the range 1 to the element bit-width, so it cannot represent sdiv-by-one (which would require a shift amount of 0). Fixes #162616.
1 parent 80f48b8 commit 1c5bba1

File tree

2 files changed

+59
-2
lines changed

2 files changed

+59
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16249,7 +16249,9 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1624916249

1625016250
bool Negated;
1625116251
uint64_t SplatVal;
16252-
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
16252+
// NOTE: SRAD cannot be used to represent sdiv-by-one.
16253+
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) &&
16254+
SplatVal > 1) {
1625316255
SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
1625416256
SDValue Res =
1625516257
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
@@ -30034,7 +30036,9 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
3003430036

3003530037
bool Negated;
3003630038
uint64_t SplatVal;
30037-
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
30039+
// NOTE: SRAD cannot be used to represent sdiv-by-one.
30040+
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) &&
30041+
SplatVal > 1) {
3003830042
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
3003930043
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
3004030044
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32);
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mattr=+sve -combiner-disabled < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Ensure we don't try to represent sdiv-by-one using ASRD.
7+
define <16 x i16> @sdiv_by_one_v16i16(<16 x i16> %a) vscale_range(2,2) {
8+
; CHECK-LABEL: sdiv_by_one_v16i16:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: ptrue p0.h
11+
; CHECK-NEXT: adrp x8, .LCPI0_0
12+
; CHECK-NEXT: add x8, x8, :lo12:.LCPI0_0
13+
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
14+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
15+
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x8]
16+
; CHECK-NEXT: sunpklo z0.s, z0.h
17+
; CHECK-NEXT: sunpklo z1.s, z1.h
18+
; CHECK-NEXT: ptrue p0.s
19+
; CHECK-NEXT: sunpklo z3.s, z2.h
20+
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #16
21+
; CHECK-NEXT: sunpklo z2.s, z2.h
22+
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z3.s
23+
; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z2.s
24+
; CHECK-NEXT: ptrue p0.h, vl8
25+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
26+
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
27+
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
28+
; CHECK-NEXT: movprfx z1, z0
29+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
30+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
31+
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
32+
; CHECK-NEXT: ret
33+
%res = sdiv <16 x i16> %a, splat(i16 1)
34+
ret <16 x i16> %res
35+
}
36+
37+
; Ensure we don't try to represent sdiv-by-one using ASRD.
38+
define <vscale x 8 x i16> @sdiv_by_one_nxv8i16(<vscale x 8 x i16> %a) {
39+
; CHECK-LABEL: sdiv_by_one_nxv8i16:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: mov z1.h, #1 // =0x1
42+
; CHECK-NEXT: sunpkhi z2.s, z0.h
43+
; CHECK-NEXT: sunpklo z0.s, z0.h
44+
; CHECK-NEXT: ptrue p0.s
45+
; CHECK-NEXT: sunpkhi z3.s, z1.h
46+
; CHECK-NEXT: sunpklo z1.s, z1.h
47+
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
48+
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
49+
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
50+
; CHECK-NEXT: ret
51+
%res = sdiv <vscale x 8 x i16> %a, splat(i16 1)
52+
ret <vscale x 8 x i16> %res
53+
}

0 commit comments

Comments
 (0)