Skip to content

Commit 773cc76

Browse files
- Add lowering of cntsd back into LowerINTRINSIC_WO_CHAIN
- Remove patterns for cntsd and add for AArch64rdsvl
- Add nsw/nuw flags to mul of cntsd in EmitSMELd1St1
1 parent 166f030 commit 773cc76

File tree

5 files changed

+27
-41
lines changed

5 files changed

+27
-41
lines changed

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4307,7 +4307,8 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
43074307
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
43084308
llvm::Value *StreamingVectorLengthCall =
43094309
Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4310-
llvm::ConstantInt::get(Int64Ty, 8), "svl");
4310+
llvm::ConstantInt::get(Int64Ty, 8), "svl",
4311+
/* HasNUW */ true, /* HasNSW */ true);
43114312
llvm::Value *Mulvl =
43124313
Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
43134314
// The type of the ptr parameter is void *, so use Int8Ty here.

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -940,20 +940,16 @@ bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
940940
return false;
941941
}
942942

943-
// Given `cntsd = (rdsvl, #1) >> 3`, attempt to return a suitable multiplier
944-
// for RDSVL to calculate `cntsd << N`, i.e. `rdsvl, #(1 << (N - 3))`.
943+
// Given a constant left-shift amount N, returns the RDSVL multiplier `1 << N` if it lies within [Low, High].
945944
template <signed Low, signed High>
946945
bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
947946
if (!isa<ConstantSDNode>(N))
948947
return false;
949948

950-
int64_t ShlImm = cast<ConstantSDNode>(N)->getSExtValue();
951-
if (ShlImm >= 3) {
952-
int64_t MulImm = 1 << (ShlImm - 3);
953-
if (MulImm >= Low && MulImm <= High) {
954-
Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
955-
return true;
956-
}
949+
int64_t MulImm = 1 << cast<ConstantSDNode>(N)->getSExtValue();
950+
if (MulImm >= Low && MulImm <= High) {
951+
Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
952+
return true;
957953
}
958954

959955
return false;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6266,6 +6266,16 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
62666266
case Intrinsic::aarch64_sve_clz:
62676267
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(),
62686268
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6269+
case Intrinsic::aarch64_sme_cntsd: {
6270+
auto Flags = SDNodeFlags();
6271+
Flags.setNoUnsignedWrap(true);
6272+
Flags.setNoSignedWrap(true);
6273+
Flags.setExact(true);
6274+
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
6275+
DAG.getConstant(1, DL, MVT::i32));
6276+
return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
6277+
DAG.getConstant(3, DL, MVT::i32), Flags);
6278+
}
62696279
case Intrinsic::aarch64_sve_cnt: {
62706280
SDValue Data = Op.getOperand(3);
62716281
// CTPOP only supports integer operands.
@@ -19180,9 +19190,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1918019190
if (ConstValue.sge(1) && ConstValue.sle(16))
1918119191
return SDValue();
1918219192

19183-
if (getIntrinsicID(N0.getNode()) == Intrinsic::aarch64_sme_cntsd)
19184-
return SDValue();
19185-
1918619193
// Multiplication of a power of two plus/minus one can be done more
1918719194
// cheaply as shift+add/sub. For now, this is true unilaterally. If
1918819195
// future CPUs have a cheaper MADD instruction, this may need to be

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -139,25 +139,10 @@ def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>
139139
def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
140140
def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
141141

142-
def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
143-
144-
// e.g. cntsb() * imm
145-
def : Pat<(i64 (mul (int_aarch64_sme_cntsd), (sme_cntsb_mul_imm i64:$imm))),
146-
(RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm))>;
147-
def : Pat<(i64 (mul (int_aarch64_sme_cntsd), (sme_cntsh_mul_imm i64:$imm))),
148-
(UBFMXri (RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm)), 1, 63)>;
149-
def : Pat<(i64 (mul (int_aarch64_sme_cntsd), (sme_cntsw_mul_imm i64:$imm))),
150-
(UBFMXri (RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm)), 2, 63)>;
151-
def : Pat<(i64 (mul (int_aarch64_sme_cntsd), (sme_cntsd_mul_imm i64:$imm))),
152-
(UBFMXri (RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm)), 3, 63)>;
153-
154-
def : Pat<(i64 (shl (int_aarch64_sme_cntsd), (sme_cnts_shl_imm i64:$imm))),
142+
def : Pat<(i64 (shl (AArch64rdsvl (i32 1)), (sme_cnts_shl_imm i64:$imm))),
155143
(RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm))>;
156144

157-
// cntsh, cntsw, cntsd
158-
def : Pat<(i64 (shl (int_aarch64_sme_cntsd), (i64 2))), (UBFMXri (RDSVLI_XI 1), 1, 63)>;
159-
def : Pat<(i64 (shl (int_aarch64_sme_cntsd), (i64 1))), (UBFMXri (RDSVLI_XI 1), 2, 63)>;
160-
def : Pat<(i64 (int_aarch64_sme_cntsd)), (UBFMXri (RDSVLI_XI 1), 3, 63)>;
145+
def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
161146
}
162147

163148
let Predicates = [HasSME] in {

llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,46 +46,43 @@ define i64 @cntsd() {
4646
define i64 @sme_cntsb_mul() {
4747
; CHECK-LABEL: sme_cntsb_mul:
4848
; CHECK: // %bb.0:
49-
; CHECK-NEXT: rdsvl x0, #2
49+
; CHECK-NEXT: rdsvl x0, #4
5050
; CHECK-NEXT: ret
5151
%v = call i64 @llvm.aarch64.sme.cntsd()
5252
%shl = shl nuw nsw i64 %v, 3
53-
%res = mul i64 %shl, 2
53+
%res = mul nuw nsw i64 %shl, 4
5454
ret i64 %res
5555
}
5656

5757
define i64 @sme_cntsh_mul() {
5858
; CHECK-LABEL: sme_cntsh_mul:
5959
; CHECK: // %bb.0:
60-
; CHECK-NEXT: rdsvl x8, #5
61-
; CHECK-NEXT: lsr x0, x8, #1
60+
; CHECK-NEXT: rdsvl x0, #4
6261
; CHECK-NEXT: ret
6362
%v = call i64 @llvm.aarch64.sme.cntsd()
6463
%shl = shl nuw nsw i64 %v, 2
65-
%res = mul i64 %shl, 5
64+
%res = mul nuw nsw i64 %shl, 8
6665
ret i64 %res
6766
}
6867

6968
define i64 @sme_cntsw_mul() {
7069
; CHECK-LABEL: sme_cntsw_mul:
7170
; CHECK: // %bb.0:
72-
; CHECK-NEXT: rdsvl x8, #7
73-
; CHECK-NEXT: lsr x0, x8, #2
71+
; CHECK-NEXT: rdsvl x0, #4
7472
; CHECK-NEXT: ret
7573
%v = call i64 @llvm.aarch64.sme.cntsd()
7674
%shl = shl nuw nsw i64 %v, 1
77-
%res = mul i64 %shl, 7
75+
%res = mul nuw nsw i64 %shl, 16
7876
ret i64 %res
7977
}
8078

8179
define i64 @sme_cntsd_mul() {
8280
; CHECK-LABEL: sme_cntsd_mul:
8381
; CHECK: // %bb.0:
84-
; CHECK-NEXT: rdsvl x8, #3
85-
; CHECK-NEXT: lsr x0, x8, #1
82+
; CHECK-NEXT: rdsvl x0, #4
8683
; CHECK-NEXT: ret
8784
%v = call i64 @llvm.aarch64.sme.cntsd()
88-
%res = mul i64 %v, 12
85+
%res = mul nuw nsw i64 %v, 32
8986
ret i64 %res
9087
}
9188

0 commit comments

Comments
 (0)