Skip to content

Commit ccaeebc

Browse files
[AArch64][SME] Improve codegen for aarch64.sme.cnts* when not in streaming mode (#154761)
Builtins for reading the streaming vector length are canonicalised to use the aarch64.sme.cntsd intrinsic and a multiply, i.e. cntsb -> cntsd * 8, cntsh -> cntsd * 4, and cntsw -> cntsd * 2. This patch also removes the LLVM intrinsics for cnts[b,h,w], and adds patterns to improve codegen when cntsd is multiplied by a constant.
1 parent 2331fbb commit ccaeebc

File tree

19 files changed

+195
-168
lines changed

19 files changed

+195
-168
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,16 +156,10 @@ let SMETargetGuard = "sme2p1" in {
156156
////////////////////////////////////////////////////////////////////////////////
157157
// SME - Counting elements in a streaming vector
158158

159-
multiclass ZACount<string n_suffix> {
160-
def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone,
161-
"aarch64_sme_" # n_suffix,
162-
[IsOverloadNone, IsStreamingCompatible]>;
163-
}
164-
165-
defm SVCNTSB : ZACount<"cntsb">;
166-
defm SVCNTSH : ZACount<"cntsh">;
167-
defm SVCNTSW : ZACount<"cntsw">;
168-
defm SVCNTSD : ZACount<"cntsd">;
159+
def SVCNTSB : SInst<"svcntsb", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
160+
def SVCNTSH : SInst<"svcntsh", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
161+
def SVCNTSW : SInst<"svcntsw", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
162+
def SVCNTSD : SInst<"svcntsd", "nv", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible]>;
169163

170164
////////////////////////////////////////////////////////////////////////////////
171165
// SME - ADDHA/ADDVA

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4304,9 +4304,11 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
43044304
// size in bytes.
43054305
if (Ops.size() == 5) {
43064306
Function *StreamingVectorLength =
4307-
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
4307+
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
43084308
llvm::Value *StreamingVectorLengthCall =
4309-
Builder.CreateCall(StreamingVectorLength);
4309+
Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4310+
llvm::ConstantInt::get(Int64Ty, 8), "svl",
4311+
/* HasNUW */ true, /* HasNSW */ true);
43104312
llvm::Value *Mulvl =
43114313
Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
43124314
// The type of the ptr parameter is void *, so use Int8Ty here.
@@ -4918,6 +4920,26 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
49184920
// Handle builtins which require their multi-vector operands to be swapped
49194921
swapCommutativeSMEOperands(BuiltinID, Ops);
49204922

4923+
auto isCntsBuiltin = [&]() {
4924+
switch (BuiltinID) {
4925+
default:
4926+
return 0;
4927+
case SME::BI__builtin_sme_svcntsb:
4928+
return 8;
4929+
case SME::BI__builtin_sme_svcntsh:
4930+
return 4;
4931+
case SME::BI__builtin_sme_svcntsw:
4932+
return 2;
4933+
}
4934+
};
4935+
4936+
if (auto Mul = isCntsBuiltin()) {
4937+
llvm::Value *Cntd =
4938+
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4939+
return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4940+
"mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4941+
}
4942+
49214943
// Should not happen!
49224944
if (Builtin->LLVMIntrinsic == 0)
49234945
return nullptr;

clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_cnt.c

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,49 +6,55 @@
66

77
#include <arm_sme.h>
88

9-
// CHECK-C-LABEL: define dso_local i64 @test_svcntsb(
9+
// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsb(
1010
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1111
// CHECK-C-NEXT: entry:
12-
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
13-
// CHECK-C-NEXT: ret i64 [[TMP0]]
12+
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
13+
// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3
14+
// CHECK-C-NEXT: ret i64 [[MULSVL]]
1415
//
15-
// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntsbv(
16+
// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntsbv(
1617
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1718
// CHECK-CXX-NEXT: entry:
18-
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
19-
// CHECK-CXX-NEXT: ret i64 [[TMP0]]
19+
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
20+
// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 3
21+
// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
2022
//
2123
uint64_t test_svcntsb() {
2224
return svcntsb();
2325
}
2426

25-
// CHECK-C-LABEL: define dso_local i64 @test_svcntsh(
27+
// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsh(
2628
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] {
2729
// CHECK-C-NEXT: entry:
28-
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh()
29-
// CHECK-C-NEXT: ret i64 [[TMP0]]
30+
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
31+
// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2
32+
// CHECK-C-NEXT: ret i64 [[MULSVL]]
3033
//
31-
// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntshv(
34+
// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntshv(
3235
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] {
3336
// CHECK-CXX-NEXT: entry:
34-
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh()
35-
// CHECK-CXX-NEXT: ret i64 [[TMP0]]
37+
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
38+
// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 2
39+
// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
3640
//
3741
uint64_t test_svcntsh() {
3842
return svcntsh();
3943
}
4044

41-
// CHECK-C-LABEL: define dso_local i64 @test_svcntsw(
45+
// CHECK-C-LABEL: define dso_local range(i64 0, -9223372036854775808) i64 @test_svcntsw(
4246
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] {
4347
// CHECK-C-NEXT: entry:
44-
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw()
45-
// CHECK-C-NEXT: ret i64 [[TMP0]]
48+
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
49+
// CHECK-C-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1
50+
// CHECK-C-NEXT: ret i64 [[MULSVL]]
4651
//
47-
// CHECK-CXX-LABEL: define dso_local noundef i64 @_Z12test_svcntswv(
52+
// CHECK-CXX-LABEL: define dso_local noundef range(i64 0, -9223372036854775808) i64 @_Z12test_svcntswv(
4853
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] {
4954
// CHECK-CXX-NEXT: entry:
50-
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw()
51-
// CHECK-CXX-NEXT: ret i64 [[TMP0]]
55+
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd()
56+
// CHECK-CXX-NEXT: [[MULSVL:%.*]] = shl nuw nsw i64 [[TMP0]], 1
57+
// CHECK-CXX-NEXT: ret i64 [[MULSVL]]
5258
//
5359
uint64_t test_svcntsw() {
5460
return svcntsw();

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,13 +3147,8 @@ let TargetPrefix = "aarch64" in {
31473147
// Counting elements
31483148
//
31493149

3150-
class AdvSIMD_SME_CNTSB_Intrinsic
3151-
: DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
3152-
3153-
def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic;
3154-
def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic;
3155-
def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic;
3156-
def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic;
3150+
def int_aarch64_sme_cntsd
3151+
: DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
31573152

31583153
//
31593154
// PSTATE Functions

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
7171
template <signed Low, signed High, signed Scale>
7272
bool SelectRDVLImm(SDValue N, SDValue &Imm);
7373

74+
template <signed Low, signed High>
75+
bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76+
7477
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
7578
bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
7679
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
@@ -938,6 +941,21 @@ bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
938941
return false;
939942
}
940943

944+
// Returns a suitable RDSVL multiplier from a left shift.
945+
template <signed Low, signed High>
946+
bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
947+
if (!isa<ConstantSDNode>(N))
948+
return false;
949+
950+
int64_t MulImm = 1 << cast<ConstantSDNode>(N)->getSExtValue();
951+
if (MulImm >= Low && MulImm <= High) {
952+
Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
953+
return true;
954+
}
955+
956+
return false;
957+
}
958+
941959
/// SelectArithExtendedRegister - Select a "extended register" operand. This
942960
/// operand folds in an extend followed by an optional left shift.
943961
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6392,25 +6392,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
63926392
case Intrinsic::aarch64_sve_clz:
63936393
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(),
63946394
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6395-
case Intrinsic::aarch64_sme_cntsb:
6396-
return DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
6397-
DAG.getConstant(1, DL, MVT::i32));
6398-
case Intrinsic::aarch64_sme_cntsh: {
6399-
SDValue One = DAG.getConstant(1, DL, MVT::i32);
6400-
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), One);
6401-
return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, One);
6402-
}
6403-
case Intrinsic::aarch64_sme_cntsw: {
6404-
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
6405-
DAG.getConstant(1, DL, MVT::i32));
6406-
return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
6407-
DAG.getConstant(2, DL, MVT::i32));
6408-
}
64096395
case Intrinsic::aarch64_sme_cntsd: {
64106396
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
64116397
DAG.getConstant(1, DL, MVT::i32));
64126398
return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
6413-
DAG.getConstant(3, DL, MVT::i32));
6399+
DAG.getConstant(3, DL, MVT::i32), SDNodeFlags::Exact);
64146400
}
64156401
case Intrinsic::aarch64_sve_cnt: {
64166402
SDValue Data = Op.getOperand(3);

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,16 @@ def : Pat<(AArch64_sme_state_alloc), (SMEStateAllocPseudo)>;
134134
def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
135135
def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;
136136

137+
def sme_rdsvl_shl_imm : ComplexPattern<i64, 1, "SelectRDSVLShiftImm<1, 31>">;
138+
137139
let Predicates = [HasSMEandIsNonStreamingSafe] in {
138140
def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
139141
def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
140142
def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
141143

144+
def : Pat<(i64 (shl (AArch64rdsvl (i32 1)), (sme_rdsvl_shl_imm i64:$imm))),
145+
(RDSVLI_XI (!cast<SDNodeXForm>("trunc_imm") $imm))>;
146+
142147
def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
143148
}
144149

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2103,15 +2103,15 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
21032103
}
21042104

21052105
static std::optional<Instruction *>
2106-
instCombineSMECntsElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts,
2107-
const AArch64Subtarget *ST) {
2106+
instCombineSMECntsd(InstCombiner &IC, IntrinsicInst &II,
2107+
const AArch64Subtarget *ST) {
21082108
if (!ST->isStreaming())
21092109
return std::nullopt;
21102110

2111-
// In streaming-mode, aarch64_sme_cnts is equivalent to aarch64_sve_cnt
2111+
// In streaming-mode, aarch64_sme_cntsd is equivalent to aarch64_sve_cntd
21122112
// with SVEPredPattern::all
2113-
Value *Cnt = IC.Builder.CreateElementCount(
2114-
II.getType(), ElementCount::getScalable(NumElts));
2113+
Value *Cnt =
2114+
IC.Builder.CreateElementCount(II.getType(), ElementCount::getScalable(2));
21152115
Cnt->takeName(&II);
21162116
return IC.replaceInstUsesWith(II, Cnt);
21172117
}
@@ -2826,13 +2826,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
28262826
case Intrinsic::aarch64_sve_cntb:
28272827
return instCombineSVECntElts(IC, II, 16);
28282828
case Intrinsic::aarch64_sme_cntsd:
2829-
return instCombineSMECntsElts(IC, II, 2, ST);
2830-
case Intrinsic::aarch64_sme_cntsw:
2831-
return instCombineSMECntsElts(IC, II, 4, ST);
2832-
case Intrinsic::aarch64_sme_cntsh:
2833-
return instCombineSMECntsElts(IC, II, 8, ST);
2834-
case Intrinsic::aarch64_sme_cntsb:
2835-
return instCombineSMECntsElts(IC, II, 16, ST);
2829+
return instCombineSMECntsd(IC, II, ST);
28362830
case Intrinsic::aarch64_sve_ptest_any:
28372831
case Intrinsic::aarch64_sve_ptest_first:
28382832
case Intrinsic::aarch64_sve_ptest_last:
Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,89 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
33

4-
define i64 @sme_cntsb() {
5-
; CHECK-LABEL: sme_cntsb:
4+
define i64 @cntsb() {
5+
; CHECK-LABEL: cntsb:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: rdsvl x0, #1
88
; CHECK-NEXT: ret
9-
%v = call i64 @llvm.aarch64.sme.cntsb()
10-
ret i64 %v
9+
%1 = call i64 @llvm.aarch64.sme.cntsd()
10+
%res = shl nuw nsw i64 %1, 3
11+
ret i64 %res
1112
}
1213

13-
define i64 @sme_cntsh() {
14-
; CHECK-LABEL: sme_cntsh:
14+
define i64 @cntsh() {
15+
; CHECK-LABEL: cntsh:
1516
; CHECK: // %bb.0:
1617
; CHECK-NEXT: rdsvl x8, #1
1718
; CHECK-NEXT: lsr x0, x8, #1
1819
; CHECK-NEXT: ret
19-
%v = call i64 @llvm.aarch64.sme.cntsh()
20-
ret i64 %v
20+
%1 = call i64 @llvm.aarch64.sme.cntsd()
21+
%res = shl nuw nsw i64 %1, 2
22+
ret i64 %res
2123
}
2224

23-
define i64 @sme_cntsw() {
24-
; CHECK-LABEL: sme_cntsw:
25+
define i64 @cntsw() {
26+
; CHECK-LABEL: cntsw:
2527
; CHECK: // %bb.0:
2628
; CHECK-NEXT: rdsvl x8, #1
2729
; CHECK-NEXT: lsr x0, x8, #2
2830
; CHECK-NEXT: ret
29-
%v = call i64 @llvm.aarch64.sme.cntsw()
30-
ret i64 %v
31+
%1 = call i64 @llvm.aarch64.sme.cntsd()
32+
%res = shl nuw nsw i64 %1, 1
33+
ret i64 %res
3134
}
3235

33-
define i64 @sme_cntsd() {
34-
; CHECK-LABEL: sme_cntsd:
36+
define i64 @cntsd() {
37+
; CHECK-LABEL: cntsd:
3538
; CHECK: // %bb.0:
3639
; CHECK-NEXT: rdsvl x8, #1
3740
; CHECK-NEXT: lsr x0, x8, #3
41+
; CHECK-NEXT: ret
42+
%res = call i64 @llvm.aarch64.sme.cntsd()
43+
ret i64 %res
44+
}
45+
46+
define i64 @sme_cntsb_mul() {
47+
; CHECK-LABEL: sme_cntsb_mul:
48+
; CHECK: // %bb.0:
49+
; CHECK-NEXT: rdsvl x0, #4
50+
; CHECK-NEXT: ret
51+
%v = call i64 @llvm.aarch64.sme.cntsd()
52+
%shl = shl nuw nsw i64 %v, 3
53+
%res = mul nuw nsw i64 %shl, 4
54+
ret i64 %res
55+
}
56+
57+
define i64 @sme_cntsh_mul() {
58+
; CHECK-LABEL: sme_cntsh_mul:
59+
; CHECK: // %bb.0:
60+
; CHECK-NEXT: rdsvl x0, #4
61+
; CHECK-NEXT: ret
62+
%v = call i64 @llvm.aarch64.sme.cntsd()
63+
%shl = shl nuw nsw i64 %v, 2
64+
%res = mul nuw nsw i64 %shl, 8
65+
ret i64 %res
66+
}
67+
68+
define i64 @sme_cntsw_mul() {
69+
; CHECK-LABEL: sme_cntsw_mul:
70+
; CHECK: // %bb.0:
71+
; CHECK-NEXT: rdsvl x0, #4
72+
; CHECK-NEXT: ret
73+
%v = call i64 @llvm.aarch64.sme.cntsd()
74+
%shl = shl nuw nsw i64 %v, 1
75+
%res = mul nuw nsw i64 %shl, 16
76+
ret i64 %res
77+
}
78+
79+
define i64 @sme_cntsd_mul() {
80+
; CHECK-LABEL: sme_cntsd_mul:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: rdsvl x0, #4
3883
; CHECK-NEXT: ret
3984
%v = call i64 @llvm.aarch64.sme.cntsd()
40-
ret i64 %v
85+
%res = mul nuw nsw i64 %v, 32
86+
ret i64 %res
4187
}
4288

43-
declare i64 @llvm.aarch64.sme.cntsb()
44-
declare i64 @llvm.aarch64.sme.cntsh()
45-
declare i64 @llvm.aarch64.sme.cntsw()
4689
declare i64 @llvm.aarch64.sme.cntsd()

llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,14 @@ entry:
7676
%Data1 = alloca <vscale x 16 x i8>, align 16
7777
%Data2 = alloca <vscale x 16 x i8>, align 16
7878
%Data3 = alloca <vscale x 16 x i8>, align 16
79-
%0 = tail call i64 @llvm.aarch64.sme.cntsb()
79+
%0 = tail call i64 @llvm.aarch64.sme.cntsd()
8080
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
8181
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
8282
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
8383
ret i8 %vecext
8484
}
8585

86-
declare i64 @llvm.aarch64.sme.cntsb()
86+
declare i64 @llvm.aarch64.sme.cntsd()
8787

8888
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
8989

0 commit comments

Comments
 (0)