Skip to content

Commit 8789d19

Browse files
committed
[AArch64] Use i32 extract from UADDV in popcount lowering.
We need the top bits to be zero, but a v8i8->i32 EXTRACT_VECTOR_ELT will any-extend into the top bits, leaving them undefined. The instruction we create (UADDV) is known to produce zeroes in the upper bits of its result, so we can instead cast the result to a wider v2i32 vector and extract the i32 element from there, similar to what is already done for i64 types (via v1i64).
1 parent e233002 commit 8789d19

File tree

2 files changed

+6
-9
lines changed

2 files changed

+6
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10852,13 +10852,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
1085210852

1085310853
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
1085410854
SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v8i8, CtPop);
10855-
if (VT == MVT::i32)
10856-
AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV,
10857-
DAG.getConstant(0, DL, MVT::i64));
10858-
else
10859-
AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
10860-
DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV),
10861-
DAG.getConstant(0, DL, MVT::i64));
10855+
AddV = DAG.getNode(AArch64ISD::NVCAST, DL,
10856+
VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
10857+
AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, AddV,
10858+
DAG.getConstant(0, DL, MVT::i64));
1086210859
if (IsParity)
1086310860
AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
1086410861
return AddV;

llvm/test/CodeGen/AArch64/popcount.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,7 @@ define i32 @ctpop_into_extract(ptr %p) {
686686
; CHECK-NEXT: fmov s1, w9
687687
; CHECK-NEXT: cnt v1.8b, v1.8b
688688
; CHECK-NEXT: addv b1, v1.8b
689-
; CHECK-NEXT: mov v2.b[4], v1.b[0]
689+
; CHECK-NEXT: mov v2.s[1], v1.s[0]
690690
; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
691691
; CHECK-NEXT: str d0, [x8]
692692
; CHECK-NEXT: ret
@@ -701,7 +701,7 @@ define i32 @ctpop_into_extract(ptr %p) {
701701
; BE-NEXT: fmov s1, w9
702702
; BE-NEXT: cnt v1.8b, v1.8b
703703
; BE-NEXT: addv b1, v1.8b
704-
; BE-NEXT: mov v2.b[4], v1.b[0]
704+
; BE-NEXT: mov v2.s[1], v1.s[0]
705705
; BE-NEXT: sub v0.2s, v0.2s, v2.2s
706706
; BE-NEXT: st1 { v0.2s }, [x8]
707707
; BE-NEXT: ret

0 commit comments

Comments
 (0)