Skip to content

Commit 8aeae0a

Browse files
[AArch64] Fix vectorToScalarBitmask BE (llvm#156314)
Closes llvm#156312
1 parent 5a86dc9 commit 8aeae0a

File tree

2 files changed

+83
-3
lines changed

2 files changed

+83
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23839,14 +23839,18 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2383923839
// Ensure that all elements' bits are either 0s or 1s.
2384023840
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
2384123841

23842+
bool IsLE = DAG.getDataLayout().isLittleEndian();
2384223843
SmallVector<SDValue, 16> MaskConstants;
2384323844
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
2384423845
VecVT == MVT::v16i8) {
2384523846
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
2384623847
// per entry. We split it into two halves, apply the mask, zip the halves to
2384723848
// create 8x 16-bit values, and then perform the vector reduce.
2384823849
for (unsigned Half = 0; Half < 2; ++Half) {
23849-
for (unsigned MaskBit = 1; MaskBit <= 128; MaskBit *= 2) {
23850+
for (unsigned I = 0; I < 8; ++I) {
23851+
// On big-endian targets, the lane order in sub-byte vector elements
23852+
// gets reversed, so we need to flip the bit index.
23853+
unsigned MaskBit = IsLE ? (1u << I) : (1u << (7 - I));
2385023854
MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i32));
2385123855
}
2385223856
}
@@ -23864,8 +23868,9 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2386423868
}
2386523869

2386623870
// All other vector sizes.
23867-
unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
23868-
for (unsigned MaskBit = 1; MaskBit <= MaxBitMask; MaskBit *= 2) {
23871+
unsigned NumEl = VecVT.getVectorNumElements();
23872+
for (unsigned I = 0; I < NumEl; ++I) {
23873+
unsigned MaskBit = IsLE ? (1u << I) : (1u << (NumEl - 1 - I));
2386923874
MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i64));
2387023875
}
2387123876

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; RUN: llc -O3 -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK-LE
2+
; RUN: llc -O3 -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK-BE
3+
4+
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
5+
%cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
6+
%bitmask = bitcast <16 x i1> %cmp_result to i16
7+
ret i16 %bitmask
8+
}
9+
10+
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
11+
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
12+
%bitmask = bitcast <8 x i1> %cmp_result to i8
13+
%extended_bitmask = zext i8 %bitmask to i16
14+
ret i16 %extended_bitmask
15+
}
16+
17+
; Little endian
18+
19+
; CHECK-LE-LABEL: .LCPI0_0:
20+
; CHECK-LE-NEXT: .byte 1
21+
; CHECK-LE-NEXT: .byte 2
22+
; CHECK-LE-NEXT: .byte 4
23+
; CHECK-LE-NEXT: .byte 8
24+
; CHECK-LE-NEXT: .byte 16
25+
; CHECK-LE-NEXT: .byte 32
26+
; CHECK-LE-NEXT: .byte 64
27+
; CHECK-LE-NEXT: .byte 128
28+
; CHECK-LE-NEXT: .byte 1
29+
; CHECK-LE-NEXT: .byte 2
30+
; CHECK-LE-NEXT: .byte 4
31+
; CHECK-LE-NEXT: .byte 8
32+
; CHECK-LE-NEXT: .byte 16
33+
; CHECK-LE-NEXT: .byte 32
34+
; CHECK-LE-NEXT: .byte 64
35+
; CHECK-LE-NEXT: .byte 128
36+
37+
; CHECK-LE-LABEL: .LCPI1_0:
38+
; CHECK-LE-NEXT: .hword 1
39+
; CHECK-LE-NEXT: .hword 2
40+
; CHECK-LE-NEXT: .hword 4
41+
; CHECK-LE-NEXT: .hword 8
42+
; CHECK-LE-NEXT: .hword 16
43+
; CHECK-LE-NEXT: .hword 32
44+
; CHECK-LE-NEXT: .hword 64
45+
; CHECK-LE-NEXT: .hword 128
46+
47+
; Big endian
48+
49+
; CHECK-BE-LABEL: .LCPI0_0:
50+
; CHECK-BE-NEXT: .byte 128
51+
; CHECK-BE-NEXT: .byte 64
52+
; CHECK-BE-NEXT: .byte 32
53+
; CHECK-BE-NEXT: .byte 16
54+
; CHECK-BE-NEXT: .byte 8
55+
; CHECK-BE-NEXT: .byte 4
56+
; CHECK-BE-NEXT: .byte 2
57+
; CHECK-BE-NEXT: .byte 1
58+
; CHECK-BE-NEXT: .byte 128
59+
; CHECK-BE-NEXT: .byte 64
60+
; CHECK-BE-NEXT: .byte 32
61+
; CHECK-BE-NEXT: .byte 16
62+
; CHECK-BE-NEXT: .byte 8
63+
; CHECK-BE-NEXT: .byte 4
64+
; CHECK-BE-NEXT: .byte 2
65+
; CHECK-BE-NEXT: .byte 1
66+
67+
; CHECK-BE-LABEL: .LCPI1_0:
68+
; CHECK-BE-NEXT: .hword 128
69+
; CHECK-BE-NEXT: .hword 64
70+
; CHECK-BE-NEXT: .hword 32
71+
; CHECK-BE-NEXT: .hword 16
72+
; CHECK-BE-NEXT: .hword 8
73+
; CHECK-BE-NEXT: .hword 4
74+
; CHECK-BE-NEXT: .hword 2
75+
; CHECK-BE-NEXT: .hword 1

0 commit comments

Comments
 (0)