Skip to content

Commit 8fec9c5

Browse files
Automerge: [AArch64] Fix vectorToScalarBitmask BE (#156314)
Closes #156312
2 parents a6a7a83 + 9892dc1 commit 8fec9c5

File tree

2 files changed

+83
-3
lines changed

2 files changed

+83
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24329,14 +24329,18 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2432924329
// Ensure that all elements' bits are either 0s or 1s.
2433024330
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
2433124331

24332+
bool IsLE = DAG.getDataLayout().isLittleEndian();
2433224333
SmallVector<SDValue, 16> MaskConstants;
2433324334
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
2433424335
VecVT == MVT::v16i8) {
2433524336
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
2433624337
// per entry. We split it into two halves, apply the mask, zip the halves to
2433724338
// create 8x 16-bit values, and then perform the vector reduce.
2433824339
for (unsigned Half = 0; Half < 2; ++Half) {
24339-
for (unsigned MaskBit = 1; MaskBit <= 128; MaskBit *= 2) {
24340+
for (unsigned I = 0; I < 8; ++I) {
24341+
// On big-endian targets, the lane order in sub-byte vector elements
24342+
// gets reversed, so we need to flip the bit index.
24343+
unsigned MaskBit = IsLE ? (1u << I) : (1u << (7 - I));
2434024344
MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i32));
2434124345
}
2434224346
}
@@ -24354,8 +24358,9 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2435424358
}
2435524359

2435624360
// All other vector sizes.
24357-
unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
24358-
for (unsigned MaskBit = 1; MaskBit <= MaxBitMask; MaskBit *= 2) {
24361+
unsigned NumEl = VecVT.getVectorNumElements();
24362+
for (unsigned I = 0; I < NumEl; ++I) {
24363+
unsigned MaskBit = IsLE ? (1u << I) : (1u << (NumEl - 1 - I));
2435924364
MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i64));
2436024365
}
2436124366

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; RUN: llc -O3 -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK-LE
2+
; RUN: llc -O3 -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK-BE
3+
4+
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
5+
%cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
6+
%bitmask = bitcast <16 x i1> %cmp_result to i16
7+
ret i16 %bitmask
8+
}
9+
10+
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
11+
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
12+
%bitmask = bitcast <8 x i1> %cmp_result to i8
13+
%extended_bitmask = zext i8 %bitmask to i16
14+
ret i16 %extended_bitmask
15+
}
16+
17+
; Little endian
18+
19+
; CHECK-LE-LABEL: .LCPI0_0:
20+
; CHECK-LE-NEXT: .byte 1
21+
; CHECK-LE-NEXT: .byte 2
22+
; CHECK-LE-NEXT: .byte 4
23+
; CHECK-LE-NEXT: .byte 8
24+
; CHECK-LE-NEXT: .byte 16
25+
; CHECK-LE-NEXT: .byte 32
26+
; CHECK-LE-NEXT: .byte 64
27+
; CHECK-LE-NEXT: .byte 128
28+
; CHECK-LE-NEXT: .byte 1
29+
; CHECK-LE-NEXT: .byte 2
30+
; CHECK-LE-NEXT: .byte 4
31+
; CHECK-LE-NEXT: .byte 8
32+
; CHECK-LE-NEXT: .byte 16
33+
; CHECK-LE-NEXT: .byte 32
34+
; CHECK-LE-NEXT: .byte 64
35+
; CHECK-LE-NEXT: .byte 128
36+
37+
; CHECK-LE-LABEL: .LCPI1_0:
38+
; CHECK-LE-NEXT: .hword 1
39+
; CHECK-LE-NEXT: .hword 2
40+
; CHECK-LE-NEXT: .hword 4
41+
; CHECK-LE-NEXT: .hword 8
42+
; CHECK-LE-NEXT: .hword 16
43+
; CHECK-LE-NEXT: .hword 32
44+
; CHECK-LE-NEXT: .hword 64
45+
; CHECK-LE-NEXT: .hword 128
46+
47+
; Big endian
48+
49+
; CHECK-BE-LABEL: .LCPI0_0:
50+
; CHECK-BE-NEXT: .byte 128
51+
; CHECK-BE-NEXT: .byte 64
52+
; CHECK-BE-NEXT: .byte 32
53+
; CHECK-BE-NEXT: .byte 16
54+
; CHECK-BE-NEXT: .byte 8
55+
; CHECK-BE-NEXT: .byte 4
56+
; CHECK-BE-NEXT: .byte 2
57+
; CHECK-BE-NEXT: .byte 1
58+
; CHECK-BE-NEXT: .byte 128
59+
; CHECK-BE-NEXT: .byte 64
60+
; CHECK-BE-NEXT: .byte 32
61+
; CHECK-BE-NEXT: .byte 16
62+
; CHECK-BE-NEXT: .byte 8
63+
; CHECK-BE-NEXT: .byte 4
64+
; CHECK-BE-NEXT: .byte 2
65+
; CHECK-BE-NEXT: .byte 1
66+
67+
; CHECK-BE-LABEL: .LCPI1_0:
68+
; CHECK-BE-NEXT: .hword 128
69+
; CHECK-BE-NEXT: .hword 64
70+
; CHECK-BE-NEXT: .hword 32
71+
; CHECK-BE-NEXT: .hword 16
72+
; CHECK-BE-NEXT: .hword 8
73+
; CHECK-BE-NEXT: .hword 4
74+
; CHECK-BE-NEXT: .hword 2
75+
; CHECK-BE-NEXT: .hword 1

0 commit comments

Comments
 (0)