Skip to content

Commit 700a905

Browse files
committed
[DAGISel][ARM] Fix vector truncate combine for big-endian
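This DAG combine was incorrect for big-endian targets because it assumed that, when a bitcast changes the lane width, the least-significant bits of each wider lane end up in the lower-numbered lane of the narrower type, which is only true for little-endian targets. The fix makes the combine start from the last narrow element of each group (index TruncEltOffset - 1) on big-endian targets, instead of always starting from element 0.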
1 parent 7fb3d04 commit 700a905

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15495,12 +15495,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1549515495
unsigned BuildVecNumElts = BuildVect.getNumOperands();
1549615496
unsigned TruncVecNumElts = VT.getVectorNumElements();
1549715497
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15498+
unsigned FirstElt =
15499+
DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0;
1549815500

1549915501
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
1550015502
"Invalid number of elements");
1550115503

1550215504
SmallVector<SDValue, 8> Opnds;
15503-
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15505+
for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15506+
i += TruncEltOffset)
1550415507
Opnds.push_back(BuildVect.getOperand(i));
1550515508

1550615509
return DAG.getBuildVector(VT, DL, Opnds);

llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,23 @@
44
define i32 @test(i64 %arg1) "target-features"="+neon" {
55
; CHECK-LABEL: test:
66
; CHECK: @ %bb.0: @ %entry
7-
; CHECK-NEXT: mov r0, #0
7+
; CHECK-NEXT: subs r1, r1, #1
8+
; CHECK-NEXT: mov r2, #0
9+
; CHECK-NEXT: sbcs r0, r0, #0
10+
; CHECK-NEXT: vldr s0, .LCPI0_0
11+
; CHECK-NEXT: movwhs r2, #1
12+
; CHECK-NEXT: cmp r2, #0
13+
; CHECK-NEXT: mvnne r2, #0
14+
; CHECK-NEXT: vmov s1, r2
15+
; CHECK-NEXT: vmovn.i32 d16, q0
16+
; CHECK-NEXT: vmovn.i16 d16, q8
17+
; CHECK-NEXT: vmov.u8 r0, d16[0]
18+
; CHECK-NEXT: and r0, r0, #1
819
; CHECK-NEXT: bx lr
20+
; CHECK-NEXT: .p2align 2
21+
; CHECK-NEXT: @ %bb.1:
22+
; CHECK-NEXT: .LCPI0_0:
23+
; CHECK-NEXT: .long 0xffffffff @ float NaN
924
entry:
1025
%insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
1126
%splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer

0 commit comments

Comments
 (0)