-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAGISel][ARM] Fix vector truncate combine for big-endian #118101
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian.
|
@llvm/pr-subscribers-backend-arm Author: Oliver Stannard (ostannard) ChangesThis DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian. Full diff: https://github.com/llvm/llvm-project/pull/118101.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 521829675ae7c3..90aa3009fb5ef0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15495,12 +15495,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+ unsigned FirstElt =
+ DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
+ i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getBuildVector(VT, DL, Opnds);
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
new file mode 100644
index 00000000000000..cdc09754d2654c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s
+
+define i32 @test(i64 %arg1) "target-features"="+neon" {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: subs r1, r1, #1
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: sbcs r0, r0, #0
+; CHECK-NEXT: vldr s0, .LCPI0_0
+; CHECK-NEXT: movwhs r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmov s1, r2
+; CHECK-NEXT: vmovn.i32 d16, q0
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov.u8 r0, d16[0]
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 0xffffffff @ float NaN
+entry:
+ %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
+ %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
+ %cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
+ %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
+ %ext = zext i1 %first_cmp to i32
+ ret i32 %ext
+}
|
|
@llvm/pr-subscribers-llvm-selectiondag Author: Oliver Stannard (ostannard) ChangesThis DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian. Full diff: https://github.com/llvm/llvm-project/pull/118101.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 521829675ae7c3..90aa3009fb5ef0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15495,12 +15495,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+ unsigned FirstElt =
+ DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
+ i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getBuildVector(VT, DL, Opnds);
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
new file mode 100644
index 00000000000000..cdc09754d2654c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s
+
+define i32 @test(i64 %arg1) "target-features"="+neon" {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: subs r1, r1, #1
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: sbcs r0, r0, #0
+; CHECK-NEXT: vldr s0, .LCPI0_0
+; CHECK-NEXT: movwhs r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmov s1, r2
+; CHECK-NEXT: vmovn.i32 d16, q0
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov.u8 r0, d16[0]
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 0xffffffff @ float NaN
+entry:
+ %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
+ %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
+ %cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
+ %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
+ %ext = zext i1 %first_cmp to i32
+ ret i32 %ext
+}
|
| ; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s | ||
|
|
||
| define i32 @test(i64 %arg1) "target-features"="+neon" { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The -mattr does nothing with target-features, remove one or the other
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one minor
| unsigned TruncVecNumElts = VT.getVectorNumElements(); | ||
| unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; | ||
| unsigned FirstElt = | ||
| DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
visitTRUNCATE already has isLE - replace this with:
unsigned FirstElt =isLE ? 0 : (TruncEltOffset - 1);
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
This DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian.