From 7fb3d04e4c1cccb03f428e01d516466ef95a9e58 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 29 Nov 2024 14:43:30 +0000 Subject: [PATCH 1/5] Add test showing bug --- llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll new file mode 100644 index 0000000000000..5de8bf477e53e --- /dev/null +++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s + +define i32 @test(i64 %arg1) "target-features"="+neon" { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: bx lr +entry: + %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0 + %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer + %cmp_vec = icmp ule <8 x i64> , %splat_zero + %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0 + %ext = zext i1 %first_cmp to i32 + ret i32 %ext +} From 700a90545d68d7f4e40ccee5de67b2f7c7c85d9e Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 29 Nov 2024 14:45:33 +0000 Subject: [PATCH 2/5] [DAGISel][ARM] Fix vector truncate combine for big-endian This DAG combine was incorrect for big-endian targets, because it assumes that when a bitcast changes the lane width, the least-significant bits of the wider lanes are in the lower-numbered lanes of the smaller type, which is only true for little-endian. 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++++- .../test/CodeGen/ARM/big-endian-vector-trunc.ll | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 521829675ae7c..90aa3009fb5ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15495,12 +15495,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { unsigned BuildVecNumElts = BuildVect.getNumOperands(); unsigned TruncVecNumElts = VT.getVectorNumElements(); unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; + unsigned FirstElt = + DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0; assert((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements"); SmallVector<SDValue, 8> Opnds; - for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) + for (unsigned i = FirstElt, e = BuildVecNumElts; i < e; + i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); return DAG.getBuildVector(VT, DL, Opnds); diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll index 5de8bf477e53e..cdc09754d2654 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll @@ -4,8 +4,23 @@ define i32 @test(i64 %arg1) "target-features"="+neon" { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: subs r1, r1, #1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: sbcs r0, r0, #0 +; CHECK-NEXT: vldr s0, .LCPI0_0 +; CHECK-NEXT: movwhs r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vmov s1, r2 +; CHECK-NEXT: vmovn.i32 d16, q0 +; CHECK-NEXT: vmovn.i16 d16, q8 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 
0xffffffff @ float NaN entry: %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0 %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer From 804db3d27fbdbb45241c6e8179ecc97ab62d6cf1 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 29 Nov 2024 15:42:39 +0000 Subject: [PATCH 3/5] Remove unneeded target-features attribute --- llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll index cdc09754d2654..15a4f2e37ca46 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s -define i32 @test(i64 %arg1) "target-features"="+neon" { +define i32 @test(i64 %arg1) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: subs r1, r1, #1 From fc2f59abee13242be5a1970298d101394a8a54fd Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 3 Dec 2024 09:26:35 +0000 Subject: [PATCH 4/5] Also test little-endian --- .../CodeGen/ARM/big-endian-vector-trunc.ll | 31 ----------- llvm/test/CodeGen/ARM/vector-trunc.ll | 52 +++++++++++++++++++ 2 files changed, 52 insertions(+), 31 deletions(-) delete mode 100644 llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll create mode 100644 llvm/test/CodeGen/ARM/vector-trunc.ll diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll deleted file mode 100644 index 15a4f2e37ca46..0000000000000 --- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll +++ /dev/null @@ -1,31 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc 
-mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s - -define i32 @test(i64 %arg1) { -; CHECK-LABEL: test: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r1, r1, #1 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: sbcs r0, r0, #0 -; CHECK-NEXT: vldr s0, .LCPI0_0 -; CHECK-NEXT: movwhs r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vmov s1, r2 -; CHECK-NEXT: vmovn.i32 d16, q0 -; CHECK-NEXT: vmovn.i16 d16, q8 -; CHECK-NEXT: vmov.u8 r0, d16[0] -; CHECK-NEXT: and r0, r0, #1 -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI0_0: -; CHECK-NEXT: .long 0xffffffff @ float NaN -entry: - %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0 - %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer - %cmp_vec = icmp ule <8 x i64> , %splat_zero - %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0 - %ext = zext i1 %first_cmp to i32 - ret i32 %ext -} diff --git a/llvm/test/CodeGen/ARM/vector-trunc.ll b/llvm/test/CodeGen/ARM/vector-trunc.ll new file mode 100644 index 0000000000000..9acf463c2be93 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vector-trunc.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=LE +; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=BE + +define i32 @test(i64 %arg1) { +; LE-LABEL: test: +; LE: @ %bb.0: @ %entry +; LE-NEXT: subs r0, r0, #1 +; LE-NEXT: mov r2, #0 +; LE-NEXT: sbcs r0, r1, #0 +; LE-NEXT: vldr s0, .LCPI0_0 +; LE-NEXT: movwhs r2, #1 +; LE-NEXT: cmp r2, #0 +; LE-NEXT: mvnne r2, #0 +; LE-NEXT: vmov s1, r2 +; LE-NEXT: vmovn.i32 d16, q0 +; LE-NEXT: vmovn.i16 d16, q8 +; LE-NEXT: vmov.u8 r0, d16[0] +; LE-NEXT: and r0, r0, #1 +; LE-NEXT: bx lr +; LE-NEXT: .p2align 2 +; LE-NEXT: @ %bb.1: +; LE-NEXT: .LCPI0_0: +; LE-NEXT: 
.long 0xffffffff @ float NaN +; +; BE-LABEL: test: +; BE: @ %bb.0: @ %entry +; BE-NEXT: subs r1, r1, #1 +; BE-NEXT: mov r2, #0 +; BE-NEXT: sbcs r0, r0, #0 +; BE-NEXT: vldr s0, .LCPI0_0 +; BE-NEXT: movwhs r2, #1 +; BE-NEXT: cmp r2, #0 +; BE-NEXT: mvnne r2, #0 +; BE-NEXT: vmov s1, r2 +; BE-NEXT: vmovn.i32 d16, q0 +; BE-NEXT: vmovn.i16 d16, q8 +; BE-NEXT: vmov.u8 r0, d16[0] +; BE-NEXT: and r0, r0, #1 +; BE-NEXT: bx lr +; BE-NEXT: .p2align 2 +; BE-NEXT: @ %bb.1: +; BE-NEXT: .LCPI0_0: +; BE-NEXT: .long 0xffffffff @ float NaN +entry: + %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0 + %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer + %cmp_vec = icmp ule <8 x i64> , %splat_zero + %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0 + %ext = zext i1 %first_cmp to i32 + ret i32 %ext +} From 3e7564bf197a62de51d73de4a0814e2e3be418d9 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 4 Dec 2024 13:50:48 +0000 Subject: [PATCH 5/5] Use existing isLE variable --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 37be089096ebb..4435d5ff50c72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15495,8 +15495,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { unsigned BuildVecNumElts = BuildVect.getNumOperands(); unsigned TruncVecNumElts = VT.getVectorNumElements(); unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; - unsigned FirstElt = - DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0; + unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1); assert((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements");