From 644ad38a0ea3347d7b1fe23df1405d55588a9a02 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 15 Apr 2025 12:41:28 -0700 Subject: [PATCH] [DAGCombiner] Disable narrowExtractedVectorLoad for indexed loads. The later code does not expect or preserve the index output. Fixes #135821 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/AArch64/pr135821.ll | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/pr135821.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b322fe670d4a7..d72be359867ca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25183,7 +25183,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); - if (!Ld || Ld->getExtensionType() || !Ld->isSimple()) + if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple()) return SDValue(); // Allow targets to opt-out. diff --git a/llvm/test/CodeGen/AArch64/pr135821.ll b/llvm/test/CodeGen/AArch64/pr135821.ll new file mode 100644 index 0000000000000..cfd6cd086e130 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr135821.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s + +define <4 x float> @f(ptr %0) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: ldr q1, [x0, #56]! 
+; CHECK-NEXT: ldr d0, [x0, #16] +; CHECK-NEXT: mov v1.d[1], v0.d[0] +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %2 = getelementptr inbounds nuw i8, ptr %0, i64 56 + %3 = load <6 x float>, ptr %2, align 4 + %4 = shufflevector <6 x float> %3, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + tail call void @use(ptr %2) + ret <4 x float> %4 +} + +declare void @use(ptr)