From d2f7fba049f5191736783035a658e380e2c4755b Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Fri, 26 Sep 2025 10:40:26 +0000 Subject: [PATCH 1/2] [AArch64][GlobalISel] Prevented llvm.aarch64.neon.saddlp/uaddlp fallback Prevent the instruction-selection fallback for G_SADDLP/G_UADDLP instructions that return one-element i64 vectors, which was caused by incorrect register bank selection. --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 ++++ llvm/test/CodeGen/AArch64/arm64-vadd.ll | 5 +---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index f90bcc7a77cdf..830a35bbeb494 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -590,6 +590,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: + case AArch64::G_SADDLP: + case AArch64::G_UADDLP: case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: case TargetOpcode::G_EXTRACT_VECTOR_ELT: @@ -798,6 +800,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { if (Ty.isVector()) OpRegBankIdx[Idx] = PMI_FirstFPR; else if (isPreISelGenericFloatingPointOpcode(Opc) || + (MO.isDef() && onlyDefinesFP(MI, MRI, TRI)) || + (MO.isUse() && onlyUsesFP(MI, MRI, TRI)) || Ty.getSizeInBits() > 64) OpRegBankIdx[Idx] = PMI_FirstFPR; else diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll index 11fb73237da07..e3c80256feea0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=arm64-eabi -global-isel 
-global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for saddlp1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uaddlp1d +; RUN: llc < %s -mtriple=arm64-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: addhn8b: From 28b34fb762f2306873f61fbd49a02511548e613c Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Tue, 7 Oct 2025 09:25:17 +0000 Subject: [PATCH 2/2] [AArch64][GlobalISel] Added mir test for saddlp/uaddlp intrinsics --- .../AArch64/arm64-saddlp1d-uaddlp1d.mir | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir diff --git a/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir new file mode 100644 index 0000000000000..074f75ab8a5f0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: saddlp1d +legalized: true +regBankSelected: false +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $x0 + + ; CHECK-LABEL: name: saddlp1d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) + ; CHECK-NEXT: [[SADDLP:%[0-9]+]]:fpr(s64) = G_SADDLP [[LOAD]] + ; CHECK-NEXT: $d0 = COPY [[SADDLP]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(p0) = COPY $x0 + %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) + %2:_(s64) = G_SADDLP %1 + $d0 = COPY %2(s64) + RET_ReallyLR implicit $d0 +... 
+--- +name: uaddlp1d +legalized: true +regBankSelected: false +failedISel: false +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $x0 + + ; CHECK-LABEL: name: uaddlp1d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) + ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:fpr(s64) = G_UADDLP [[LOAD]] + ; CHECK-NEXT: $d0 = COPY [[UADDLP]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(p0) = COPY $x0 + %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) + %2:_(s64) = G_UADDLP %1 + $d0 = COPY %2(s64) + RET_ReallyLR implicit $d0 +...