From dca42a811200e7dbb7ced5fa898456a2b3a24bd3 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Wed, 10 Sep 2025 17:52:19 +0800 Subject: [PATCH] [LoongArch] Override cost hooks to expose more DAG combine opportunities --- .../LoongArch/LoongArchISelLowering.cpp | 17 +++++++++++++ .../Target/LoongArch/LoongArchISelLowering.h | 3 +++ .../CodeGen/LoongArch/lasx/scalarize-fp.ll | 24 +++++++------------ .../CodeGen/LoongArch/lsx/scalarize-fp.ll | 17 +++++-------- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index ecd003cae3263..098bcfa67d1d3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -9559,3 +9559,20 @@ bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { EVT ScalarVT = VecVT.getScalarType(); return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); } + +bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; + + // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free. + return Index == 0; +} + +bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT, + unsigned Index) const { + EVT EltVT = VT.getScalarType(); + + // Extracting a scalar FP value from index 0 of a vector is free. 
+ return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 3c00296116ac2..9b60a9fd53726 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -338,6 +338,9 @@ class LoongArchTargetLowering : public TargetLowering { unsigned Depth) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; + bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const override; + bool isExtractVecEltCheap(EVT VT, unsigned Index) const override; /// Check if a constant splat can be generated using [x]vldi, where imm[12] /// is 1. diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll index c93a6582b9c69..39ac647d6875c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll @@ -5,9 +5,8 @@ define <8 x float> @fadd_elt0_v8f32(float %a) nounwind { ; CHECK-LABEL: fadd_elt0_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 -; CHECK-NEXT: xvldi $xr1, -1424 -; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CHECK-NEXT: vldi $vr1, -1168 +; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %b = insertelement <8 x float> poison, float %a, i32 0 @@ -18,9 +17,8 @@ entry: define <4 x double> @fadd_elt0_v4f64(double %a) nounwind { ; CHECK-LABEL: fadd_elt0_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvldi $xr1, -912 -; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: vldi $vr1, -912 +; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %b = insertelement <4 x double> poison, double %a, i32 0 @@ -31,11 +29,8 @@ entry: define <8 x float> @fsub_splat_v8f32(float %a, float %b) nounwind { ; CHECK-LABEL: fsub_splat_v8f32: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 +; CHECK-NEXT: fsub.s $fa0, $fa0, $fa1 +; CHECK-NEXT: xvreplve0.w $xr0, $xr0 ; CHECK-NEXT: ret entry: %insa = insertelement <8 x float> poison, float %a, i32 0 @@ -49,10 +44,9 @@ entry: define <4 x double> @fsub_splat_v4f64(double %a) nounwind { ; CHECK-LABEL: fsub_splat_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; CHECK-NEXT: xvldi $xr1, -784 -; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 0 +; CHECK-NEXT: vldi $vr1, -784 +; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1 +; CHECK-NEXT: xvreplve0.d $xr0, $xr0 ; CHECK-NEXT: ret entry: %insa = insertelement <4 x double> poison, double %a, i32 0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll index cc2d3d818b412..b651f11596c82 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll @@ -5,9 +5,8 @@ define <4 x float> @fadd_elt0_v4f32(float %a) nounwind { ; CHECK-LABEL: fadd_elt0_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vldi $vr1, -1424 -; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CHECK-NEXT: vldi $vr1, -1168 +; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %b = insertelement <4 x float> poison, float %a, i32 0 @@ -18,9 +17,8 @@ entry: define <2 x double> @fadd_elt0_v2f64(double %a) nounwind { ; CHECK-LABEL: fadd_elt0_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 ; CHECK-NEXT: vldi $vr1, -912 -; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %b = insertelement <2 x double> poison, double %a, i32 0 @@ -31,9 +29,8 @@ entry: define <4 x float> 
@fsub_splat_v4f32(float %b) nounwind { ; CHECK-LABEL: fsub_splat_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vldi $vr1, -1424 -; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vldi $vr1, -1168 +; CHECK-NEXT: fsub.s $fa0, $fa1, $fa0 ; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: ret entry: @@ -48,9 +45,7 @@ entry: define <2 x double> @fsub_splat_v2f64(double %a, double %b) nounwind { ; CHECK-LABEL: fsub_splat_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CHECK-NEXT: fsub.d $fa0, $fa0, $fa1 ; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: ret entry: