Skip to content

Commit d516d07

Browse files
committed
[LoongArch] Override cost hooks to expose more DAG combine opportunities
1 parent ce10516 commit d516d07

File tree

4 files changed

+50
-70
lines changed

4 files changed

+50
-70
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9303,3 +9303,20 @@ bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
93039303
return TargetLowering::SimplifyDemandedBitsForTargetNode(
93049304
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
93059305
}
9306+
9307+
/// Cost hook: report whether extracting a subvector of type \p ResVT at
/// element offset \p Index is cheap on LoongArch.
///
/// \p SrcVT is accepted for interface compatibility but is not consulted.
/// \returns true only when ISD::EXTRACT_SUBVECTOR is legal or custom for
/// \p ResVT and the extraction starts at index 0.
bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                      unsigned Index) const {
  // The target must be able to handle EXTRACT_SUBVECTOR for the result type
  // at all before the extraction can be called cheap.
  const bool CanExtract =
      isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT);

  // Extracting the 128-bit subvector at index 0 of a 256-bit vector is free;
  // any other starting index is not reported as cheap.
  return CanExtract && Index == 0;
}
9315+
9316+
/// Cost hook: report whether extracting element \p Index from a vector of
/// type \p VT is cheap on LoongArch.
///
/// \returns true only when the element type of \p VT is f32 or f64 and
/// \p Index is 0.
bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
                                                   unsigned Index) const {
  const EVT ScalarVT = VT.getScalarType();
  const bool IsF32OrF64 = ScalarVT == MVT::f32 || ScalarVT == MVT::f64;

  // Extracting a scalar FP value from index 0 of a vector is free; every
  // other index, and every other element type, is not reported as cheap.
  return IsF32OrF64 && Index == 0;
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ class LoongArchTargetLowering : public TargetLowering {
337337
TargetLoweringOpt &TLO,
338338
unsigned Depth) const override;
339339

340+
/// Cost hook override. The LoongArch implementation returns true only when
/// ISD::EXTRACT_SUBVECTOR is legal or custom for \p ResVT and \p Index is 0;
/// \p SrcVT is not inspected.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
341+
unsigned Index) const override;
342+
/// Cost hook override. The LoongArch implementation returns true only when
/// the scalar type of \p VT is f32 or f64 and \p Index is 0.
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;
343+
340344
private:
341345
/// Target-specific function used to lower LoongArch calling conventions.
342346
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll

Lines changed: 17 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3-
; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
2+
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
44

55
define <8 x float> @fadd_elt0_v8f32(float %a) nounwind {
66
; CHECK-LABEL: fadd_elt0_v8f32:
77
; CHECK: # %bb.0: # %entry
8-
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
9-
; CHECK-NEXT: lu12i.w $a0, 260096
10-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
11-
; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1
8+
; CHECK-NEXT: vldi $vr1, -1168
9+
; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1
1210
; CHECK-NEXT: ret
1311
entry:
1412
%b = insertelement <8 x float> poison, float %a, i32 0
@@ -17,20 +15,11 @@ entry:
1715
}
1816

1917
define <4 x double> @fadd_elt0_v4f64(double %a) nounwind {
20-
; LA32-LABEL: fadd_elt0_v4f64:
21-
; LA32: # %bb.0: # %entry
22-
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
23-
; LA32-NEXT: vldi $vr1, -912
24-
; LA32-NEXT: xvfadd.d $xr0, $xr0, $xr1
25-
; LA32-NEXT: ret
26-
;
27-
; LA64-LABEL: fadd_elt0_v4f64:
28-
; LA64: # %bb.0: # %entry
29-
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
30-
; LA64-NEXT: lu52i.d $a0, $zero, 1023
31-
; LA64-NEXT: xvreplgr2vr.d $xr1, $a0
32-
; LA64-NEXT: xvfadd.d $xr0, $xr0, $xr1
33-
; LA64-NEXT: ret
18+
; CHECK-LABEL: fadd_elt0_v4f64:
19+
; CHECK: # %bb.0: # %entry
20+
; CHECK-NEXT: vldi $vr1, -912
21+
; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1
22+
; CHECK-NEXT: ret
3423
entry:
3524
%b = insertelement <4 x double> poison, double %a, i32 0
3625
%c = fadd <4 x double> %b, <double 1.0, double poison, double poison, double poison>
@@ -40,11 +29,8 @@ entry:
4029
define <8 x float> @fsub_splat_v8f32(float %a, float %b) nounwind {
4130
; CHECK-LABEL: fsub_splat_v8f32:
4231
; CHECK: # %bb.0: # %entry
43-
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
44-
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
45-
; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1
46-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
47-
; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0
32+
; CHECK-NEXT: fsub.s $fa0, $fa0, $fa1
33+
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
4834
; CHECK-NEXT: ret
4935
entry:
5036
%insa = insertelement <8 x float> poison, float %a, i32 0
@@ -56,24 +42,12 @@ entry:
5642
}
5743

5844
define <4 x double> @fsub_splat_v4f64(double %a) nounwind {
59-
; LA32-LABEL: fsub_splat_v4f64:
60-
; LA32: # %bb.0: # %entry
61-
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
62-
; LA32-NEXT: vldi $vr1, -784
63-
; LA32-NEXT: xvfadd.d $xr0, $xr0, $xr1
64-
; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
65-
; LA32-NEXT: xvrepl128vei.d $xr0, $xr0, 0
66-
; LA32-NEXT: ret
67-
;
68-
; LA64-LABEL: fsub_splat_v4f64:
69-
; LA64: # %bb.0: # %entry
70-
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
71-
; LA64-NEXT: lu52i.d $a0, $zero, -1025
72-
; LA64-NEXT: xvreplgr2vr.d $xr1, $a0
73-
; LA64-NEXT: xvfadd.d $xr0, $xr0, $xr1
74-
; LA64-NEXT: xvpermi.d $xr0, $xr0, 68
75-
; LA64-NEXT: xvrepl128vei.d $xr0, $xr0, 0
76-
; LA64-NEXT: ret
45+
; CHECK-LABEL: fsub_splat_v4f64:
46+
; CHECK: # %bb.0: # %entry
47+
; CHECK-NEXT: vldi $vr1, -784
48+
; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1
49+
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
50+
; CHECK-NEXT: ret
7751
entry:
7852
%insa = insertelement <4 x double> poison, double %a, i32 0
7953
%insb = insertelement <4 x double> poison, double 1.0, i32 0

llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3-
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
2+
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
44

55
define <4 x float> @fadd_elt0_v4f32(float %a) nounwind {
66
; CHECK-LABEL: fadd_elt0_v4f32:
77
; CHECK: # %bb.0: # %entry
8-
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
9-
; CHECK-NEXT: lu12i.w $a0, 260096
10-
; CHECK-NEXT: vreplgr2vr.w $vr1, $a0
11-
; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1
8+
; CHECK-NEXT: vldi $vr1, -1168
9+
; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1
1210
; CHECK-NEXT: ret
1311
entry:
1412
%b = insertelement <4 x float> poison, float %a, i32 0
@@ -17,20 +15,11 @@ entry:
1715
}
1816

1917
define <2 x double> @fadd_elt0_v2f64(double %a) nounwind {
20-
; LA32-LABEL: fadd_elt0_v2f64:
21-
; LA32: # %bb.0: # %entry
22-
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
23-
; LA32-NEXT: vldi $vr1, -912
24-
; LA32-NEXT: vfadd.d $vr0, $vr0, $vr1
25-
; LA32-NEXT: ret
26-
;
27-
; LA64-LABEL: fadd_elt0_v2f64:
28-
; LA64: # %bb.0: # %entry
29-
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
30-
; LA64-NEXT: lu52i.d $a0, $zero, 1023
31-
; LA64-NEXT: vreplgr2vr.d $vr1, $a0
32-
; LA64-NEXT: vfadd.d $vr0, $vr0, $vr1
33-
; LA64-NEXT: ret
18+
; CHECK-LABEL: fadd_elt0_v2f64:
19+
; CHECK: # %bb.0: # %entry
20+
; CHECK-NEXT: vldi $vr1, -912
21+
; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1
22+
; CHECK-NEXT: ret
3423
entry:
3524
%b = insertelement <2 x double> poison, double %a, i32 0
3625
%c = fadd <2 x double> %b, <double 1.0, double poison>
@@ -40,10 +29,8 @@ entry:
4029
define <4 x float> @fsub_splat_v4f32(float %b) nounwind {
4130
; CHECK-LABEL: fsub_splat_v4f32:
4231
; CHECK: # %bb.0: # %entry
43-
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
44-
; CHECK-NEXT: lu12i.w $a0, 260096
45-
; CHECK-NEXT: vreplgr2vr.w $vr1, $a0
46-
; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0
32+
; CHECK-NEXT: vldi $vr1, -1168
33+
; CHECK-NEXT: fsub.s $fa0, $fa1, $fa0
4734
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
4835
; CHECK-NEXT: ret
4936
entry:
@@ -58,9 +45,7 @@ entry:
5845
define <2 x double> @fsub_splat_v2f64(double %a, double %b) nounwind {
5946
; CHECK-LABEL: fsub_splat_v2f64:
6047
; CHECK: # %bb.0: # %entry
61-
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1
62-
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
63-
; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1
48+
; CHECK-NEXT: fsub.d $fa0, $fa0, $fa1
6449
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
6550
; CHECK-NEXT: ret
6651
entry:

0 commit comments

Comments
 (0)