Skip to content

Commit 8553bd2

Browse files
authored
[LoongArch] Override shouldScalarizeBinop to enable extract(binop)->binop(extract) combination (#159726)
1 parent 9440f40 commit 8553bd2

File tree

4 files changed

+67
-72
lines changed

4 files changed

+67
-72
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9420,3 +9420,22 @@ bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
94209420
return TargetLowering::SimplifyDemandedBitsForTargetNode(
94219421
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
94229422
}
9423+
9424+
/// Decide whether the vector binary operation \p VecOp should be performed on
/// scalars instead (the extract(binop) -> binop(extract) combination).
///
/// \returns false for target-specific opcodes and for opcodes that are not
/// binary operators; true when the vector form of the operation is not
/// legal/custom/promoted; otherwise true only if the scalar form of the
/// operation is legal/custom/promoted.
bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  const unsigned Opcode = VecOp.getOpcode();

  // Only generic ISD binary operators are considered; target opcodes are
  // assumed not to be scalarizable.
  // TODO - do we have any exceptions?
  const bool IsGenericBinOp = Opcode < ISD::BUILTIN_OP_END && isBinOp(Opcode);
  if (!IsGenericBinOp)
    return false;

  const EVT VectorTy = VecOp.getValueType();

  // An unsupported vector op is always worth trying as a scalar op. When the
  // vector op is supported, scalarizing is only considered worthwhile if the
  // scalar op is supported as well.
  return !isOperationLegalOrCustomOrPromote(Opcode, VectorTy) ||
         isOperationLegalOrCustomOrPromote(Opcode, VectorTy.getScalarType());
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ class LoongArchTargetLowering : public TargetLowering {
337337
TargetLoweringOpt &TLO,
338338
unsigned Depth) const override;
339339

340+
/// Return true if the vector binary operation \p VecOp should be scalarized,
/// i.e. extract(binop) may be turned into binop(extract). Overrides the
/// TargetLowering hook of the same name.
bool shouldScalarizeBinop(SDValue VecOp) const override;
341+
340342
private:
341343
/// Target-specific function used to lower LoongArch calling conventions.
342344
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,18 @@ entry:
3131
}
3232

3333
define i32 @extractelt_add_v8i32(ptr %p) {
34-
; CHECK-LABEL: extractelt_add_v8i32:
35-
; CHECK: # %bb.0: # %entry
36-
; CHECK-NEXT: xvld $xr0, $a0, 0
37-
; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 13
38-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
39-
; CHECK-NEXT: ret
34+
; LA32-LABEL: extractelt_add_v8i32:
35+
; LA32: # %bb.0: # %entry
36+
; LA32-NEXT: ld.w $a0, $a0, 8
37+
; LA32-NEXT: addi.w $a0, $a0, 13
38+
; LA32-NEXT: ret
39+
;
40+
; LA64-LABEL: extractelt_add_v8i32:
41+
; LA64: # %bb.0: # %entry
42+
; LA64-NEXT: xvld $xr0, $a0, 0
43+
; LA64-NEXT: xvaddi.wu $xr0, $xr0, 13
44+
; LA64-NEXT: xvpickve2gr.w $a0, $xr0, 2
45+
; LA64-NEXT: ret
4046
entry:
4147
%x = load <8 x i32>, ptr %p
4248
%add = add <8 x i32> %x, <i32 11, i32 12, i32 13, i32 14, i32 11, i32 12, i32 13, i32 14>
@@ -55,9 +61,8 @@ define i64 @extractelt_add_v4i64(ptr %p) {
5561
;
5662
; LA64-LABEL: extractelt_add_v4i64:
5763
; LA64: # %bb.0: # %entry
58-
; LA64-NEXT: xvld $xr0, $a0, 0
59-
; LA64-NEXT: xvaddi.du $xr0, $xr0, 12
60-
; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
64+
; LA64-NEXT: ld.d $a0, $a0, 8
65+
; LA64-NEXT: addi.d $a0, $a0, 12
6166
; LA64-NEXT: ret
6267
entry:
6368
%x = load <4 x i64>, ptr %p
@@ -69,12 +74,9 @@ entry:
6974
define float @extractelt_fadd_v8f32(ptr %p) {
7075
; CHECK-LABEL: extractelt_fadd_v8f32:
7176
; CHECK: # %bb.0: # %entry
72-
; CHECK-NEXT: xvld $xr0, $a0, 0
73-
; CHECK-NEXT: lu12i.w $a0, 267520
74-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
75-
; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1
76-
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2
77-
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
77+
; CHECK-NEXT: fld.s $fa0, $a0, 8
78+
; CHECK-NEXT: vldi $vr1, -1238
79+
; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1
7880
; CHECK-NEXT: ret
7981
entry:
8082
%x = load <8 x float>, ptr %p
@@ -84,27 +86,12 @@ entry:
8486
}
8587

8688
define double @extractelt_fadd_v4f64(ptr %p) {
87-
; LA32-LABEL: extractelt_fadd_v4f64:
88-
; LA32: # %bb.0: # %entry
89-
; LA32-NEXT: xvld $xr0, $a0, 0
90-
; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
91-
; LA32-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
92-
; LA32-NEXT: xvfadd.d $xr0, $xr0, $xr1
93-
; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
94-
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
95-
; LA32-NEXT: ret
96-
;
97-
; LA64-LABEL: extractelt_fadd_v4f64:
98-
; LA64: # %bb.0: # %entry
99-
; LA64-NEXT: xvld $xr0, $a0, 0
100-
; LA64-NEXT: ori $a0, $zero, 0
101-
; LA64-NEXT: lu32i.d $a0, -524288
102-
; LA64-NEXT: lu52i.d $a0, $a0, 1026
103-
; LA64-NEXT: xvreplgr2vr.d $xr1, $a0
104-
; LA64-NEXT: xvfadd.d $xr0, $xr0, $xr1
105-
; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
106-
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
107-
; LA64-NEXT: ret
89+
; CHECK-LABEL: extractelt_fadd_v4f64:
90+
; CHECK: # %bb.0: # %entry
91+
; CHECK-NEXT: fld.d $fa0, $a0, 8
92+
; CHECK-NEXT: vldi $vr1, -984
93+
; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1
94+
; CHECK-NEXT: ret
10895
entry:
10996
%x = load <4 x double>, ptr %p
11097
%add = fadd <4 x double> %x, <double 11.0, double 12.0, double 13.0, double 14.0>

llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,18 @@ entry:
3131
}
3232

3333
define i32 @extractelt_add_v4i32(ptr %p) {
34-
; CHECK-LABEL: extractelt_add_v4i32:
35-
; CHECK: # %bb.0: # %entry
36-
; CHECK-NEXT: vld $vr0, $a0, 0
37-
; CHECK-NEXT: vaddi.wu $vr0, $vr0, 13
38-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
39-
; CHECK-NEXT: ret
34+
; LA32-LABEL: extractelt_add_v4i32:
35+
; LA32: # %bb.0: # %entry
36+
; LA32-NEXT: ld.w $a0, $a0, 8
37+
; LA32-NEXT: addi.w $a0, $a0, 13
38+
; LA32-NEXT: ret
39+
;
40+
; LA64-LABEL: extractelt_add_v4i32:
41+
; LA64: # %bb.0: # %entry
42+
; LA64-NEXT: vld $vr0, $a0, 0
43+
; LA64-NEXT: vaddi.wu $vr0, $vr0, 13
44+
; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
45+
; LA64-NEXT: ret
4046
entry:
4147
%x = load <4 x i32>, ptr %p
4248
%add = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
@@ -55,9 +61,8 @@ define i64 @extractelt_add_v2i64(ptr %p) {
5561
;
5662
; LA64-LABEL: extractelt_add_v2i64:
5763
; LA64: # %bb.0: # %entry
58-
; LA64-NEXT: vld $vr0, $a0, 0
59-
; LA64-NEXT: vaddi.du $vr0, $vr0, 12
60-
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
64+
; LA64-NEXT: ld.d $a0, $a0, 8
65+
; LA64-NEXT: addi.d $a0, $a0, 12
6166
; LA64-NEXT: ret
6267
entry:
6368
%x = load <2 x i64>, ptr %p
@@ -69,12 +74,9 @@ entry:
6974
define float @extractelt_fadd_v4f32(ptr %p) {
7075
; CHECK-LABEL: extractelt_fadd_v4f32:
7176
; CHECK: # %bb.0: # %entry
72-
; CHECK-NEXT: vld $vr0, $a0, 0
73-
; CHECK-NEXT: lu12i.w $a0, 267520
74-
; CHECK-NEXT: vreplgr2vr.w $vr1, $a0
75-
; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1
76-
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2
77-
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
77+
; CHECK-NEXT: fld.s $fa0, $a0, 8
78+
; CHECK-NEXT: vldi $vr1, -1238
79+
; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1
7880
; CHECK-NEXT: ret
7981
entry:
8082
%x = load <4 x float>, ptr %p
@@ -84,27 +86,12 @@ entry:
8486
}
8587

8688
define double @extractelt_fadd_v2f64(ptr %p) {
87-
; LA32-LABEL: extractelt_fadd_v2f64:
88-
; LA32: # %bb.0: # %entry
89-
; LA32-NEXT: vld $vr0, $a0, 0
90-
; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
91-
; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
92-
; LA32-NEXT: vfadd.d $vr0, $vr0, $vr1
93-
; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
94-
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
95-
; LA32-NEXT: ret
96-
;
97-
; LA64-LABEL: extractelt_fadd_v2f64:
98-
; LA64: # %bb.0: # %entry
99-
; LA64-NEXT: vld $vr0, $a0, 0
100-
; LA64-NEXT: ori $a0, $zero, 0
101-
; LA64-NEXT: lu32i.d $a0, -524288
102-
; LA64-NEXT: lu52i.d $a0, $a0, 1026
103-
; LA64-NEXT: vreplgr2vr.d $vr1, $a0
104-
; LA64-NEXT: vfadd.d $vr0, $vr0, $vr1
105-
; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
106-
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
107-
; LA64-NEXT: ret
89+
; CHECK-LABEL: extractelt_fadd_v2f64:
90+
; CHECK: # %bb.0: # %entry
91+
; CHECK-NEXT: fld.d $fa0, $a0, 8
92+
; CHECK-NEXT: vldi $vr1, -984
93+
; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1
94+
; CHECK-NEXT: ret
10895
entry:
10996
%x = load <2 x double>, ptr %p
11097
%add = fadd <2 x double> %x, <double 11.0, double 12.0>

0 commit comments

Comments
 (0)