-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoongArch] lower SCALAR_TO_VECTOR to INSERT_VECTOR_ELT #122863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changesdefine <16 x i8> @<!-- -->scalar_to_16xi8(i8 %val) {
%ret = insertelement <16 x i8> undef, i8 %val, i32 0
ret <16 x i8> %ret
}before addi.d $sp, $sp, -16
st.b $a0, $sp, 0
vld $vr0, $sp, 0
addi.d $sp, $sp, 16
retafter vinsgr2vr.b $vr0, $a0, 0
retFull diff: https://github.com/llvm/llvm-project/pull/122863.diff 4 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 96e6f71344a787..af8566680b2a07 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -255,6 +255,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -311,6 +312,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -446,10 +448,26 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITREVERSE:
return lowerBITREVERSE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
}
return SDValue();
}
+SDValue
+LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT OpVT = Op.getSimpleValueType();
+
+ SDValue Vector = DAG.getUNDEF(OpVT);
+ SDValue Val = Op.getOperand(0);
+ SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
+
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
+ return Vector;
+}
+
SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
SelectionDAG &DAG) const {
EVT ResTy = Op->getValueType(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index e619cb69f33325..a14d5d49ee9d1b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -336,6 +336,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..9020db76738f6a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+; Test scalar_to_vector expansion.
+
+define <32 x i8> @scalar_to_32xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_32xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <32 x i8> undef, i8 %val, i32 0
+ ret <32 x i8> %ret
+}
+
+define <16 x i16> @scalar_to_16xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_16xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <16 x i16> undef, i16 %val, i32 0
+ ret <16 x i16> %ret
+}
+
+define <8 x i32> @scalar_to_8xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_8xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x i32> undef, i32 %val, i32 0
+ ret <8 x i32> %ret
+}
+
+define <4 x i64> @scalar_to_4xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_4xi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x i64> undef, i64 %val, i32 0
+ ret <4 x i64> %ret
+}
+
+define <8 x float> @scalar_to_8xf32(float %val) {
+; CHECK-LABEL: scalar_to_8xf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.s $a0, $fa0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x float> undef, float %val, i32 0
+ ret <8 x float> %ret
+}
+
+define <4 x double> @scalar_to_4xf64(double %val) {
+; CHECK-LABEL: scalar_to_4xf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.d $a0, $fa0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x double> undef, double %val, i32 0
+ ret <4 x double> %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
new file mode 100644
index 00000000000000..4a9471bbf552b0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+; Test scalar_to_vector expansion.
+
+define <16 x i8> @scalar_to_16xi8(i8 %val) {
+; CHECK-LABEL: scalar_to_16xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 0
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @scalar_to_8xi16(i16 %val) {
+; CHECK-LABEL: scalar_to_8xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 0
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @scalar_to_4xi32(i32 %val) {
+; CHECK-LABEL: scalar_to_4xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @scalar_to_2xi64(i64 %val) {
+; CHECK-LABEL: scalar_to_2xi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <2 x i64> undef, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+define <4 x float> @scalar_to_4xf32(float %val) {
+; CHECK-LABEL: scalar_to_4xf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.s $a0, $fa0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <4 x float> undef, float %val, i32 0
+ ret <4 x float> %ret
+}
+
+define <2 x double> @scalar_to_2xf64(double %val) {
+; CHECK-LABEL: scalar_to_2xf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.d $a0, $fa0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %ret = insertelement <2 x double> undef, double %val, i32 0
+ ret <2 x double> %ret
+}
|
|
✅ With the latest revision this PR passed the undef deprecator. |
2b3fd87 to
7e36c8d
Compare
| setOperationAction(ISD::SETCC, VT, Legal); | ||
| setOperationAction(ISD::VSELECT, VT, Legal); | ||
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); | ||
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not make it Legal and define patterns in .td files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not make it
Legaland define patterns in .td files.
Hi @SixWeining, I thought that in .td files, we can only lower operations to machine instructions. However, if this is done during ISel lowering, it gets canonicalized into a different standard SDNode. Could there be potential benefits if we incorporate some DAGCombine patterns in this approach?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not make it
Legaland define patterns in .td files.
def : Pat<(v16i8 (scalar_to_vector GRLenVT:$rj)),
(VINSGR2VR_B ?, GRLenVT:$rj, 0)>;
We cannot replace scalar_to_vector with the VINSGR2VR instruction because of the unknown ?.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree.
For the test case in the commit message, we do too many transforms: insert_vector_elt -> BUILD_VECTOR -> scalar_to_vector -> insert_vector_elt. Could we do: insert_vector_elt -> BUILD_VECTOR -> insert_vector_elt ?
Maybe we should change: LoongArchTargetLowering::lowerBUILD_VECTOR() or SelectionDAGLegalize::ExpandBUILD_VECTOR().
SixWeining
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which one should we use: movgr2fr or vinsgr2vr?
| ret <2 x i64> %ret | ||
| } | ||
|
|
||
| define <4 x float> @scalar_to_4xf32(float %val) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems these could be empty because FR overlap with the lower part of the SIMD register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector_insert in .td files cannot deal with this.
We could make v4f32, v2f64 Legal, and process scalar_to_vector in .td files like this,
def : Pat<(v4f32 (scalar_to_vector FPR32:$fj)),
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32)>;
def : Pat<(v2f64 (scalar_to_vector FPR64:$fj)),
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64)>;
SixWeining
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Beyond this PR: If the insertion index is non-zero and the vector is undef or poison, stack store and load could also be replaced with vinsgr2vr.
Yes |
Co-authored-by: Lu Weining <[email protected]>
Co-authored-by: Lu Weining <[email protected]>
SixWeining
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Let's handle the non-zero case in a separate PR.
before
after