-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoongArch] Remove wrong vector shuffle lowering for lasx. #140688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) ChangesPR #137918 introduces a wrong lowering for v4f64/v4i64 to generate xvshuf4i.d instruction. Full diff: https://github.com/llvm/llvm-project/pull/140688.diff 3 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 258891ae488da..ac54979a760c3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1027,8 +1027,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
SelectionDAG &DAG) {
unsigned SubVecSize = 4;
- if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
- VT == MVT::v4i64) {
+ if (VT == MVT::v2f64 || VT == MVT::v2i64) {
SubVecSize = 2;
}
@@ -1068,9 +1067,8 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
Imm |= M & 0x3;
}
- // Return vshuf4i.d and xvshuf4i.d
- if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
- VT == MVT::v4i64)
+ // Return vshuf4i.d
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, MVT::i64));
@@ -1458,6 +1456,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
+ // When the size is less than or equal to 4, lower cost instructions may be
+ // used.
+ if (Mask.size() <= 4)
+ return SDValue();
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}
@@ -1839,9 +1841,6 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
return Result;
- if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
- (Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
- return Result;
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
return Result;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index ad5b49564f9cd..f2b22d0d64d6c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,7 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
-def SDT_LoongArchVShuf4i_D
+def SDT_LoongArchV2RUimm
: SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i64>]>;
@@ -57,8 +57,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
-def loongarch_vshuf4i_d
- : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
+def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index f3736f669db41..02186d23e31e5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -40,24 +40,4 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
; CHECK-NEXT: ret
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %c
-}
-
-;; xvshuf4i.d
-define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
-; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
-; CHECK-NEXT: ret
- %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
- ret <4 x i64> %c
-}
-
-;; xvshuf4i.d
-define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
-; CHECK-NEXT: ret
- %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
- ret <4 x double> %c
-}
+}
\ No newline at end of file
|
| ; CHECK-NEXT: ret | ||
| %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6> | ||
| ret <4 x i64> %c | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see, that was definitely incorrect.
The element ordering in XVSHUF4I.D doesn't actually align with shufflevector semantics. The correct mapping after transformation is:
vec : {vj[3], vj[2], vd[3], vd[2], vj[1], vj[0], vd[1], vd[0]}
mask: 7 6 3 2 5 4 1 0
It seems possible to lower the following case using XVSHUF4I.D:
define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
; CHECK-NEXT: ret
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x i64> %c
}
PR #137918 introduces a wrong lowering for v4f64/v4i64 to generate xvshuf4i.d instruction.
This PR reverts the wrong part of lasx.