Skip to content

Conversation

@tangaac
Copy link
Member

@tangaac tangaac commented May 20, 2025

PR #137918 introduces a wrong lowering for v4f64/v4i64 to generate xvshuf4i.d instruction.
This PR reverts the wrong part of lasx.

@llvmbot
Copy link
Member

llvmbot commented May 20, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

PR #137918 introduces a wrong lowering for v4f64/v4i64 to generate xvshuf4i.d instruction.
This PR reverts the wrong part of lasx.


Full diff: https://github.com/llvm/llvm-project/pull/140688.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+7-8)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+2-3)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll (+1-21)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 258891ae488da..ac54979a760c3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1027,8 +1027,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            SelectionDAG &DAG) {
 
   unsigned SubVecSize = 4;
-  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
-      VT == MVT::v4i64) {
+  if (VT == MVT::v2f64 || VT == MVT::v2i64) {
     SubVecSize = 2;
   }
 
@@ -1068,9 +1067,8 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
     Imm |= M & 0x3;
   }
 
-  // Return vshuf4i.d and xvshuf4i.d
-  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
-      VT == MVT::v4i64)
+  // Return vshuf4i.d
+  if (VT == MVT::v2f64 || VT == MVT::v2i64)
     return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
                        DAG.getConstant(Imm, DL, MVT::i64));
 
@@ -1458,6 +1456,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                             MVT VT, SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
+  // When the size is less than or equal to 4, lower cost instructions may be
+  // used.
+  if (Mask.size() <= 4)
+    return SDValue();
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
 }
 
@@ -1839,9 +1841,6 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
     return Result;
-  if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
-      (Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
-    return Result;
   if ((Result =
            lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
     return Result;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index ad5b49564f9cd..f2b22d0d64d6c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,7 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                      SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
                                         SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
-def SDT_LoongArchVShuf4i_D
+def SDT_LoongArchV2RUimm
     : SDTypeProfile<1, 3,
                     [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
                      SDTCisVT<3, i64>]>;
@@ -57,8 +57,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
 def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
 
 def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
-def loongarch_vshuf4i_d
-    : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
+def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>;
 def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
 def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index f3736f669db41..02186d23e31e5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -40,24 +40,4 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
 ; CHECK-NEXT:    ret
     %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     ret <8 x float> %c
-}
-
-;; xvshuf4i.d
-define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
-; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
-; CHECK-NEXT:    ret
-    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
-    ret <4 x i64> %c
-}
-
-;; xvshuf4i.d
-define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
-; CHECK-NEXT:    ret
-    %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
-    ret <4 x double> %c
-}
+}
\ No newline at end of file

; CHECK-NEXT: ret
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
ret <4 x i64> %c
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, that was definitely incorrect.

The element ordering in XVSHUF4I.D doesn't actually align with shufflevector semantics. The correct mapping after transformation is:

vec : {vj[3], vj[2], vd[3], vd[2], vj[1], vj[0], vd[1], vd[0]}
mask:     7      6      3      2      5      4      1      0

It seems possible to lower the following case using XVSHUF4I.D:

define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {		
; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
; CHECK:       # %bb.0:		
; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9		
; CHECK-NEXT:    ret		
    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>		
    ret <4 x i64> %c		
}

@tangaac tangaac merged commit 3cf6565 into llvm:main May 21, 2025
9 of 11 checks passed
@tangaac tangaac deleted the xvshuf4i_d branch May 21, 2025 01:41
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants