Skip to content

Conversation

@tangaac
Copy link
Member

@tangaac tangaac commented Aug 29, 2025

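PR #135896 introduced lowering to [x]vldrepl instructions without handling extending loads (sext/zext).
This patch fixes that by only forming a broadcast load when the source is a non-extending load.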

@llvmbot
Copy link
Member

llvmbot commented Aug 29, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

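PR #135896 introduced lowering to [x]vldrepl instructions without handling extending loads (sext/zext).
This patch fixes that by only forming a broadcast load when the source is a non-extending load.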


Full diff: https://github.com/llvm/llvm-project/pull/155960.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+2-1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll (+38-6)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll (+39-6)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ffb6c2980026f..478c335c3f07e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2471,8 +2471,9 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
   if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
     return SDValue();
 
-  if (IsIdeneity) {
+  if (IsIdeneity && ISD::isNON_EXTLoad(IdentitySrc.getNode())) {
     auto *LN = cast<LoadSDNode>(IdentitySrc);
+
     SDVTList Tys =
         LN->isIndexed()
             ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..89592a0886cc1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
   ret <4 x i64> %tmp2
 }
 
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
 define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 define <32 x i8> @xvldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) {
 define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.b $xr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
 define <16 x i16> @xvldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) {
 define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.h $xr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
 define <8 x i32> @xvldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) {
 define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvldrepl.w $xr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..a8cddbf9e6400 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
   ret <2 x i64> %tmp2
 }
 
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
 define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
@@ -34,7 +60,8 @@ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 define <16 x i8> @vldrepl_b(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 0
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i8, ptr %ptr
   %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <16 x i8> @vldrepl_b(ptr %ptr) {
 define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_b_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.b $vr0, $a0, 33
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i8, ptr %ptr, i64 33
   %tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
 define <8 x i16> @vldrepl_h(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 0
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i16, ptr %ptr
   %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <8 x i16> @vldrepl_h(ptr %ptr) {
 define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_h_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.h $vr0, $a0, 66
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i16, ptr %ptr, i64 33
   %tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
 define <4 x i32> @vldrepl_w(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 0
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %tmp = load i32, ptr %ptr
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <4 x i32> @vldrepl_w(ptr %ptr) {
 define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_w_offset:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vldrepl.w $vr0, $a0, 132
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    ret
   %p = getelementptr i32, ptr %ptr, i64 33
   %tmp = load i32, ptr %p
@@ -169,3 +201,4 @@ define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
   %tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
   ret <2 x double> %tmp2
 }
+

@zhaoqi5
Copy link
Contributor

zhaoqi5 commented Aug 29, 2025

The TODO at the top of the test files can probably be deleted.

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thanks!

@github-project-automation github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Sep 1, 2025
@tangaac tangaac merged commit 2320529 into llvm:main Sep 1, 2025
9 checks passed
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Sep 1, 2025
@tangaac
Copy link
Member Author

tangaac commented Sep 2, 2025

/cherry-pick 2320529

@llvmbot
Copy link
Member

llvmbot commented Sep 2, 2025

/pull-request #156384

tru pushed a commit to llvmbot/llvm-project that referenced this pull request Sep 3, 2025
PR llvm#135896 introduces [x]vldrepl instructions without handling
extension.
This patch will fix that.

(cherry picked from commit 2320529)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

Development

Successfully merging this pull request may close these issues.

5 participants