-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoongArch] Fix broadcast load with extension. #155960
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changes: PR #135896 introduces [x]vldrepl instructions without handling extension. Full diff: https://github.com/llvm/llvm-project/pull/155960.diff 3 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ffb6c2980026f..478c335c3f07e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2471,8 +2471,9 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
return SDValue();
- if (IsIdeneity) {
+ if (IsIdeneity && ISD::isNON_EXTLoad(IdentitySrc.getNode())) {
auto *LN = cast<LoadSDNode>(IdentitySrc);
+
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..89592a0886cc1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
ret <4 x i64> %tmp2
}
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
; CHECK: # %bb.0:
@@ -34,7 +60,8 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
define <32 x i8> @xvldrepl_b(ptr %ptr) {
; CHECK-LABEL: xvldrepl_b:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.b $xr0, $a0, 0
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) {
define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
; CHECK-LABEL: xvldrepl_b_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.b $xr0, $a0, 33
+; CHECK-NEXT: ld.b $a0, $a0, 33
+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i8, ptr %ptr, i64 33
%tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
define <16 x i16> @xvldrepl_h(ptr %ptr) {
; CHECK-LABEL: xvldrepl_h:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.h $xr0, $a0, 0
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
; CHECK-NEXT: ret
%tmp = load i16, ptr %ptr
%tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) {
define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
; CHECK-LABEL: xvldrepl_h_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.h $xr0, $a0, 66
+; CHECK-NEXT: ld.h $a0, $a0, 66
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i16, ptr %ptr, i64 33
%tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
define <8 x i32> @xvldrepl_w(ptr %ptr) {
; CHECK-LABEL: xvldrepl_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.w $xr0, $a0, 0
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
; CHECK-NEXT: ret
%tmp = load i32, ptr %ptr
%tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) {
define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
; CHECK-LABEL: xvldrepl_w_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvldrepl.w $xr0, $a0, 132
+; CHECK-NEXT: ld.w $a0, $a0, 132
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i32, ptr %ptr, i64 33
%tmp = load i32, ptr %p
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..a8cddbf9e6400 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
ret <2 x i64> %tmp2
}
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_unaligned_offset:
; CHECK: # %bb.0:
@@ -34,7 +60,8 @@ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
define <16 x i8> @vldrepl_b(ptr %ptr) {
; CHECK-LABEL: vldrepl_b:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.b $vr0, $a0, 0
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -45,7 +72,8 @@ define <16 x i8> @vldrepl_b(ptr %ptr) {
define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_b_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.b $vr0, $a0, 33
+; CHECK-NEXT: ld.b $a0, $a0, 33
+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i8, ptr %ptr, i64 33
%tmp = load i8, ptr %p
@@ -58,7 +86,8 @@ define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
define <8 x i16> @vldrepl_h(ptr %ptr) {
; CHECK-LABEL: vldrepl_h:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.h $vr0, $a0, 0
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: ret
%tmp = load i16, ptr %ptr
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -69,7 +98,8 @@ define <8 x i16> @vldrepl_h(ptr %ptr) {
define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_h_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.h $vr0, $a0, 66
+; CHECK-NEXT: ld.h $a0, $a0, 66
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i16, ptr %ptr, i64 33
%tmp = load i16, ptr %p
@@ -81,7 +111,8 @@ define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
define <4 x i32> @vldrepl_w(ptr %ptr) {
; CHECK-LABEL: vldrepl_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.w $vr0, $a0, 0
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
; CHECK-NEXT: ret
%tmp = load i32, ptr %ptr
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -92,7 +123,8 @@ define <4 x i32> @vldrepl_w(ptr %ptr) {
define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_w_offset:
; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.w $vr0, $a0, 132
+; CHECK-NEXT: ld.w $a0, $a0, 132
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
; CHECK-NEXT: ret
%p = getelementptr i32, ptr %ptr, i64 33
%tmp = load i32, ptr %p
@@ -169,3 +201,4 @@ define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
%tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
ret <2 x double> %tmp2
}
+
|
|
The |
heiher
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
|
/cherry-pick 2320529 |
|
/pull-request #156384 |
PR llvm#135896 introduces [x]vldrepl instructions without handling extension. This patch will fix that. (cherry picked from commit 2320529)
PR #135896 introduced [x]vldrepl instructions without handling extending (sext/zext) loads.
This patch fixes that by only forming a broadcast load from a non-extending load.