Skip to content

Commit be1adea

Browse files
committed
Fix broadcast load with extension.
1 parent af41d0d commit be1adea

File tree

3 files changed

+56
-2
lines changed

3 files changed

+56
-2
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,8 +2519,10 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2519 2519
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2520 2520
return SDValue();
2521 2521

2522-
if (IsIdeneity) {
2523-
auto *LN = cast<LoadSDNode>(IdentitySrc);
2522+
auto *LN = cast<LoadSDNode>(IdentitySrc);
2523+
auto ExtType = LN->getExtensionType();
2524+
2525+
if (IsIdeneity && (ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD)) {
2524 2526
SDVTList Tys =
2525 2527
LN->isIndexed()
2526 2528
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)

llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
33 33
ret <4 x i64> %tmp2
34 34
}
35 35

36+
define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
37+
; CHECK-LABEL: should_not_be_optimized_sext_load:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: ld.b $a0, $a0, 0
40+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
41+
; CHECK-NEXT: ret
42+
%tmp = load i8, ptr %ptr
43+
%tmp1 = sext i8 %tmp to i16
44+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
45+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
46+
ret <16 x i16> %tmp3
47+
}
48+
49+
define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
50+
; CHECK-LABEL: should_not_be_optimized_zext_load:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: ld.bu $a0, $a0, 0
53+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
54+
; CHECK-NEXT: ret
55+
%tmp = load i8, ptr %ptr
56+
%tmp1 = zext i8 %tmp to i16
57+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
58+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
59+
ret <16 x i16> %tmp3
60+
}
61+
36 62
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
37 63
; LA32-LABEL: xvldrepl_d_unaligned_offset:
38 64
; LA32: # %bb.0:

llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
31 31
ret <2 x i64> %tmp2
32 32
}
33 33

34+
define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
35+
; CHECK-LABEL: should_not_be_optimized_sext_load:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: ld.b $a0, $a0, 0
38+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
39+
; CHECK-NEXT: ret
40+
%tmp = load i8, ptr %ptr
41+
%tmp1 = sext i8 %tmp to i16
42+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
43+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
44+
ret <8 x i16> %tmp3
45+
}
46+
47+
define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
48+
; CHECK-LABEL: should_not_be_optimized_zext_load:
49+
; CHECK: # %bb.0:
50+
; CHECK-NEXT: ld.bu $a0, $a0, 0
51+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
52+
; CHECK-NEXT: ret
53+
%tmp = load i8, ptr %ptr
54+
%tmp1 = zext i8 %tmp to i16
55+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
56+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
57+
ret <8 x i16> %tmp3
58+
}
59+
34 60
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
35 61
; LA32-LABEL: vldrepl_d_unaligned_offset:
36 62
; LA32: # %bb.0:

0 commit comments

Comments
 (0)