Skip to content

Commit 2320529

Browse files
authored
[LoongArch] Fix broadcast load with extension. (#155960)
PR #135896 introduced lowering to [x]vldrepl instructions without handling extending loads (sext/zext loads). This patch fixes that.
1 parent 32beea0 commit 2320529

File tree

3 files changed

+58
-5
lines changed

3 files changed

+58
-5
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2516,11 +2516,14 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
25162516
}
25172517

25182518
// make sure that this load is valid and only has one user.
2519-
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2519+
if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
25202520
return SDValue();
25212521

2522-
if (IsIdeneity) {
2523-
auto *LN = cast<LoadSDNode>(IdentitySrc);
2522+
auto *LN = cast<LoadSDNode>(IdentitySrc);
2523+
auto ExtType = LN->getExtensionType();
2524+
2525+
if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2526+
VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
25242527
SDVTList Tys =
25252528
LN->isIndexed()
25262529
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)

llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
3333
ret <4 x i64> %tmp2
3434
}
3535

36+
define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
37+
; CHECK-LABEL: should_not_be_optimized_sext_load:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: ld.b $a0, $a0, 0
40+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
41+
; CHECK-NEXT: ret
42+
%tmp = load i8, ptr %ptr
43+
%tmp1 = sext i8 %tmp to i16
44+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
45+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
46+
ret <16 x i16> %tmp3
47+
}
48+
49+
define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
50+
; CHECK-LABEL: should_not_be_optimized_zext_load:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: ld.bu $a0, $a0, 0
53+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
54+
; CHECK-NEXT: ret
55+
%tmp = load i8, ptr %ptr
56+
%tmp1 = zext i8 %tmp to i16
57+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
58+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
59+
ret <16 x i16> %tmp3
60+
}
61+
3662
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
3763
; LA32-LABEL: xvldrepl_d_unaligned_offset:
3864
; LA32: # %bb.0:

llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
33
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
44

5-
; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
6-
75
; A load has more than one user shouldn't be lowered to vldrepl
86
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
97
; LA32-LABEL: should_not_be_optimized:
@@ -31,6 +29,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
3129
ret <2 x i64> %tmp2
3230
}
3331

32+
define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
33+
; CHECK-LABEL: should_not_be_optimized_sext_load:
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: ld.b $a0, $a0, 0
36+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
37+
; CHECK-NEXT: ret
38+
%tmp = load i8, ptr %ptr
39+
%tmp1 = sext i8 %tmp to i16
40+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
41+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
42+
ret <8 x i16> %tmp3
43+
}
44+
45+
define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
46+
; CHECK-LABEL: should_not_be_optimized_zext_load:
47+
; CHECK: # %bb.0:
48+
; CHECK-NEXT: ld.bu $a0, $a0, 0
49+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
50+
; CHECK-NEXT: ret
51+
%tmp = load i8, ptr %ptr
52+
%tmp1 = zext i8 %tmp to i16
53+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
54+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
55+
ret <8 x i16> %tmp3
56+
}
57+
3458
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
3559
; LA32-LABEL: vldrepl_d_unaligned_offset:
3660
; LA32: # %bb.0:

0 commit comments

Comments
 (0)