Skip to content

Commit 9560864

Browse files
tangaactru
authored and committed
[LoongArch] Fix broadcast load with extension. (#155960)
PR #135896 introduced lowering to the [x]vldrepl instructions without handling extending loads. This patch fixes that. (cherry picked from commit 2320529)
1 parent 0d67367 commit 9560864

File tree

3 files changed

+58
-5
lines changed

3 files changed

+58
-5
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,11 +2430,14 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
24302430
}
24312431

24322432
// make sure that this load is valid and only has one user.
2433-
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2433+
if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
24342434
return SDValue();
24352435

2436-
if (IsIdeneity) {
2437-
auto *LN = cast<LoadSDNode>(IdentitySrc);
2436+
auto *LN = cast<LoadSDNode>(IdentitySrc);
2437+
auto ExtType = LN->getExtensionType();
2438+
2439+
if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2440+
VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
24382441
SDVTList Tys =
24392442
LN->isIndexed()
24402443
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)

llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
1818
ret <4 x i64> %tmp2
1919
}
2020

21+
define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
22+
; CHECK-LABEL: should_not_be_optimized_sext_load:
23+
; CHECK: # %bb.0:
24+
; CHECK-NEXT: ld.b $a0, $a0, 0
25+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
26+
; CHECK-NEXT: ret
27+
%tmp = load i8, ptr %ptr
28+
%tmp1 = sext i8 %tmp to i16
29+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
30+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
31+
ret <16 x i16> %tmp3
32+
}
33+
34+
define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
35+
; CHECK-LABEL: should_not_be_optimized_zext_load:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: ld.bu $a0, $a0, 0
38+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
39+
; CHECK-NEXT: ret
40+
%tmp = load i8, ptr %ptr
41+
%tmp1 = zext i8 %tmp to i16
42+
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
43+
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
44+
ret <16 x i16> %tmp3
45+
}
46+
2147
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
2248
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
2349
; CHECK: # %bb.0:

llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
33

4-
; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
5-
64
; A load has more than one user shouldn't be lowered to vldrepl
75
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
86
; CHECK-LABEL: should_not_be_optimized:
@@ -18,6 +16,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
1816
ret <2 x i64> %tmp2
1917
}
2018

19+
define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
20+
; CHECK-LABEL: should_not_be_optimized_sext_load:
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: ld.b $a0, $a0, 0
23+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
24+
; CHECK-NEXT: ret
25+
%tmp = load i8, ptr %ptr
26+
%tmp1 = sext i8 %tmp to i16
27+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
28+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
29+
ret <8 x i16> %tmp3
30+
}
31+
32+
define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
33+
; CHECK-LABEL: should_not_be_optimized_zext_load:
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: ld.bu $a0, $a0, 0
36+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
37+
; CHECK-NEXT: ret
38+
%tmp = load i8, ptr %ptr
39+
%tmp1 = zext i8 %tmp to i16
40+
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
41+
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
42+
ret <8 x i16> %tmp3
43+
}
44+
2145
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
2246
; CHECK-LABEL: vldrepl_d_unaligned_offset:
2347
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)