Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2516,11 +2516,14 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
}

// Make sure that this load is valid and has only one user.
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
return SDValue();

if (IsIdeneity) {
auto *LN = cast<LoadSDNode>(IdentitySrc);
auto *LN = cast<LoadSDNode>(IdentitySrc);
auto ExtType = LN->getExtensionType();

if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
ret <4 x i64> %tmp2
}

; A sign-extending load shouldn't be lowered to xvldrepl: the value read from
; memory is an i8 that is widened to i16 before being splatted, so the memory
; element is narrower than the vector element. The expected code keeps the
; scalar ld.b and replicates the register with xvreplgr2vr.h.
define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
; CHECK-LABEL: should_not_be_optimized_sext_load:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.b $a0, $a0, 0
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = sext i8 %tmp to i16
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
ret <16 x i16> %tmp3
}

; A zero-extending load shouldn't be lowered to xvldrepl: the value read from
; memory is an i8 that is widened to i16 before being splatted, so the memory
; element is narrower than the vector element. The expected code keeps the
; scalar ld.bu and replicates the register with xvreplgr2vr.h.
define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
; CHECK-LABEL: should_not_be_optimized_zext_load:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.bu $a0, $a0, 0
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = zext i8 %tmp to i16
%tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
%tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
ret <16 x i16> %tmp3
}

define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
; LA32-LABEL: xvldrepl_d_unaligned_offset:
; LA32: # %bb.0:
Expand Down
28 changes: 26 additions & 2 deletions llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64

; TODO: Loading an element and splatting it to a vector could be lowered to vldrepl

; A load that has more than one user shouldn't be lowered to vldrepl
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
; LA32-LABEL: should_not_be_optimized:
Expand Down Expand Up @@ -31,6 +29,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
ret <2 x i64> %tmp2
}

; A sign-extending load shouldn't be lowered to vldrepl: the value read from
; memory is an i8 that is widened to i16 before being splatted, so the memory
; element is narrower than the vector element. The expected code keeps the
; scalar ld.b and replicates the register with vreplgr2vr.h.
define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
; CHECK-LABEL: should_not_be_optimized_sext_load:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.b $a0, $a0, 0
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = sext i8 %tmp to i16
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %tmp3
}

; A zero-extending load shouldn't be lowered to vldrepl: the value read from
; memory is an i8 that is widened to i16 before being splatted, so the memory
; element is narrower than the vector element. The expected code keeps the
; scalar ld.bu and replicates the register with vreplgr2vr.h.
define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
; CHECK-LABEL: should_not_be_optimized_zext_load:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.bu $a0, $a0, 0
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: ret
%tmp = load i8, ptr %ptr
%tmp1 = zext i8 %tmp to i16
%tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %tmp3
}

define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
; LA32-LABEL: vldrepl_d_unaligned_offset:
; LA32: # %bb.0:
Expand Down