Skip to content

Commit 563d6d0

Browse files
preames, mahesh-attarde
authored and committed
[RISCV] Allow non-canonicalized splats in isProfitableToSinkOperands (llvm#161586)
This isn't an optimization change - IR transforms should have removed the operands and replaced them with poison. However, I noticed the non-canonical splat structure in a couple of llvm-reduce outputs. This results in us creating extremely atypical IR which is quite misleading about the true cause of what's going on. (Because the non-canonical splat doesn't get sunk, we then prune whatever was actually holding it outside the loop in the original example, eliminating insight as to the true cause of whatever issue we're debugging.)
1 parent 2444561 commit 563d6d0

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3139,8 +3139,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
31393139
bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
31403140
m_Value(), m_Value(), m_Value()));
31413141
if (!IsVPSplat &&
3142-
!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
3143-
m_Undef(), m_ZeroMask())))
3142+
!match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
3143+
m_Value(), m_ZeroMask())))
31443144
continue;
31453145

31463146
// Don't sink i1 splats.

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6018,3 +6018,39 @@ vector.latch: ; preds = %for.body419
60186018
for.cond.cleanup: ; preds = %vector.latch
60196019
ret void
60206020
}
6021+
6022+
;; This is exactly like sink_add_splat except that the splat has operands
6023+
;; which haven't been converted to undef.
6024+
define void @sink_non_canonical_splat(ptr nocapture %a, i32 signext %x) {
6025+
; CHECK-LABEL: sink_non_canonical_splat:
6026+
; CHECK: # %bb.0: # %entry
6027+
; CHECK-NEXT: lui a2, 1
6028+
; CHECK-NEXT: add a2, a0, a2
6029+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6030+
; CHECK-NEXT: .LBB131_1: # %vector.body
6031+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
6032+
; CHECK-NEXT: vle32.v v8, (a0)
6033+
; CHECK-NEXT: vadd.vx v8, v8, a1
6034+
; CHECK-NEXT: vse32.v v8, (a0)
6035+
; CHECK-NEXT: addi a0, a0, 16
6036+
; CHECK-NEXT: bne a0, a2, .LBB131_1
6037+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
6038+
; CHECK-NEXT: ret
6039+
entry:
6040+
%broadcast.splatinsert = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
6041+
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
6042+
br label %vector.body
6043+
6044+
vector.body: ; preds = %vector.body, %entry
6045+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
6046+
%0 = getelementptr inbounds i32, ptr %a, i64 %index
6047+
%wide.load = load <4 x i32>, ptr %0, align 4
6048+
%1 = add <4 x i32> %wide.load, %broadcast.splat
6049+
store <4 x i32> %1, ptr %0, align 4
6050+
%index.next = add nuw i64 %index, 4
6051+
%2 = icmp eq i64 %index.next, 1024
6052+
br i1 %2, label %for.cond.cleanup, label %vector.body
6053+
6054+
for.cond.cleanup: ; preds = %vector.body
6055+
ret void
6056+
}

0 commit comments

Comments
 (0)