Skip to content

Commit a611d67

Browse files
authored
[RISCV][TTI] Add llvm.fmuladd and llvm.vp.fmuladd into canSplatOperand (#119508)
If the first or second operand of fmuladd is a splat operand, this helps fmuladd fold from vv instructions to vf instructions.
1 parent 5e247d7 commit a611d67

File tree

2 files changed

+162
-0
lines changed

2 files changed

+162
-0
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2446,6 +2446,8 @@ bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const {
24462446
switch (II->getIntrinsicID()) {
24472447
case Intrinsic::fma:
24482448
case Intrinsic::vp_fma:
2449+
case Intrinsic::fmuladd:
2450+
case Intrinsic::vp_fmuladd:
24492451
return Operand == 0 || Operand == 1;
24502452
case Intrinsic::vp_shl:
24512453
case Intrinsic::vp_lshr:

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5581,3 +5581,163 @@ vector.body:
55815581
for.cond.cleanup:
55825582
ret void
55835583
}
5584+
5585+
; Loop test: a <4 x float> splat of %x is built in %entry and passed as the
; second operand of llvm.fmuladd.v4f32 inside %vector.body.
; The CHECK lines expect the loop body to contain vfmacc.vf taking fa0.
define void @sink_splat_fmuladd(ptr %a, ptr %b, float %x) {
5586+
; CHECK-LABEL: sink_splat_fmuladd:
5587+
; CHECK: # %bb.0: # %entry
5588+
; CHECK-NEXT: lui a2, 1
5589+
; CHECK-NEXT: add a2, a1, a2
5590+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5591+
; CHECK-NEXT: .LBB121_1: # %vector.body
5592+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5593+
; CHECK-NEXT: vle32.v v8, (a0)
5594+
; CHECK-NEXT: vle32.v v9, (a1)
5595+
; CHECK-NEXT: addi a1, a1, 16
5596+
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
5597+
; CHECK-NEXT: vse32.v v9, (a0)
5598+
; CHECK-NEXT: addi a0, a0, 16
5599+
; CHECK-NEXT: bne a1, a2, .LBB121_1
5600+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5601+
; CHECK-NEXT: ret
5602+
entry:
5603+
%broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5604+
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5605+
br label %vector.body
5606+
5607+
vector.body:
5608+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5609+
%0 = getelementptr inbounds float, ptr %a, i64 %index
5610+
%wide.load = load <4 x float>, ptr %0, align 4
5611+
%1 = getelementptr inbounds float, ptr %b, i64 %index
5612+
%wide.load12 = load <4 x float>, ptr %1, align 4
5613+
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
5614+
store <4 x float> %2, ptr %0, align 4
5615+
%index.next = add nuw i64 %index, 4
5616+
%3 = icmp eq i64 %index.next, 1024
5617+
br i1 %3, label %for.cond.cleanup, label %vector.body
5618+
5619+
for.cond.cleanup:
5620+
ret void
5621+
}
5622+
5623+
; Commuted variant: the <4 x float> splat of %x built in %entry is passed as
; the first operand of llvm.fmuladd.v4f32 inside %vector.body.
; The CHECK lines expect the loop body to contain vfmacc.vf taking fa0.
define void @sink_splat_fmuladd_commute(ptr %a, ptr %b, float %x) {
5624+
; CHECK-LABEL: sink_splat_fmuladd_commute:
5625+
; CHECK: # %bb.0: # %entry
5626+
; CHECK-NEXT: lui a2, 1
5627+
; CHECK-NEXT: add a2, a1, a2
5628+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5629+
; CHECK-NEXT: .LBB122_1: # %vector.body
5630+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5631+
; CHECK-NEXT: vle32.v v8, (a0)
5632+
; CHECK-NEXT: vle32.v v9, (a1)
5633+
; CHECK-NEXT: addi a1, a1, 16
5634+
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
5635+
; CHECK-NEXT: vse32.v v9, (a0)
5636+
; CHECK-NEXT: addi a0, a0, 16
5637+
; CHECK-NEXT: bne a1, a2, .LBB122_1
5638+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5639+
; CHECK-NEXT: ret
5640+
entry:
5641+
%broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5642+
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5643+
br label %vector.body
5644+
5645+
vector.body:
5646+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5647+
%0 = getelementptr inbounds float, ptr %a, i64 %index
5648+
%wide.load = load <4 x float>, ptr %0, align 4
5649+
%1 = getelementptr inbounds float, ptr %b, i64 %index
5650+
%wide.load12 = load <4 x float>, ptr %1, align 4
5651+
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
5652+
store <4 x float> %2, ptr %0, align 4
5653+
%index.next = add nuw i64 %index, 4
5654+
%3 = icmp eq i64 %index.next, 1024
5655+
br i1 %3, label %for.cond.cleanup, label %vector.body
5656+
5657+
for.cond.cleanup:
5658+
ret void
5659+
}
5660+
5661+
; VP variant: the <4 x float> splat of %x built in %entry is passed as the
; second operand of llvm.vp.fmuladd.v4f32, which also takes mask %m and %vl.
; The CHECK lines expect the loop body to contain a masked (v0.t) vfmadd.vf
; taking fa0, bracketed by a vsetvli/vsetivli pair.
define void @sink_splat_vp_fmuladd(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
5662+
; CHECK-LABEL: sink_splat_vp_fmuladd:
5663+
; CHECK: # %bb.0: # %entry
5664+
; CHECK-NEXT: lui a3, 1
5665+
; CHECK-NEXT: slli a4, a2, 32
5666+
; CHECK-NEXT: add a2, a1, a3
5667+
; CHECK-NEXT: srli a3, a4, 32
5668+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5669+
; CHECK-NEXT: .LBB123_1: # %vector.body
5670+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5671+
; CHECK-NEXT: vle32.v v8, (a0)
5672+
; CHECK-NEXT: vle32.v v9, (a1)
5673+
; CHECK-NEXT: addi a1, a1, 16
5674+
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
5675+
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
5676+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5677+
; CHECK-NEXT: vse32.v v8, (a0)
5678+
; CHECK-NEXT: addi a0, a0, 16
5679+
; CHECK-NEXT: bne a1, a2, .LBB123_1
5680+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5681+
; CHECK-NEXT: ret
5682+
entry:
5683+
%broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5684+
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5685+
br label %vector.body
5686+
5687+
vector.body:
5688+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5689+
%0 = getelementptr inbounds float, ptr %a, i64 %index
5690+
%wide.load = load <4 x float>, ptr %0, align 4
5691+
%1 = getelementptr inbounds float, ptr %b, i64 %index
5692+
%wide.load12 = load <4 x float>, ptr %1, align 4
5693+
%2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
5694+
store <4 x float> %2, ptr %0, align 4
5695+
%index.next = add nuw i64 %index, 4
5696+
%3 = icmp eq i64 %index.next, 1024
5697+
br i1 %3, label %for.cond.cleanup, label %vector.body
5698+
5699+
for.cond.cleanup:
5700+
ret void
5701+
}
5702+
5703+
; Commuted VP variant: the <4 x float> splat of %x built in %entry is passed
; as the first operand of llvm.vp.fmuladd.v4f32, which also takes mask %m
; and %vl.
; The CHECK lines expect the loop body to contain a masked (v0.t) vfmadd.vf
; taking fa0, bracketed by a vsetvli/vsetivli pair.
define void @sink_splat_vp_fmuladd_commute(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
5704+
; CHECK-LABEL: sink_splat_vp_fmuladd_commute:
5705+
; CHECK: # %bb.0: # %entry
5706+
; CHECK-NEXT: lui a3, 1
5707+
; CHECK-NEXT: slli a4, a2, 32
5708+
; CHECK-NEXT: add a2, a1, a3
5709+
; CHECK-NEXT: srli a3, a4, 32
5710+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5711+
; CHECK-NEXT: .LBB124_1: # %vector.body
5712+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5713+
; CHECK-NEXT: vle32.v v8, (a0)
5714+
; CHECK-NEXT: vle32.v v9, (a1)
5715+
; CHECK-NEXT: addi a1, a1, 16
5716+
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
5717+
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
5718+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5719+
; CHECK-NEXT: vse32.v v8, (a0)
5720+
; CHECK-NEXT: addi a0, a0, 16
5721+
; CHECK-NEXT: bne a1, a2, .LBB124_1
5722+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5723+
; CHECK-NEXT: ret
5724+
entry:
5725+
%broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5726+
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5727+
br label %vector.body
5728+
5729+
vector.body:
5730+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5731+
%0 = getelementptr inbounds float, ptr %a, i64 %index
5732+
%wide.load = load <4 x float>, ptr %0, align 4
5733+
%1 = getelementptr inbounds float, ptr %b, i64 %index
5734+
%wide.load12 = load <4 x float>, ptr %1, align 4
5735+
%2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
5736+
store <4 x float> %2, ptr %0, align 4
5737+
%index.next = add nuw i64 %index, 4
5738+
%3 = icmp eq i64 %index.next, 1024
5739+
br i1 %3, label %for.cond.cleanup, label %vector.body
5740+
5741+
for.cond.cleanup:
5742+
ret void
5743+
}

0 commit comments

Comments
 (0)