@@ -5581,3 +5581,163 @@ vector.body:
5581
5581
for.cond.cleanup:
5582
5582
ret void
5583
5583
}
5584
+
5585
; Test: llvm.fmuladd whose second multiplicand is a loop-invariant splat of %x.
; Each iteration computes a[i..i+3] = fmuladd(a[i..i+3], splat(%x), b[i..i+3])
; for i = 0, 4, ..., 1020. The expected assembly uses the scalar-operand form
; vfmacc.vf (v9 = fa0 * v8 + v9) and contains no separate vector-splat
; instruction; fa0 is presumably %x per the calling convention.
; NOTE(review): the CHECK lines look autogenerated -- presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm.
+ define void @sink_splat_fmuladd (ptr %a , ptr %b , float %x ) {
5586
+ ; CHECK-LABEL: sink_splat_fmuladd:
5587
+ ; CHECK: # %bb.0: # %entry
5588
+ ; CHECK-NEXT: lui a2, 1
5589
+ ; CHECK-NEXT: add a2, a1, a2
5590
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5591
+ ; CHECK-NEXT: .LBB121_1: # %vector.body
5592
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5593
+ ; CHECK-NEXT: vle32.v v8, (a0)
5594
+ ; CHECK-NEXT: vle32.v v9, (a1)
5595
+ ; CHECK-NEXT: addi a1, a1, 16
5596
+ ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
5597
+ ; CHECK-NEXT: vse32.v v9, (a0)
5598
+ ; CHECK-NEXT: addi a0, a0, 16
5599
+ ; CHECK-NEXT: bne a1, a2, .LBB121_1
5600
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5601
+ ; CHECK-NEXT: ret
5602
+ entry:
; Build the splat outside the loop: insert %x into lane 0, then broadcast
; lane 0 to all four lanes with an all-zero shuffle mask.
5603
+ %broadcast.splatinsert = insertelement <4 x float > poison, float %x , i32 0
5604
+ %broadcast.splat = shufflevector <4 x float > %broadcast.splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer
5605
+ br label %vector.body
5606
+
5607
+ vector.body:
; %index counts float elements; it advances by 4 per iteration.
5608
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5609
+ %0 = getelementptr inbounds float , ptr %a , i64 %index
5610
+ %wide.load = load <4 x float >, ptr %0 , align 4
5611
+ %1 = getelementptr inbounds float , ptr %b , i64 %index
5612
+ %wide.load12 = load <4 x float >, ptr %1 , align 4
; The splat is the second multiplicand; the vector loaded from %b is the addend.
5613
+ %2 = call <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %wide.load , <4 x float > %broadcast.splat , <4 x float > %wide.load12 )
; The result overwrites the four elements just loaded from %a.
5614
+ store <4 x float > %2 , ptr %0 , align 4
5615
+ %index.next = add nuw i64 %index , 4
; Exit once 1024 elements have been processed.
5616
+ %3 = icmp eq i64 %index.next , 1024
5617
+ br i1 %3 , label %for.cond.cleanup , label %vector.body
5618
+
5619
+ for.cond.cleanup:
5620
+ ret void
5621
+ }
5622
+
5623
; Test: llvm.fmuladd whose FIRST multiplicand is a loop-invariant splat of %x
; (multiplicand order commuted relative to the usual load-then-splat form).
; Each iteration computes a[i..i+3] = fmuladd(splat(%x), a[i..i+3], b[i..i+3])
; for i = 0, 4, ..., 1020. The expected assembly still uses the scalar-operand
; form vfmacc.vf (v9 = fa0 * v8 + v9) with no separate vector-splat
; instruction; fa0 is presumably %x per the calling convention.
; NOTE(review): the CHECK lines look autogenerated -- presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm.
+ define void @sink_splat_fmuladd_commute (ptr %a , ptr %b , float %x ) {
5624
+ ; CHECK-LABEL: sink_splat_fmuladd_commute:
5625
+ ; CHECK: # %bb.0: # %entry
5626
+ ; CHECK-NEXT: lui a2, 1
5627
+ ; CHECK-NEXT: add a2, a1, a2
5628
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5629
+ ; CHECK-NEXT: .LBB122_1: # %vector.body
5630
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5631
+ ; CHECK-NEXT: vle32.v v8, (a0)
5632
+ ; CHECK-NEXT: vle32.v v9, (a1)
5633
+ ; CHECK-NEXT: addi a1, a1, 16
5634
+ ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
5635
+ ; CHECK-NEXT: vse32.v v9, (a0)
5636
+ ; CHECK-NEXT: addi a0, a0, 16
5637
+ ; CHECK-NEXT: bne a1, a2, .LBB122_1
5638
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5639
+ ; CHECK-NEXT: ret
5640
+ entry:
; Build the splat outside the loop: insert %x into lane 0, then broadcast
; lane 0 to all four lanes with an all-zero shuffle mask.
5641
+ %broadcast.splatinsert = insertelement <4 x float > poison, float %x , i32 0
5642
+ %broadcast.splat = shufflevector <4 x float > %broadcast.splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer
5643
+ br label %vector.body
5644
+
5645
+ vector.body:
; %index counts float elements; it advances by 4 per iteration.
5646
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5647
+ %0 = getelementptr inbounds float , ptr %a , i64 %index
5648
+ %wide.load = load <4 x float >, ptr %0 , align 4
5649
+ %1 = getelementptr inbounds float , ptr %b , i64 %index
5650
+ %wide.load12 = load <4 x float >, ptr %1 , align 4
; Commuted form: the splat is the first multiplicand, the vector loaded from
; %a is the second, and the vector loaded from %b is the addend.
5651
+ %2 = call <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %broadcast.splat , <4 x float > %wide.load , <4 x float > %wide.load12 )
; The result overwrites the four elements just loaded from %a.
5652
+ store <4 x float > %2 , ptr %0 , align 4
5653
+ %index.next = add nuw i64 %index , 4
; Exit once 1024 elements have been processed.
5654
+ %3 = icmp eq i64 %index.next , 1024
5655
+ br i1 %3 , label %for.cond.cleanup , label %vector.body
5656
+
5657
+ for.cond.cleanup:
5658
+ ret void
5659
+ }
5660
+
5661
; Test: llvm.vp.fmuladd (masked, explicit-vector-length) whose second
; multiplicand is a loop-invariant splat of %x. Each iteration loads four
; floats from %a and %b, calls vp.fmuladd(a_vec, splat(%x), b_vec, %m, %vl),
; and stores the result back over the %a elements, for i = 0, 4, ..., 1020.
; The expected assembly uses the masked scalar-operand form
; vfmadd.vf v8, fa0, v9, v0.t (v8 = fa0 * v8 + v9 on active lanes) with no
; separate vector-splat instruction. The slli/srli-by-32 pair zero-extends a2
; (presumably %vl) into a3, which sets VL just for the multiply-add; VL is set
; back to 4 before the store.
; NOTE(review): the CHECK lines look autogenerated -- presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm.
+ define void @sink_splat_vp_fmuladd (ptr %a , ptr %b , float %x , <4 x i1 > %m , i32 %vl ) {
5662
+ ; CHECK-LABEL: sink_splat_vp_fmuladd:
5663
+ ; CHECK: # %bb.0: # %entry
5664
+ ; CHECK-NEXT: lui a3, 1
5665
+ ; CHECK-NEXT: slli a4, a2, 32
5666
+ ; CHECK-NEXT: add a2, a1, a3
5667
+ ; CHECK-NEXT: srli a3, a4, 32
5668
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5669
+ ; CHECK-NEXT: .LBB123_1: # %vector.body
5670
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5671
+ ; CHECK-NEXT: vle32.v v8, (a0)
5672
+ ; CHECK-NEXT: vle32.v v9, (a1)
5673
+ ; CHECK-NEXT: addi a1, a1, 16
5674
+ ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
5675
+ ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
5676
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5677
+ ; CHECK-NEXT: vse32.v v8, (a0)
5678
+ ; CHECK-NEXT: addi a0, a0, 16
5679
+ ; CHECK-NEXT: bne a1, a2, .LBB123_1
5680
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5681
+ ; CHECK-NEXT: ret
5682
+ entry:
; Build the splat outside the loop: insert %x into lane 0, then broadcast
; lane 0 to all four lanes with an all-zero shuffle mask.
5683
+ %broadcast.splatinsert = insertelement <4 x float > poison, float %x , i32 0
5684
+ %broadcast.splat = shufflevector <4 x float > %broadcast.splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer
5685
+ br label %vector.body
5686
+
5687
+ vector.body:
; %index counts float elements; it advances by 4 per iteration.
5688
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5689
+ %0 = getelementptr inbounds float , ptr %a , i64 %index
5690
+ %wide.load = load <4 x float >, ptr %0 , align 4
5691
+ %1 = getelementptr inbounds float , ptr %b , i64 %index
5692
+ %wide.load12 = load <4 x float >, ptr %1 , align 4
; The splat is the second multiplicand and the vector loaded from %b is the
; addend; %m is the lane mask and %vl the explicit vector length. Both are
; function arguments and therefore loop-invariant.
5693
+ %2 = call <4 x float > @llvm.vp.fmuladd.v4f32 (<4 x float > %wide.load , <4 x float > %broadcast.splat , <4 x float > %wide.load12 , <4 x i1 > %m , i32 %vl )
; The full four-element result is stored unmasked over the %a elements.
5694
+ store <4 x float > %2 , ptr %0 , align 4
5695
+ %index.next = add nuw i64 %index , 4
; Exit once 1024 elements have been processed.
5696
+ %3 = icmp eq i64 %index.next , 1024
5697
+ br i1 %3 , label %for.cond.cleanup , label %vector.body
5698
+
5699
+ for.cond.cleanup:
5700
+ ret void
5701
+ }
5702
+
5703
; Test: llvm.vp.fmuladd (masked, explicit-vector-length) whose FIRST
; multiplicand is a loop-invariant splat of %x (multiplicand order commuted).
; Each iteration loads four floats from %a and %b, calls
; vp.fmuladd(splat(%x), a_vec, b_vec, %m, %vl), and stores the result back
; over the %a elements, for i = 0, 4, ..., 1020.
; The expected assembly still uses the masked scalar-operand form
; vfmadd.vf v8, fa0, v9, v0.t (v8 = fa0 * v8 + v9 on active lanes) with no
; separate vector-splat instruction. The slli/srli-by-32 pair zero-extends a2
; (presumably %vl) into a3, which sets VL just for the multiply-add; VL is set
; back to 4 before the store.
; NOTE(review): the CHECK lines look autogenerated -- presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm.
+ define void @sink_splat_vp_fmuladd_commute (ptr %a , ptr %b , float %x , <4 x i1 > %m , i32 %vl ) {
5704
+ ; CHECK-LABEL: sink_splat_vp_fmuladd_commute:
5705
+ ; CHECK: # %bb.0: # %entry
5706
+ ; CHECK-NEXT: lui a3, 1
5707
+ ; CHECK-NEXT: slli a4, a2, 32
5708
+ ; CHECK-NEXT: add a2, a1, a3
5709
+ ; CHECK-NEXT: srli a3, a4, 32
5710
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5711
+ ; CHECK-NEXT: .LBB124_1: # %vector.body
5712
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5713
+ ; CHECK-NEXT: vle32.v v8, (a0)
5714
+ ; CHECK-NEXT: vle32.v v9, (a1)
5715
+ ; CHECK-NEXT: addi a1, a1, 16
5716
+ ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
5717
+ ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
5718
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5719
+ ; CHECK-NEXT: vse32.v v8, (a0)
5720
+ ; CHECK-NEXT: addi a0, a0, 16
5721
+ ; CHECK-NEXT: bne a1, a2, .LBB124_1
5722
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5723
+ ; CHECK-NEXT: ret
5724
+ entry:
; Build the splat outside the loop: insert %x into lane 0, then broadcast
; lane 0 to all four lanes with an all-zero shuffle mask.
5725
+ %broadcast.splatinsert = insertelement <4 x float > poison, float %x , i32 0
5726
+ %broadcast.splat = shufflevector <4 x float > %broadcast.splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer
5727
+ br label %vector.body
5728
+
5729
+ vector.body:
; %index counts float elements; it advances by 4 per iteration.
5730
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5731
+ %0 = getelementptr inbounds float , ptr %a , i64 %index
5732
+ %wide.load = load <4 x float >, ptr %0 , align 4
5733
+ %1 = getelementptr inbounds float , ptr %b , i64 %index
5734
+ %wide.load12 = load <4 x float >, ptr %1 , align 4
; Commuted form: the splat is the first multiplicand, the vector loaded from
; %a is the second, and the vector loaded from %b is the addend; %m is the
; lane mask and %vl the explicit vector length.
5735
+ %2 = call <4 x float > @llvm.vp.fmuladd.v4f32 (<4 x float > %broadcast.splat , <4 x float > %wide.load , <4 x float > %wide.load12 , <4 x i1 > %m , i32 %vl )
; The full four-element result is stored unmasked over the %a elements.
5736
+ store <4 x float > %2 , ptr %0 , align 4
5737
+ %index.next = add nuw i64 %index , 4
; Exit once 1024 elements have been processed.
5738
+ %3 = icmp eq i64 %index.next , 1024
5739
+ br i1 %3 , label %for.cond.cleanup , label %vector.body
5740
+
5741
+ for.cond.cleanup:
5742
+ ret void
5743
+ }
0 commit comments