@@ -5741,3 +5741,152 @@ vector.body:
57415741for.cond.cleanup:
57425742 ret void
57435743}
5744+
; Widening FP add with a splatted scalar operand: %f is fpext'ed to double and
; splatted in the entry block, then added to an fpext'ed <vscale x 2 x float>
; load inside the loop. The CHECK lines expect a single vfwadd.vf that reads
; fa0 directly, with no separate fcvt.d.s or splat instruction.
; NOTE(review): the back-edge branch tests the literal `i1 32` rather than %3,
; so %3 is dead and the expected asm ends in an unconditional `j` with no ret.
; Presumably `br i1 %3` was intended -- confirm, then regenerate the CHECKs.
; NOTE(review): %2 (the %b address) is computed but never used; the store
; writes through %0, the same address the load just read -- confirm intent.
5745+ define void @sink_splat_vfwadd_vf (ptr nocapture %a , ptr nocapture %b , float %f ) {
5746+ ; CHECK-LABEL: sink_splat_vfwadd_vf:
5747+ ; CHECK: # %bb.0: # %entry
5748+ ; CHECK-NEXT: li a1, 0
5749+ ; CHECK-NEXT: li a2, 1020
5750+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5751+ ; CHECK-NEXT: .LBB125_1: # %vector.body
5752+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5753+ ; CHECK-NEXT: vl1re32.v v8, (a0)
5754+ ; CHECK-NEXT: addi a1, a1, 4
5755+ ; CHECK-NEXT: addi a2, a2, -4
5756+ ; CHECK-NEXT: vfwadd.vf v10, v8, fa0
5757+ ; CHECK-NEXT: vs2r.v v10, (a0)
5758+ ; CHECK-NEXT: addi a0, a0, 16
5759+ ; CHECK-NEXT: j .LBB125_1
5760+ entry:
5761+ %f.ext = fpext float %f to double
5762+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5763+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer
5764+ br label %vector.body
5765+
5766+ vector.body: ; preds = %vector.body, %entry
5767+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5768+ %0 = getelementptr float , ptr %a , i64 %index
5769+ %wide.load = load <vscale x 2 x float >, ptr %0
5770+ %ext = fpext <vscale x 2 x float > %wide.load to <vscale x 2 x double >
5771+ %1 = fadd <vscale x 2 x double > %ext , %broadcast.splat
5772+ %2 = getelementptr double , ptr %b , i64 %index
5773+ store <vscale x 2 x double > %1 , ptr %0
5774+ %index.next = add i64 %index , 4
5775+ %3 = icmp eq i64 %index.next , 1024
5776+ br i1 32 , label %for.cond.cleanup , label %vector.body
5777+
5778+ for.cond.cleanup: ; preds = %vector.body
5779+ ret void
5780+ }
5781+
; Widening FP add where the vector operand is already wide: %f is fpext'ed to
; double and splatted in the entry block, then added to a
; <vscale x 2 x double> load inside the loop. The CHECK lines expect a single
; vfwadd.wf that reads fa0 directly, with no separate fcvt.d.s or splat.
; NOTE(review): the back-edge branch tests the literal `i1 32` rather than %3,
; so %3 is dead and the expected asm ends in an unconditional `j` with no ret.
; Presumably `br i1 %3` was intended -- confirm, then regenerate the CHECKs.
; NOTE(review): %2 (the %b address) is computed but never used; the store
; writes through %0, the same address the load just read -- confirm intent.
5782+ define void @sink_splat_vfwadd_wf (ptr nocapture %a , ptr nocapture %b , float %f ) {
5783+ ; CHECK-LABEL: sink_splat_vfwadd_wf:
5784+ ; CHECK: # %bb.0: # %entry
5785+ ; CHECK-NEXT: li a1, 0
5786+ ; CHECK-NEXT: li a2, 1020
5787+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5788+ ; CHECK-NEXT: .LBB126_1: # %vector.body
5789+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5790+ ; CHECK-NEXT: vl2re64.v v8, (a0)
5791+ ; CHECK-NEXT: addi a1, a1, 4
5792+ ; CHECK-NEXT: addi a2, a2, -4
5793+ ; CHECK-NEXT: vfwadd.wf v8, v8, fa0
5794+ ; CHECK-NEXT: vs2r.v v8, (a0)
5795+ ; CHECK-NEXT: addi a0, a0, 32
5796+ ; CHECK-NEXT: j .LBB126_1
5797+ entry:
5798+ %f.ext = fpext float %f to double
5799+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5800+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer
5801+ br label %vector.body
5802+
5803+ vector.body: ; preds = %vector.body, %entry
5804+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5805+ %0 = getelementptr double , ptr %a , i64 %index
5806+ %wide.load = load <vscale x 2 x double >, ptr %0
5807+ %1 = fadd <vscale x 2 x double > %wide.load , %broadcast.splat
5808+ %2 = getelementptr double , ptr %b , i64 %index
5809+ store <vscale x 2 x double > %1 , ptr %0
5810+ %index.next = add i64 %index , 4
5811+ %3 = icmp eq i64 %index.next , 1024
5812+ br i1 32 , label %for.cond.cleanup , label %vector.body
5813+
5814+ for.cond.cleanup: ; preds = %vector.body
5815+ ret void
5816+ }
5817+
; Widening FP multiply with a splatted scalar operand: %f is fpext'ed to double
; and splatted in the entry block, then multiplied with an fpext'ed
; <vscale x 2 x float> load inside the loop. The CHECK lines expect a single
; vfwmul.vf that reads fa0 directly, with no separate fcvt.d.s or splat.
; NOTE(review): the back-edge branch tests the literal `i1 32` rather than %3,
; so %3 is dead and the expected asm ends in an unconditional `j` with no ret.
; Presumably `br i1 %3` was intended -- confirm, then regenerate the CHECKs.
; NOTE(review): %2 (the %b address) is computed but never used; the store
; writes through %0, the same address the load just read -- confirm intent.
5818+ define void @sink_splat_vfwmul_vf (ptr nocapture %a , ptr nocapture %b , float %f ) {
5819+ ; CHECK-LABEL: sink_splat_vfwmul_vf:
5820+ ; CHECK: # %bb.0: # %entry
5821+ ; CHECK-NEXT: li a1, 0
5822+ ; CHECK-NEXT: li a2, 1020
5823+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5824+ ; CHECK-NEXT: .LBB127_1: # %vector.body
5825+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5826+ ; CHECK-NEXT: vl1re32.v v8, (a0)
5827+ ; CHECK-NEXT: addi a1, a1, 4
5828+ ; CHECK-NEXT: addi a2, a2, -4
5829+ ; CHECK-NEXT: vfwmul.vf v10, v8, fa0
5830+ ; CHECK-NEXT: vs2r.v v10, (a0)
5831+ ; CHECK-NEXT: addi a0, a0, 16
5832+ ; CHECK-NEXT: j .LBB127_1
5833+ entry:
5834+ %f.ext = fpext float %f to double
5835+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5836+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer
5837+ br label %vector.body
5838+
5839+ vector.body: ; preds = %vector.body, %entry
5840+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5841+ %0 = getelementptr float , ptr %a , i64 %index
5842+ %wide.load = load <vscale x 2 x float >, ptr %0
5843+ %ext = fpext <vscale x 2 x float > %wide.load to <vscale x 2 x double >
5844+ %1 = fmul <vscale x 2 x double > %ext , %broadcast.splat
5845+ %2 = getelementptr double , ptr %b , i64 %index
5846+ store <vscale x 2 x double > %1 , ptr %0
5847+ %index.next = add i64 %index , 4
5848+ %3 = icmp eq i64 %index.next , 1024
5849+ br i1 32 , label %for.cond.cleanup , label %vector.body
5850+
5851+ for.cond.cleanup: ; preds = %vector.body
5852+ ret void
5853+ }
5854+
5855+ ; Even though there's no vfwmul.wf, we'll sink the fcvt.d.s. Make sure
5856+ ; early-machinelicm undoes the sink after isel.
; FP multiply of a <vscale x 2 x double> load by a splat of fpext(%f). The
; CHECK lines expect fcvt.d.s to appear once, before the loop label, and the
; loop body to use a plain (non-widening) vfmul.vf on fa5 under e64/m2.
; NOTE(review): the back-edge branch tests the literal `i1 32` rather than %3,
; so %3 is dead and the expected asm ends in an unconditional `j` with no ret.
; Presumably `br i1 %3` was intended -- confirm, then regenerate the CHECKs.
; NOTE(review): %0 is a `getelementptr float` although the load through it is
; <vscale x 2 x double> (sink_splat_vfwadd_wf uses `getelementptr double`);
; the expected pointer bump here is 16 rather than 32 -- confirm intent.
; NOTE(review): %2 (the %b address) is computed but never used; the store
; writes through %0, the same address the load just read -- confirm intent.
5857+ define void @sink_splat_vfwmul_wf (ptr nocapture %a , ptr nocapture %b , float %f ) {
5858+ ; CHECK-LABEL: sink_splat_vfwmul_wf:
5859+ ; CHECK: # %bb.0: # %entry
5860+ ; CHECK-NEXT: li a1, 0
5861+ ; CHECK-NEXT: li a2, 1020
5862+ ; CHECK-NEXT: fcvt.d.s fa5, fa0
5863+ ; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma
5864+ ; CHECK-NEXT: .LBB128_1: # %vector.body
5865+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5866+ ; CHECK-NEXT: vl2re64.v v8, (a0)
5867+ ; CHECK-NEXT: addi a1, a1, 4
5868+ ; CHECK-NEXT: addi a2, a2, -4
5869+ ; CHECK-NEXT: vfmul.vf v8, v8, fa5
5870+ ; CHECK-NEXT: vs2r.v v8, (a0)
5871+ ; CHECK-NEXT: addi a0, a0, 16
5872+ ; CHECK-NEXT: j .LBB128_1
5873+ entry:
5874+ %f.ext = fpext float %f to double
5875+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5876+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer
5877+ br label %vector.body
5878+
5879+ vector.body: ; preds = %vector.body, %entry
5880+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5881+ %0 = getelementptr float , ptr %a , i64 %index
5882+ %wide.load = load <vscale x 2 x double >, ptr %0
5883+ %1 = fmul <vscale x 2 x double > %wide.load , %broadcast.splat
5884+ %2 = getelementptr double , ptr %b , i64 %index
5885+ store <vscale x 2 x double > %1 , ptr %0
5886+ %index.next = add i64 %index , 4
5887+ %3 = icmp eq i64 %index.next , 1024
5888+ br i1 32 , label %for.cond.cleanup , label %vector.body
5889+
5890+ for.cond.cleanup: ; preds = %vector.body
5891+ ret void
5892+ }
0 commit comments