@@ -5890,3 +5890,133 @@ vector.body: ; preds = %vector.body, %entry
5890 5890 for.cond.cleanup: ; preds = %vector.body
5891 5891   ret void
5892 5892 }
5893+
; Strip-mined VP (vector-predicated) loop nest: the two loop-invariant
; @llvm.experimental.vp.splat immediates (0 and 3) defined in the outer loop
; are materialized as `vmv.v.i` in the outer loop body, and the inner-loop
; multiply-accumulate selects to `vmacc.vv` (see CHECK lines below).
5894+ define void @sink_vp_splat (ptr nocapture %out , ptr nocapture %in ) {
5895+ ; CHECK-LABEL: sink_vp_splat:
5896+ ; CHECK: # %bb.0: # %entry
5897+ ; CHECK-NEXT: li a2, 0
5898+ ; CHECK-NEXT: li a3, 1024
5899+ ; CHECK-NEXT: lui a4, 1
5900+ ; CHECK-NEXT: .LBB129_1: # %vector.body
5901+ ; CHECK-NEXT: # =>This Loop Header: Depth=1
5902+ ; CHECK-NEXT: # Child Loop BB129_2 Depth 2
5903+ ; CHECK-NEXT: vsetvli a5, a3, e32, m4, ta, ma
5904+ ; CHECK-NEXT: slli a6, a2, 2
5905+ ; CHECK-NEXT: vmv.v.i v8, 0
5906+ ; CHECK-NEXT: vmv.v.i v12, 3
5907+ ; CHECK-NEXT: add a7, a1, a6
5908+ ; CHECK-NEXT: li t0, 1024
5909+ ; CHECK-NEXT: .LBB129_2: # %for.body424
5910+ ; CHECK-NEXT: # Parent Loop BB129_1 Depth=1
5911+ ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
5912+ ; CHECK-NEXT: vle32.v v16, (a7)
5913+ ; CHECK-NEXT: addi t0, t0, -1
5914+ ; CHECK-NEXT: vmacc.vv v8, v16, v12
5915+ ; CHECK-NEXT: add a7, a7, a4
5916+ ; CHECK-NEXT: bnez t0, .LBB129_2
5917+ ; CHECK-NEXT: # %bb.3: # %vector.latch
5918+ ; CHECK-NEXT: # in Loop: Header=BB129_1 Depth=1
5919+ ; CHECK-NEXT: add a6, a0, a6
5920+ ; CHECK-NEXT: sub a3, a3, a5
5921+ ; CHECK-NEXT: vse32.v v8, (a6)
5922+ ; CHECK-NEXT: add a2, a2, a5
5923+ ; CHECK-NEXT: bnez a3, .LBB129_1
5924+ ; CHECK-NEXT: # %bb.4: # %for.cond.cleanup
5925+ ; CHECK-NEXT: ret
5926+ entry:
5927+ br label %vector.body
5928+
5929+ vector.body: ; preds = %vector.latch, %entry
5930+ %scalar.ind = phi i64 [ 0 , %entry ], [ %next.ind , %vector.latch ] ; running column index, advanced by EVL per strip
5931+ %trip.count = phi i64 [ 1024 , %entry ], [ %remaining.trip.count , %vector.latch ] ; elements still to process
5932+ %evl = tail call i32 @llvm.experimental.get.vector.length.i64 (i64 %trip.count , i32 8 , i1 true ) ; EVL for this strip (VF=8, scalable)
5933+ %vp.splat1 = tail call <vscale x 8 x i32 > @llvm.experimental.vp.splat.nxv8i32 (i32 0 , <vscale x 8 x i1 > splat(i1 true ), i32 %evl ) ; invariant splat(0): accumulator seed
5934+ %vp.splat2 = tail call <vscale x 8 x i32 > @llvm.experimental.vp.splat.nxv8i32 (i32 3 , <vscale x 8 x i1 > splat(i1 true ), i32 %evl ) ; invariant splat(3): multiplier
5935+ %evl.cast = zext i32 %evl to i64 ; EVL widened for i64 index arithmetic
5936+ br label %for.body424
5937+
5938+ for.body424: ; preds = %for.body424, %vector.body
5939+ %scalar.phi = phi i64 [ 0 , %vector.body ], [ %indvars.iv.next27 , %for.body424 ] ; inner row counter, 0..1023
5940+ %vector.phi = phi <vscale x 8 x i32 > [ %vp.splat1 , %vector.body ], [ %vp.binary26 , %for.body424 ] ; accumulator: splat(0) in, vp.add out
5941+ %arrayidx625 = getelementptr inbounds [1024 x i32 ], ptr %in , i64 %scalar.phi , i64 %scalar.ind ; &in[row][%scalar.ind]
5942+ %widen.load = tail call <vscale x 8 x i32 > @llvm.vp.load.nxv8i32.p0 (ptr %arrayidx625 , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; load a strip of the current row
5943+ %vp.binary = tail call <vscale x 8 x i32 > @llvm.vp.mul.nxv8i32 (<vscale x 8 x i32 > %widen.load , <vscale x 8 x i32 > %vp.splat2 , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; load * splat(3)
5944+ %vp.binary26 = tail call <vscale x 8 x i32 > @llvm.vp.add.nxv8i32 (<vscale x 8 x i32 > %vector.phi , <vscale x 8 x i32 > %vp.binary , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; acc += product (folds to vmacc.vv, per CHECK)
5945+ %indvars.iv.next27 = add nuw nsw i64 %scalar.phi , 1
5946+ %exitcond.not28 = icmp eq i64 %indvars.iv.next27 , 1024
5947+ br i1 %exitcond.not28 , label %vector.latch , label %for.body424
5948+
5949+ vector.latch: ; preds = %for.body424
5950+ %arrayidx830 = getelementptr inbounds i32 , ptr %out , i64 %scalar.ind ; &out[%scalar.ind]
5951+ tail call void @llvm.vp.store.nxv8i32.p0 (<vscale x 8 x i32 > %vp.binary26 , ptr %arrayidx830 , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; store the accumulated strip
5952+ %remaining.trip.count = sub nuw i64 %trip.count , %evl.cast ; strip-mine bookkeeping
5953+ %next.ind = add i64 %scalar.ind , %evl.cast
5954+ %6 = icmp eq i64 %remaining.trip.count , 0 ; done once all 1024 elements are consumed
5955+ br i1 %6 , label %for.cond.cleanup , label %vector.body
5956+
5957+ for.cond.cleanup: ; preds = %vector.latch
5958+ ret void
5959+ }
5960+
; Same strip-mined VP loop shape with a floating-point splat: %f is widened
; to double once (fcvt.d.s in the CHECK prologue) and the loop-invariant
; @llvm.experimental.vp.splat of it is materialized with `vfmv.v.f` inside
; the outer loop; the vp.fadd selects to `vfadd.vv`.
; NOTE(review): the name references vfwadd.wf, but the CHECK output contains
; no vfwadd — presumably the test pins that a widening-add-with-scalar form
; is NOT produced here; confirm intent against the commit that added it.
5961+ define void @sink_vp_splat_vfwadd_wf (ptr nocapture %in , float %f ) {
5962+ ; CHECK-LABEL: sink_vp_splat_vfwadd_wf:
5963+ ; CHECK: # %bb.0: # %entry
5964+ ; CHECK-NEXT: li a1, 0
5965+ ; CHECK-NEXT: fcvt.d.s fa5, fa0
5966+ ; CHECK-NEXT: li a2, 1024
5967+ ; CHECK-NEXT: lui a3, 2
5968+ ; CHECK-NEXT: .LBB130_1: # %vector.body
5969+ ; CHECK-NEXT: # =>This Loop Header: Depth=1
5970+ ; CHECK-NEXT: # Child Loop BB130_2 Depth 2
5971+ ; CHECK-NEXT: vsetvli a4, a2, e64, m8, ta, ma
5972+ ; CHECK-NEXT: slli a5, a1, 3
5973+ ; CHECK-NEXT: vfmv.v.f v8, fa5
5974+ ; CHECK-NEXT: add a5, a0, a5
5975+ ; CHECK-NEXT: li a6, 1024
5976+ ; CHECK-NEXT: .LBB130_2: # %for.body419
5977+ ; CHECK-NEXT: # Parent Loop BB130_1 Depth=1
5978+ ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
5979+ ; CHECK-NEXT: vle64.v v16, (a5)
5980+ ; CHECK-NEXT: addi a6, a6, -1
5981+ ; CHECK-NEXT: vfadd.vv v16, v16, v8
5982+ ; CHECK-NEXT: vse64.v v16, (a5)
5983+ ; CHECK-NEXT: add a5, a5, a3
5984+ ; CHECK-NEXT: bnez a6, .LBB130_2
5985+ ; CHECK-NEXT: # %bb.3: # %vector.latch
5986+ ; CHECK-NEXT: # in Loop: Header=BB130_1 Depth=1
5987+ ; CHECK-NEXT: sub a2, a2, a4
5988+ ; CHECK-NEXT: add a1, a1, a4
5989+ ; CHECK-NEXT: bnez a2, .LBB130_1
5990+ ; CHECK-NEXT: # %bb.4: # %for.cond.cleanup
5991+ ; CHECK-NEXT: ret
5992+ entry:
5993+ %conv = fpext float %f to double ; widen scalar once, outside all loops (fcvt.d.s in CHECK)
5994+ br label %vector.body
5995+
5996+ vector.body: ; preds = %vector.latch, %entry
5997+ %scalar.ind = phi i64 [ 0 , %entry ], [ %next.ind , %vector.latch ] ; running column index, advanced by EVL per strip
5998+ %trip.count = phi i64 [ 1024 , %entry ], [ %remaining.trip.count , %vector.latch ] ; elements still to process
5999+ %evl = call i32 @llvm.experimental.get.vector.length.i64 (i64 %trip.count , i32 8 , i1 true ) ; EVL for this strip (VF=8, scalable)
6000+ %vp.splat = call <vscale x 8 x double > @llvm.experimental.vp.splat.nxv8f64 (double %conv , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; invariant splat of the widened scalar (vfmv.v.f in CHECK)
6001+ %evl.cast = zext i32 %evl to i64 ; EVL widened for i64 index arithmetic
6002+ br label %for.body419
6003+
6004+ for.body419: ; preds = %for.body419, %vector.body
6005+ %scalar.phi = phi i64 [ 0 , %vector.body ], [ %indvars.iv.next21 , %for.body419 ] ; inner row counter, 0..1023
6006+ %arrayidx620 = getelementptr inbounds [1024 x double ], ptr %in , i64 %scalar.phi , i64 %scalar.ind ; &in[row][%scalar.ind]
6007+ %widen.load = call <vscale x 8 x double > @llvm.vp.load.nxv8f64.p0 (ptr %arrayidx620 , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; load a strip of the current row
6008+ %vp.binary = call <vscale x 8 x double > @llvm.vp.fadd.nxv8f64 (<vscale x 8 x double > %widen.load , <vscale x 8 x double > %vp.splat , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; load + splat (vfadd.vv in CHECK)
6009+ call void @llvm.vp.store.nxv8f64.p0 (<vscale x 8 x double > %vp.binary , ptr %arrayidx620 , <vscale x 8 x i1 > splat (i1 true ), i32 %evl ) ; store back in place
6010+ %indvars.iv.next21 = add nuw nsw i64 %scalar.phi , 1
6011+ %exitcond.not22 = icmp eq i64 %indvars.iv.next21 , 1024
6012+ br i1 %exitcond.not22 , label %vector.latch , label %for.body419
6013+
6014+ vector.latch: ; preds = %for.body419
6015+ %remaining.trip.count = sub nuw i64 %trip.count , %evl.cast ; strip-mine bookkeeping
6016+ %next.ind = add i64 %scalar.ind , %evl.cast
6017+ %cond = icmp eq i64 %remaining.trip.count , 0 ; done once all 1024 elements are consumed
6018+ br i1 %cond , label %for.cond.cleanup , label %vector.body
6019+
6020+ for.cond.cleanup: ; preds = %vector.latch
6021+ ret void
6022+ }
0 commit comments