@@ -487,6 +487,54 @@ for.end: ; preds = %for.body, %entry
487487 ret void
488488}
489489
; saxpy_vec_demanded_fields: strip-mined loop that, per iteration, loads a
; vector from %x and one from %y, feeds both plus the scalar %a to
; llvm.riscv.vfmacc, and stores the result back through the %y pointer.
;
; The entry block requests its vector length with vsetvli immediates (2, 3),
; which the expected output shows as "e32, m8". The vsetvli at the bottom of
; the loop uses immediates (1, 2), shown as "e16, m4". Both settings have the
; same SEW/LMUL ratio (32/8 == 16/4), while the vector operations in the loop
; body are all e32/m8.
; NOTE(review): judging by the function name, this presumably exercises the
; demanded-fields handling in vsetvli insertion -- confirm against the pass.
490+ define void @saxpy_vec_demanded_fields (i64 %n , float %a , ptr nocapture readonly %x , ptr nocapture %y ) {
491+ ; CHECK-LABEL: saxpy_vec_demanded_fields:
492+ ; CHECK: # %bb.0: # %entry
493+ ; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
494+ ; CHECK-NEXT: beqz a3, .LBB9_2
495+ ; CHECK-NEXT: .LBB9_1: # %for.body
496+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
497+ ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
498+ ; CHECK-NEXT: vle32.v v8, (a1)
499+ ; CHECK-NEXT: vle32.v v16, (a2)
500+ ; CHECK-NEXT: slli a4, a3, 2
501+ ; CHECK-NEXT: add a1, a1, a4
502+ ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
503+ ; CHECK-NEXT: vfmacc.vf v16, fa0, v8
504+ ; CHECK-NEXT: vse32.v v16, (a2)
505+ ; CHECK-NEXT: sub a0, a0, a3
506+ ; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma
507+ ; CHECK-NEXT: add a2, a2, a4
508+ ; CHECK-NEXT: bnez a3, .LBB9_1
509+ ; CHECK-NEXT: .LBB9_2: # %for.end
510+ ; CHECK-NEXT: ret
511+ entry:
; Initial vector-length request for all %n elements; a zero result skips the
; loop entirely.
512+ %0 = tail call i64 @llvm.riscv.vsetvli.i64 (i64 %n , i64 2 , i64 3 )
513+ %cmp.not13 = icmp eq i64 %0 , 0
514+ br i1 %cmp.not13 , label %for.end , label %for.body
515+
516+ for.body: ; preds = %for.body, %entry
; %1 is the vector length for this iteration: %0 on the first trip, otherwise
; the result of the vsetvli call at the bottom of the loop (%7).
517+ %1 = phi i64 [ %7 , %for.body ], [ %0 , %entry ]
518+ %n.addr.016 = phi i64 [ %sub , %for.body ], [ %n , %entry ]
519+ %x.addr.015 = phi ptr [ %add.ptr , %for.body ], [ %x , %entry ]
520+ %y.addr.014 = phi ptr [ %add.ptr1 , %for.body ], [ %y , %entry ]
; The ptr-to-ptr bitcasts below do not change the type; %2 and %4 simply alias
; the current %x and %y cursors.
521+ %2 = bitcast ptr %x.addr.015 to ptr
522+ %3 = tail call <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float > undef , ptr %2 , i64 %1 )
523+ %add.ptr = getelementptr inbounds float , ptr %x.addr.015 , i64 %1
524+ %4 = bitcast ptr %y.addr.014 to ptr
525+ %5 = tail call <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float > undef , ptr %4 , i64 %1 )
; NOTE(review): the "i64 7" and trailing "i64 0" operands are presumably the
; rounding-mode and tail/mask policy arguments; the expected output switches
; to "tu, ma" just before vfmacc.vf -- confirm against the intrinsic definition.
526+ %6 = tail call <vscale x 16 x float > @llvm.riscv.vfmacc.nxv16f32.f32.i64 (<vscale x 16 x float > %5 , float %a , <vscale x 16 x float > %3 , i64 7 , i64 %1 , i64 0 )
527+ tail call void @llvm.riscv.vse.nxv16f32.i64 (<vscale x 16 x float > %6 , ptr %4 , i64 %1 )
528+ %add.ptr1 = getelementptr inbounds float , ptr %y.addr.014 , i64 %1
529+ %sub = sub i64 %n.addr.016 , %1
; Next iteration's vector length is requested with different immediates
; (1, 2) than the entry block's (2, 3); the loop ends when it returns zero.
530+ %7 = tail call i64 @llvm.riscv.vsetvli.i64 (i64 %sub , i64 1 , i64 2 )
531+ %cmp.not = icmp eq i64 %7 , 0
532+ br i1 %cmp.not , label %for.end , label %for.body
533+
534+ for.end: ; preds = %for.body, %entry
535+ ret void
536+ }
537+
490538declare i64 @llvm.riscv.vsetvli.i64 (i64 , i64 immarg, i64 immarg)
491539declare <vscale x 16 x float > @llvm.riscv.vle.nxv16f32.i64 (<vscale x 16 x float >, ptr nocapture , i64 )
492540declare <vscale x 16 x float > @llvm.riscv.vfmacc.nxv16f32.f32.i64 (<vscale x 16 x float >, float , <vscale x 16 x float >, i64 , i64 , i64 )
@@ -501,12 +549,12 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32>
501549; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
502550; CHECK-NEXT: vle32.v v9, (a0)
503551; CHECK-NEXT: andi a3, a3, 1
504- ; CHECK-NEXT: beqz a3, .LBB9_2
552+ ; CHECK-NEXT: beqz a3, .LBB10_2
505553; CHECK-NEXT: # %bb.1: # %if
506554; CHECK-NEXT: vle16.v v10, (a1)
507555; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
508556; CHECK-NEXT: vwcvt.x.x.v v8, v10
509- ; CHECK-NEXT: .LBB9_2 : # %if.end
557+ ; CHECK-NEXT: .LBB10_2 : # %if.end
510558; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
511559; CHECK-NEXT: vadd.vv v8, v9, v8
512560; CHECK-NEXT: ret
@@ -540,19 +588,19 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl,
540588; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
541589; CHECK-NEXT: vle32.v v9, (a0)
542590; CHECK-NEXT: andi a4, a4, 1
543- ; CHECK-NEXT: beqz a4, .LBB10_2
591+ ; CHECK-NEXT: beqz a4, .LBB11_2
544592; CHECK-NEXT: # %bb.1: # %if
545593; CHECK-NEXT: vle16.v v10, (a1)
546594; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
547595; CHECK-NEXT: vwadd.wv v9, v9, v10
548- ; CHECK-NEXT: .LBB10_2 : # %if.end
596+ ; CHECK-NEXT: .LBB11_2 : # %if.end
549597; CHECK-NEXT: andi a5, a5, 1
550- ; CHECK-NEXT: beqz a5, .LBB10_4
598+ ; CHECK-NEXT: beqz a5, .LBB11_4
551599; CHECK-NEXT: # %bb.3: # %if2
552600; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
553601; CHECK-NEXT: vle16.v v10, (a2)
554602; CHECK-NEXT: vwadd.wv v9, v9, v10
555- ; CHECK-NEXT: .LBB10_4 : # %if2.end
603+ ; CHECK-NEXT: .LBB11_4 : # %if2.end
556604; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
557605; CHECK-NEXT: vadd.vv v8, v9, v8
558606; CHECK-NEXT: ret
@@ -586,11 +634,11 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
586634; CHECK-LABEL: vlmax:
587635; CHECK: # %bb.0: # %entry
588636; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
589- ; CHECK-NEXT: blez a0, .LBB11_3
637+ ; CHECK-NEXT: blez a0, .LBB12_3
590638; CHECK-NEXT: # %bb.1: # %for.body.preheader
591639; CHECK-NEXT: li a5, 0
592640; CHECK-NEXT: slli a4, a6, 3
593- ; CHECK-NEXT: .LBB11_2 : # %for.body
641+ ; CHECK-NEXT: .LBB12_2 : # %for.body
594642; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
595643; CHECK-NEXT: vle64.v v8, (a2)
596644; CHECK-NEXT: vle64.v v9, (a3)
@@ -600,8 +648,8 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
600648; CHECK-NEXT: add a1, a1, a4
601649; CHECK-NEXT: add a3, a3, a4
602650; CHECK-NEXT: add a2, a2, a4
603- ; CHECK-NEXT: blt a5, a0, .LBB11_2
604- ; CHECK-NEXT: .LBB11_3 : # %for.end
651+ ; CHECK-NEXT: blt a5, a0, .LBB12_2
652+ ; CHECK-NEXT: .LBB12_3 : # %for.end
605653; CHECK-NEXT: ret
606654entry:
607655 %0 = tail call i64 @llvm.riscv.vsetvlimax.i64 (i64 3 , i64 0 )
@@ -633,18 +681,18 @@ define void @vector_init_vlmax(i64 %N, ptr %c) {
633681; CHECK-LABEL: vector_init_vlmax:
634682; CHECK: # %bb.0: # %entry
635683; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
636- ; CHECK-NEXT: blez a0, .LBB12_3
684+ ; CHECK-NEXT: blez a0, .LBB13_3
637685; CHECK-NEXT: # %bb.1: # %for.body.preheader
638686; CHECK-NEXT: li a3, 0
639687; CHECK-NEXT: slli a4, a2, 3
640688; CHECK-NEXT: vmv.v.i v8, 0
641- ; CHECK-NEXT: .LBB12_2 : # %for.body
689+ ; CHECK-NEXT: .LBB13_2 : # %for.body
642690; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
643691; CHECK-NEXT: vse64.v v8, (a1)
644692; CHECK-NEXT: add a3, a3, a2
645693; CHECK-NEXT: add a1, a1, a4
646- ; CHECK-NEXT: blt a3, a0, .LBB12_2
647- ; CHECK-NEXT: .LBB12_3 : # %for.end
694+ ; CHECK-NEXT: blt a3, a0, .LBB13_2
695+ ; CHECK-NEXT: .LBB13_3 : # %for.end
648696; CHECK-NEXT: ret
649697entry:
650698 %0 = tail call i64 @llvm.riscv.vsetvlimax.i64 (i64 3 , i64 0 )
@@ -669,20 +717,20 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
669717; CHECK-LABEL: vector_init_vsetvli_N:
670718; CHECK: # %bb.0: # %entry
671719; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, ma
672- ; CHECK-NEXT: blez a0, .LBB13_3
720+ ; CHECK-NEXT: blez a0, .LBB14_3
673721; CHECK-NEXT: # %bb.1: # %for.body.preheader
674722; CHECK-NEXT: li a3, 0
675723; CHECK-NEXT: slli a4, a2, 3
676724; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
677725; CHECK-NEXT: vmv.v.i v8, 0
678- ; CHECK-NEXT: .LBB13_2 : # %for.body
726+ ; CHECK-NEXT: .LBB14_2 : # %for.body
679727; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
680728; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
681729; CHECK-NEXT: vse64.v v8, (a1)
682730; CHECK-NEXT: add a3, a3, a2
683731; CHECK-NEXT: add a1, a1, a4
684- ; CHECK-NEXT: blt a3, a0, .LBB13_2
685- ; CHECK-NEXT: .LBB13_3 : # %for.end
732+ ; CHECK-NEXT: blt a3, a0, .LBB14_2
733+ ; CHECK-NEXT: .LBB14_3 : # %for.end
686734; CHECK-NEXT: ret
687735entry:
688736 %0 = tail call i64 @llvm.riscv.vsetvli (i64 %N , i64 3 , i64 0 )
@@ -711,13 +759,13 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
711759; CHECK-NEXT: slli a4, a3, 3
712760; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
713761; CHECK-NEXT: vmv.v.i v8, 0
714- ; CHECK-NEXT: .LBB14_1 : # %for.body
762+ ; CHECK-NEXT: .LBB15_1 : # %for.body
715763; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
716764; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
717765; CHECK-NEXT: vse64.v v8, (a1)
718766; CHECK-NEXT: add a2, a2, a3
719767; CHECK-NEXT: add a1, a1, a4
720- ; CHECK-NEXT: blt a2, a0, .LBB14_1
768+ ; CHECK-NEXT: blt a2, a0, .LBB15_1
721769; CHECK-NEXT: # %bb.2: # %for.end
722770; CHECK-NEXT: ret
723771entry:
@@ -745,13 +793,13 @@ define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
745793; CHECK-NEXT: li a2, 0
746794; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
747795; CHECK-NEXT: vmv.v.i v8, 0
748- ; CHECK-NEXT: .LBB15_1 : # %for.body
796+ ; CHECK-NEXT: .LBB16_1 : # %for.body
749797; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
750798; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
751799; CHECK-NEXT: vse64.v v8, (a1)
752800; CHECK-NEXT: addi a2, a2, 4
753801; CHECK-NEXT: addi a1, a1, 32
754- ; CHECK-NEXT: blt a2, a0, .LBB15_1
802+ ; CHECK-NEXT: blt a2, a0, .LBB16_1
755803; CHECK-NEXT: # %bb.2: # %for.end
756804; CHECK-NEXT: ret
757805entry:
@@ -779,13 +827,13 @@ define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
779827; CHECK-NEXT: li a2, 0
780828; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
781829; CHECK-NEXT: vmv.v.i v8, 0
782- ; CHECK-NEXT: .LBB16_1 : # %for.body
830+ ; CHECK-NEXT: .LBB17_1 : # %for.body
783831; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
784832; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
785833; CHECK-NEXT: vse64.v v8, (a1)
786834; CHECK-NEXT: addi a2, a2, 4
787835; CHECK-NEXT: addi a1, a1, 32
788- ; CHECK-NEXT: blt a2, a0, .LBB16_1
836+ ; CHECK-NEXT: blt a2, a0, .LBB17_1
789837; CHECK-NEXT: # %bb.2: # %for.end
790838; CHECK-NEXT: ret
791839entry:
@@ -861,10 +909,10 @@ define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x d
861909; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
862910; CHECK-NEXT: vfadd.vv v8, v8, v9
863911; CHECK-NEXT: vs1r.v v8, (a1)
864- ; CHECK-NEXT: beqz a0, .LBB19_2
912+ ; CHECK-NEXT: beqz a0, .LBB20_2
865913; CHECK-NEXT: # %bb.1: # %if.then
866914; CHECK-NEXT: vse32.v v10, (a2)
867- ; CHECK-NEXT: .LBB19_2 : # %if.end
915+ ; CHECK-NEXT: .LBB20_2 : # %if.end
868916; CHECK-NEXT: ret
869917entry:
870918 %res = fadd <vscale x 1 x double > %a , %b
@@ -886,16 +934,16 @@ define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nou
886934; CHECK-LABEL: test_ratio_only_vmv_s_x:
887935; CHECK: # %bb.0: # %entry
888936; CHECK-NEXT: andi a2, a2, 1
889- ; CHECK-NEXT: beqz a2, .LBB20_2
937+ ; CHECK-NEXT: beqz a2, .LBB21_2
890938; CHECK-NEXT: # %bb.1: # %if
891939; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
892940; CHECK-NEXT: vle16.v v9, (a1)
893941; CHECK-NEXT: vwcvt.x.x.v v8, v9
894- ; CHECK-NEXT: j .LBB20_3
895- ; CHECK-NEXT: .LBB20_2 :
942+ ; CHECK-NEXT: j .LBB21_3
943+ ; CHECK-NEXT: .LBB21_2 :
896944; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
897945; CHECK-NEXT: vle32.v v8, (a0)
898- ; CHECK-NEXT: .LBB20_3 : # %if.end
946+ ; CHECK-NEXT: .LBB21_3 : # %if.end
899947; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
900948; CHECK-NEXT: vmv.s.x v8, zero
901949; CHECK-NEXT: ret
@@ -918,16 +966,16 @@ define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) no
918966; CHECK-LABEL: test_ratio_only_vmv_s_x2:
919967; CHECK: # %bb.0: # %entry
920968; CHECK-NEXT: andi a2, a2, 1
921- ; CHECK-NEXT: beqz a2, .LBB21_2
969+ ; CHECK-NEXT: beqz a2, .LBB22_2
922970; CHECK-NEXT: # %bb.1: # %if
923971; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
924972; CHECK-NEXT: vle32.v v8, (a0)
925- ; CHECK-NEXT: j .LBB21_3
926- ; CHECK-NEXT: .LBB21_2 :
973+ ; CHECK-NEXT: j .LBB22_3
974+ ; CHECK-NEXT: .LBB22_2 :
927975; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
928976; CHECK-NEXT: vle16.v v9, (a1)
929977; CHECK-NEXT: vwcvt.x.x.v v8, v9
930- ; CHECK-NEXT: .LBB21_3 : # %if.end
978+ ; CHECK-NEXT: .LBB22_3 : # %if.end
931979; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
932980; CHECK-NEXT: vmv.s.x v8, zero
933981; CHECK-NEXT: ret
@@ -953,13 +1001,13 @@ define void @pre_over_vle(ptr %A) {
9531001; CHECK: # %bb.0: # %entry
9541002; CHECK-NEXT: addi a1, a0, 800
9551003; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
956- ; CHECK-NEXT: .LBB22_1 : # %vector.body
1004+ ; CHECK-NEXT: .LBB23_1 : # %vector.body
9571005; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
9581006; CHECK-NEXT: vle8.v v8, (a0)
9591007; CHECK-NEXT: vsext.vf4 v9, v8
9601008; CHECK-NEXT: vse32.v v9, (a0)
9611009; CHECK-NEXT: addi a0, a0, 8
962- ; CHECK-NEXT: bne a0, a1, .LBB22_1
1010+ ; CHECK-NEXT: bne a0, a1, .LBB23_1
9631011; CHECK-NEXT: # %bb.2: # %exit
9641012; CHECK-NEXT: ret
9651013entry:
0 commit comments