@@ -565,6 +565,65 @@ entry:
565565 ret <1 x i32 > %res
566566}
567567
; Unsigned x unsigned partial reduction of a 4-wide i8 product into a single
; i32 lane. With zvqdotq (DOT) this folds to one vqdotu.vv; without it (NODOT)
; the widened products are summed with slides and adds.
define <1 x i32> @vqdotu_vv_partial_reduce_v1i32_v4i8(<4 x i8> %a, <4 x i8> %b) {
; NODOT-LABEL: vqdotu_vv_partial_reduce_v1i32_v4i8:
; NODOT:       # %bb.0: # %entry
; NODOT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; NODOT-NEXT:    vwmulu.vv v10, v8, v9
; NODOT-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; NODOT-NEXT:    vzext.vf2 v8, v10
; NODOT-NEXT:    vslidedown.vi v9, v8, 3
; NODOT-NEXT:    vslidedown.vi v10, v8, 2
; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; NODOT-NEXT:    vadd.vv v9, v9, v8
; NODOT-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; NODOT-NEXT:    vslidedown.vi v8, v8, 1
; NODOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; NODOT-NEXT:    vadd.vv v8, v8, v10
; NODOT-NEXT:    vadd.vv v8, v8, v9
; NODOT-NEXT:    ret
;
; DOT-LABEL: vqdotu_vv_partial_reduce_v1i32_v4i8:
; DOT:       # %bb.0: # %entry
; DOT-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; DOT-NEXT:    vmv.s.x v10, zero
; DOT-NEXT:    vqdotu.vv v10, v8, v9
; DOT-NEXT:    vmv1r.v v8, v10
; DOT-NEXT:    ret
entry:
  ; Both operands are zero-extended: locals named .zext accordingly
  ; (the original .sext names were a copy-paste from the signed variant).
  %a.zext = zext <4 x i8> %a to <4 x i32>
  %b.zext = zext <4 x i8> %b to <4 x i32>
  %mul = mul <4 x i32> %a.zext, %b.zext
  %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul)
  ret <1 x i32> %res
}
600+
; Signed x unsigned (su) partial reduction of a 4-wide i8 product into a
; single i32 lane. Shared CHECK prefix: no vqdotsu fold is formed for this
; shape, so DOT and NODOT emit the same widened-multiply + slide/add sequence.
define <1 x i32> @vqdotsu_vv_partial_reduce_v1i32_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: vqdotsu_vv_partial_reduce_v1i32_v4i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    vwmulsu.vv v9, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v9, 3
; CHECK-NEXT:    vslidedown.vi v10, v9, 2
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v9, 1
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vadd.vv v9, v9, v10
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  ; %a is sign-extended, %b is zero-extended (the "su" pairing);
  ; %b's local renamed .zext — it was misleadingly named .sext.
  %a.sext = sext <4 x i8> %a to <4 x i32>
  %b.zext = zext <4 x i8> %b to <4 x i32>
  %mul = mul <4 x i32> %a.sext, %b.zext
  %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul)
  ret <1 x i32> %res
}
626+
568627define <2 x i32 > @vqdot_vv_partial_reduce_v2i32_v8i8 (<8 x i8 > %a , <8 x i8 > %b ) {
569628; NODOT-LABEL: vqdot_vv_partial_reduce_v2i32_v8i8:
570629; NODOT: # %bb.0: # %entry
0 commit comments