@@ -299,17 +299,31 @@ entry:
299299}
300300
301301define i32 @vqdot_vv_accum (<16 x i8 > %a , <16 x i8 > %b , <16 x i32 > %x ) {
302- ; CHECK-LABEL: vqdot_vv_accum:
303- ; CHECK: # %bb.0: # %entry
304- ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
305- ; CHECK-NEXT: vsext.vf2 v10, v8
306- ; CHECK-NEXT: vsext.vf2 v16, v9
307- ; CHECK-NEXT: vwmacc.vv v12, v10, v16
308- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
309- ; CHECK-NEXT: vmv.s.x v8, zero
310- ; CHECK-NEXT: vredsum.vs v8, v12, v8
311- ; CHECK-NEXT: vmv.x.s a0, v8
312- ; CHECK-NEXT: ret
302+ ; NODOT-LABEL: vqdot_vv_accum:
303+ ; NODOT: # %bb.0: # %entry
304+ ; NODOT-NEXT: vsetivli zero, 16, e16, m2, ta, ma
305+ ; NODOT-NEXT: vsext.vf2 v10, v8
306+ ; NODOT-NEXT: vsext.vf2 v16, v9
307+ ; NODOT-NEXT: vwmacc.vv v12, v10, v16
308+ ; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
309+ ; NODOT-NEXT: vmv.s.x v8, zero
310+ ; NODOT-NEXT: vredsum.vs v8, v12, v8
311+ ; NODOT-NEXT: vmv.x.s a0, v8
312+ ; NODOT-NEXT: ret
313+ ;
314+ ; DOT-LABEL: vqdot_vv_accum:
315+ ; DOT: # %bb.0: # %entry
316+ ; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
317+ ; DOT-NEXT: vmv.v.i v10, 0
318+ ; DOT-NEXT: vqdot.vv v10, v8, v9
319+ ; DOT-NEXT: vadd.vv v8, v10, v12
320+ ; DOT-NEXT: vsetivli zero, 4, e32, m4, tu, ma
321+ ; DOT-NEXT: vmv.v.v v12, v8
322+ ; DOT-NEXT: vmv.s.x v8, zero
323+ ; DOT-NEXT: vsetivli zero, 16, e32, m4, ta, ma
324+ ; DOT-NEXT: vredsum.vs v8, v12, v8
325+ ; DOT-NEXT: vmv.x.s a0, v8
326+ ; DOT-NEXT: ret
313327entry:
314328 %a.sext = sext <16 x i8 > %a to <16 x i32 >
315329 %b.sext = sext <16 x i8 > %b to <16 x i32 >
@@ -320,17 +334,31 @@ entry:
320334}
321335
322336define i32 @vqdotu_vv_accum (<16 x i8 > %a , <16 x i8 > %b , <16 x i32 > %x ) {
323- ; CHECK-LABEL: vqdotu_vv_accum:
324- ; CHECK: # %bb.0: # %entry
325- ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
326- ; CHECK-NEXT: vwmulu.vv v10, v8, v9
327- ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
328- ; CHECK-NEXT: vwaddu.wv v12, v12, v10
329- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
330- ; CHECK-NEXT: vmv.s.x v8, zero
331- ; CHECK-NEXT: vredsum.vs v8, v12, v8
332- ; CHECK-NEXT: vmv.x.s a0, v8
333- ; CHECK-NEXT: ret
337+ ; NODOT-LABEL: vqdotu_vv_accum:
338+ ; NODOT: # %bb.0: # %entry
339+ ; NODOT-NEXT: vsetivli zero, 16, e8, m1, ta, ma
340+ ; NODOT-NEXT: vwmulu.vv v10, v8, v9
341+ ; NODOT-NEXT: vsetvli zero, zero, e16, m2, ta, ma
342+ ; NODOT-NEXT: vwaddu.wv v12, v12, v10
343+ ; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
344+ ; NODOT-NEXT: vmv.s.x v8, zero
345+ ; NODOT-NEXT: vredsum.vs v8, v12, v8
346+ ; NODOT-NEXT: vmv.x.s a0, v8
347+ ; NODOT-NEXT: ret
348+ ;
349+ ; DOT-LABEL: vqdotu_vv_accum:
350+ ; DOT: # %bb.0: # %entry
351+ ; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
352+ ; DOT-NEXT: vmv.v.i v10, 0
353+ ; DOT-NEXT: vqdotu.vv v10, v8, v9
354+ ; DOT-NEXT: vadd.vv v8, v10, v12
355+ ; DOT-NEXT: vsetivli zero, 4, e32, m4, tu, ma
356+ ; DOT-NEXT: vmv.v.v v12, v8
357+ ; DOT-NEXT: vmv.s.x v8, zero
358+ ; DOT-NEXT: vsetivli zero, 16, e32, m4, ta, ma
359+ ; DOT-NEXT: vredsum.vs v8, v12, v8
360+ ; DOT-NEXT: vmv.x.s a0, v8
361+ ; DOT-NEXT: ret
334362entry:
335363 %a.zext = zext <16 x i8 > %a to <16 x i32 >
336364 %b.zext = zext <16 x i8 > %b to <16 x i32 >
@@ -341,17 +369,31 @@ entry:
341369}
342370
343371define i32 @vqdotsu_vv_accum (<16 x i8 > %a , <16 x i8 > %b , <16 x i32 > %x ) {
344- ; CHECK-LABEL: vqdotsu_vv_accum:
345- ; CHECK: # %bb.0: # %entry
346- ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
347- ; CHECK-NEXT: vsext.vf2 v10, v8
348- ; CHECK-NEXT: vzext.vf2 v16, v9
349- ; CHECK-NEXT: vwmaccsu.vv v12, v10, v16
350- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
351- ; CHECK-NEXT: vmv.s.x v8, zero
352- ; CHECK-NEXT: vredsum.vs v8, v12, v8
353- ; CHECK-NEXT: vmv.x.s a0, v8
354- ; CHECK-NEXT: ret
372+ ; NODOT-LABEL: vqdotsu_vv_accum:
373+ ; NODOT: # %bb.0: # %entry
374+ ; NODOT-NEXT: vsetivli zero, 16, e16, m2, ta, ma
375+ ; NODOT-NEXT: vsext.vf2 v10, v8
376+ ; NODOT-NEXT: vzext.vf2 v16, v9
377+ ; NODOT-NEXT: vwmaccsu.vv v12, v10, v16
378+ ; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
379+ ; NODOT-NEXT: vmv.s.x v8, zero
380+ ; NODOT-NEXT: vredsum.vs v8, v12, v8
381+ ; NODOT-NEXT: vmv.x.s a0, v8
382+ ; NODOT-NEXT: ret
383+ ;
384+ ; DOT-LABEL: vqdotsu_vv_accum:
385+ ; DOT: # %bb.0: # %entry
386+ ; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
387+ ; DOT-NEXT: vmv.v.i v10, 0
388+ ; DOT-NEXT: vqdotsu.vv v10, v8, v9
389+ ; DOT-NEXT: vadd.vv v8, v10, v12
390+ ; DOT-NEXT: vsetivli zero, 4, e32, m4, tu, ma
391+ ; DOT-NEXT: vmv.v.v v12, v8
392+ ; DOT-NEXT: vmv.s.x v8, zero
393+ ; DOT-NEXT: vsetivli zero, 16, e32, m4, ta, ma
394+ ; DOT-NEXT: vredsum.vs v8, v12, v8
395+ ; DOT-NEXT: vmv.x.s a0, v8
396+ ; DOT-NEXT: ret
355397entry:
356398 %a.sext = sext <16 x i8 > %a to <16 x i32 >
357399 %b.zext = zext <16 x i8 > %b to <16 x i32 >
@@ -455,20 +497,33 @@ entry:
455497}
456498
457499define i32 @vqdot_vv_split (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > %c , <16 x i8 > %d ) {
458- ; CHECK-LABEL: vqdot_vv_split:
459- ; CHECK: # %bb.0: # %entry
460- ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
461- ; CHECK-NEXT: vsext.vf2 v12, v8
462- ; CHECK-NEXT: vsext.vf2 v14, v9
463- ; CHECK-NEXT: vsext.vf2 v16, v10
464- ; CHECK-NEXT: vsext.vf2 v18, v11
465- ; CHECK-NEXT: vwmul.vv v8, v12, v14
466- ; CHECK-NEXT: vwmacc.vv v8, v16, v18
467- ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
468- ; CHECK-NEXT: vmv.s.x v12, zero
469- ; CHECK-NEXT: vredsum.vs v8, v8, v12
470- ; CHECK-NEXT: vmv.x.s a0, v8
471- ; CHECK-NEXT: ret
500+ ; NODOT-LABEL: vqdot_vv_split:
501+ ; NODOT: # %bb.0: # %entry
502+ ; NODOT-NEXT: vsetivli zero, 16, e16, m2, ta, ma
503+ ; NODOT-NEXT: vsext.vf2 v12, v8
504+ ; NODOT-NEXT: vsext.vf2 v14, v9
505+ ; NODOT-NEXT: vsext.vf2 v16, v10
506+ ; NODOT-NEXT: vsext.vf2 v18, v11
507+ ; NODOT-NEXT: vwmul.vv v8, v12, v14
508+ ; NODOT-NEXT: vwmacc.vv v8, v16, v18
509+ ; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
510+ ; NODOT-NEXT: vmv.s.x v12, zero
511+ ; NODOT-NEXT: vredsum.vs v8, v8, v12
512+ ; NODOT-NEXT: vmv.x.s a0, v8
513+ ; NODOT-NEXT: ret
514+ ;
515+ ; DOT-LABEL: vqdot_vv_split:
516+ ; DOT: # %bb.0: # %entry
517+ ; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
518+ ; DOT-NEXT: vmv.v.i v12, 0
519+ ; DOT-NEXT: vmv.v.i v13, 0
520+ ; DOT-NEXT: vqdot.vv v12, v8, v9
521+ ; DOT-NEXT: vqdot.vv v13, v10, v11
522+ ; DOT-NEXT: vadd.vv v8, v12, v13
523+ ; DOT-NEXT: vmv.s.x v9, zero
524+ ; DOT-NEXT: vredsum.vs v8, v8, v9
525+ ; DOT-NEXT: vmv.x.s a0, v8
526+ ; DOT-NEXT: ret
472527entry:
473528 %a.sext = sext <16 x i8 > %a to <16 x i32 >
474529 %b.sext = sext <16 x i8 > %b to <16 x i32 >
0 commit comments