Skip to content

Commit 2c36e5b

Browse files
authored
Merge pull request #78 from foss-for-synopsys-dwc-arc-processors/generator
Generator improvements
2 parents: dabcb77 + 0bcaea3 · commit: 2c36e5b

19 files changed

+1821
-37
lines changed

include/api/mli_krn_avepool_spec_api.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ extern "C" {
2323
//===================================================================
2424
// AvePooling specialization kernels implementation
2525
//===================================================================
26+
char * mli_debug_krn_avepool_chw_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
2627

2728
mli_status mli_krn_avepool_chw_fx16_k2x2_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
2829
mli_status mli_krn_avepool_chw_fx16_k4x4_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
@@ -55,6 +56,7 @@ mli_status mli_krn_avepool_chw_fx16_knx1_nopad(const mli_tensor * in, const mli_
5556
mli_status mli_krn_avepool_chw_fx16_k2x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
5657
mli_status mli_krn_avepool_chw_fx16_k3x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
5758
mli_status mli_krn_avepool_chw_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
59+
char * mli_debug_krn_avepool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
5860

5961
mli_status mli_krn_avepool_chw_fx8_k2x2_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
6062
mli_status mli_krn_avepool_chw_fx8_k4x4_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);

include/api/mli_krn_conv2d_spec_api.h

Lines changed: 257 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,12 @@ extern "C" {
2323
//===================================================================
2424
// Convolution 2d specialization kernels implementation
2525
//===================================================================
26+
char * mli_debug_krn_conv2d_chw_fx16(
27+
const mli_tensor * in,
28+
const mli_tensor * weights,
29+
const mli_tensor * bias,
30+
const mli_conv2d_cfg * cfg,
31+
mli_tensor * out);
2632

2733
mli_status mli_krn_conv2d_chw_fx16_k1x1_str1_nopad(
2834
const mli_tensor * in,
@@ -276,6 +282,12 @@ mli_status mli_krn_conv2d_chw_fx16_generic(
276282
const mli_conv2d_cfg * cfg,
277283
mli_tensor * out);
278284

285+
char * mli_debug_krn_conv2d_chw_fx8(
286+
const mli_tensor * in,
287+
const mli_tensor * weights,
288+
const mli_tensor * bias,
289+
const mli_conv2d_cfg * cfg,
290+
mli_tensor * out);
279291

280292
mli_status mli_krn_conv2d_chw_fx8_k1x1_str1_nopad(
281293
const mli_tensor * in,
@@ -515,6 +527,251 @@ mli_status mli_krn_conv2d_chw_fx8_generic(
515527
const mli_conv2d_cfg * cfg,
516528
mli_tensor * out);
517529

530+
char * mli_debug_krn_conv2d_chw_fx8w16d(
531+
const mli_tensor * in,
532+
const mli_tensor * weights,
533+
const mli_tensor * bias,
534+
const mli_conv2d_cfg * cfg,
535+
mli_tensor * out);
536+
537+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_str1_nopad(
538+
const mli_tensor * in,
539+
const mli_tensor * weights,
540+
const mli_tensor * bias,
541+
const mli_conv2d_cfg * cfg,
542+
mli_tensor * out);
543+
544+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch1_str1_nopad(
545+
const mli_tensor * in,
546+
const mli_tensor * weights,
547+
const mli_tensor * bias,
548+
const mli_conv2d_cfg * cfg,
549+
mli_tensor * out);
550+
551+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch3_str1_nopad(
552+
const mli_tensor * in,
553+
const mli_tensor * weights,
554+
const mli_tensor * bias,
555+
const mli_conv2d_cfg * cfg,
556+
mli_tensor * out);
557+
558+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch4_str1_nopad(
559+
const mli_tensor * in,
560+
const mli_tensor * weights,
561+
const mli_tensor * bias,
562+
const mli_conv2d_cfg * cfg,
563+
mli_tensor * out);
564+
565+
mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_str1_krnpad(
566+
const mli_tensor * in,
567+
const mli_tensor * weights,
568+
const mli_tensor * bias,
569+
const mli_conv2d_cfg * cfg,
570+
mli_tensor * out);
571+
572+
mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_ch1_str1_krnpad(
573+
const mli_tensor * in,
574+
const mli_tensor * weights,
575+
const mli_tensor * bias,
576+
const mli_conv2d_cfg * cfg,
577+
mli_tensor * out);
578+
579+
mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_str1_krnpad(
580+
const mli_tensor * in,
581+
const mli_tensor * weights,
582+
const mli_tensor * bias,
583+
const mli_conv2d_cfg * cfg,
584+
mli_tensor * out);
585+
586+
mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_ch1_str1_krnpad(
587+
const mli_tensor * in,
588+
const mli_tensor * weights,
589+
const mli_tensor * bias,
590+
const mli_conv2d_cfg * cfg,
591+
mli_tensor * out);
592+
593+
mli_status mli_krn_conv2d_chw_fx8w16d_k4x4_str1_krnpad(
594+
const mli_tensor * in,
595+
const mli_tensor * weights,
596+
const mli_tensor * bias,
597+
const mli_conv2d_cfg * cfg,
598+
mli_tensor * out);
599+
600+
mli_status mli_krn_conv2d_chw_fx8w16d_k4x4_ch1_str1_krnpad(
601+
const mli_tensor * in,
602+
const mli_tensor * weights,
603+
const mli_tensor * bias,
604+
const mli_conv2d_cfg * cfg,
605+
mli_tensor * out);
606+
607+
mli_status mli_krn_conv2d_chw_fx8w16d_k5x5_str1_krnpad(
608+
const mli_tensor * in,
609+
const mli_tensor * weights,
610+
const mli_tensor * bias,
611+
const mli_conv2d_cfg * cfg,
612+
mli_tensor * out);
613+
614+
mli_status mli_krn_conv2d_chw_fx8w16d_k5x5_ch1_str1_krnpad(
615+
const mli_tensor * in,
616+
const mli_tensor * weights,
617+
const mli_tensor * bias,
618+
const mli_conv2d_cfg * cfg,
619+
mli_tensor * out);
620+
621+
mli_status mli_krn_conv2d_chw_fx8w16d_k6x6_str1_krnpad(
622+
const mli_tensor * in,
623+
const mli_tensor * weights,
624+
const mli_tensor * bias,
625+
const mli_conv2d_cfg * cfg,
626+
mli_tensor * out);
627+
628+
mli_status mli_krn_conv2d_chw_fx8w16d_k6x6_ch1_str1_krnpad(
629+
const mli_tensor * in,
630+
const mli_tensor * weights,
631+
const mli_tensor * bias,
632+
const mli_conv2d_cfg * cfg,
633+
mli_tensor * out);
634+
635+
mli_status mli_krn_conv2d_chw_fx8w16d_k7x7_str1_krnpad(
636+
const mli_tensor * in,
637+
const mli_tensor * weights,
638+
const mli_tensor * bias,
639+
const mli_conv2d_cfg * cfg,
640+
mli_tensor * out);
641+
642+
mli_status mli_krn_conv2d_chw_fx8w16d_k7x7_ch1_str1_krnpad(
643+
const mli_tensor * in,
644+
const mli_tensor * weights,
645+
const mli_tensor * bias,
646+
const mli_conv2d_cfg * cfg,
647+
mli_tensor * out);
648+
649+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x2_str1_krnpad(
650+
const mli_tensor * in,
651+
const mli_tensor * weights,
652+
const mli_tensor * bias,
653+
const mli_conv2d_cfg * cfg,
654+
mli_tensor * out);
655+
656+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x3_str1_krnpad(
657+
const mli_tensor * in,
658+
const mli_tensor * weights,
659+
const mli_tensor * bias,
660+
const mli_conv2d_cfg * cfg,
661+
mli_tensor * out);
662+
663+
mli_status mli_krn_conv2d_chw_fx8w16d_k2x1_str1_krnpad(
664+
const mli_tensor * in,
665+
const mli_tensor * weights,
666+
const mli_tensor * bias,
667+
const mli_conv2d_cfg * cfg,
668+
mli_tensor * out);
669+
670+
mli_status mli_krn_conv2d_chw_fx8w16d_k3x1_str1_krnpad(
671+
const mli_tensor * in,
672+
const mli_tensor * weights,
673+
const mli_tensor * bias,
674+
const mli_conv2d_cfg * cfg,
675+
mli_tensor * out);
676+
677+
mli_status mli_krn_conv2d_chw_fx8w16d_k1xn_str1(
678+
const mli_tensor * in,
679+
const mli_tensor * weights,
680+
const mli_tensor * bias,
681+
const mli_conv2d_cfg * cfg,
682+
mli_tensor * out);
683+
684+
mli_status mli_krn_conv2d_chw_fx8w16d_knx1_str1(
685+
const mli_tensor * in,
686+
const mli_tensor * weights,
687+
const mli_tensor * bias,
688+
const mli_conv2d_cfg * cfg,
689+
mli_tensor * out);
690+
691+
mli_status mli_krn_conv2d_chw_fx8w16d_ch1_str1(
692+
const mli_tensor * in,
693+
const mli_tensor * weights,
694+
const mli_tensor * bias,
695+
const mli_conv2d_cfg * cfg,
696+
mli_tensor * out);
697+
698+
mli_status mli_krn_conv2d_chw_fx8w16d_str1(
699+
const mli_tensor * in,
700+
const mli_tensor * weights,
701+
const mli_tensor * bias,
702+
const mli_conv2d_cfg * cfg,
703+
mli_tensor * out);
704+
705+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_nopad(
706+
const mli_tensor * in,
707+
const mli_tensor * weights,
708+
const mli_tensor * bias,
709+
const mli_conv2d_cfg * cfg,
710+
mli_tensor * out);
711+
712+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch1_nopad(
713+
const mli_tensor * in,
714+
const mli_tensor * weights,
715+
const mli_tensor * bias,
716+
const mli_conv2d_cfg * cfg,
717+
mli_tensor * out);
718+
719+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch3_nopad(
720+
const mli_tensor * in,
721+
const mli_tensor * weights,
722+
const mli_tensor * bias,
723+
const mli_conv2d_cfg * cfg,
724+
mli_tensor * out);
725+
726+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch4_nopad(
727+
const mli_tensor * in,
728+
const mli_tensor * weights,
729+
const mli_tensor * bias,
730+
const mli_conv2d_cfg * cfg,
731+
mli_tensor * out);
732+
733+
mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch8_nopad(
734+
const mli_tensor * in,
735+
const mli_tensor * weights,
736+
const mli_tensor * bias,
737+
const mli_conv2d_cfg * cfg,
738+
mli_tensor * out);
739+
740+
mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_krnpad(
741+
const mli_tensor * in,
742+
const mli_tensor * weights,
743+
const mli_tensor * bias,
744+
const mli_conv2d_cfg * cfg,
745+
mli_tensor * out);
746+
747+
mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_ch1_krnpad(
748+
const mli_tensor * in,
749+
const mli_tensor * weights,
750+
const mli_tensor * bias,
751+
const mli_conv2d_cfg * cfg,
752+
mli_tensor * out);
753+
754+
mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_krnpad(
755+
const mli_tensor * in,
756+
const mli_tensor * weights,
757+
const mli_tensor * bias,
758+
const mli_conv2d_cfg * cfg,
759+
mli_tensor * out);
760+
761+
mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_ch1_krnpad(
762+
const mli_tensor * in,
763+
const mli_tensor * weights,
764+
const mli_tensor * bias,
765+
const mli_conv2d_cfg * cfg,
766+
mli_tensor * out);
767+
768+
mli_status mli_krn_conv2d_chw_fx8w16d_generic(
769+
const mli_tensor * in,
770+
const mli_tensor * weights,
771+
const mli_tensor * bias,
772+
const mli_conv2d_cfg * cfg,
773+
mli_tensor * out);
774+
518775

519776
#ifdef __cplusplus
520777
}

0 commit comments

Comments
 (0)