@@ -35,14 +35,100 @@ int main() {
3535 matrix_type::sint32, matrix_type::sint32},
3636 {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
3737 matrix_type::fp32, matrix_type::fp32},
38+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
39+ matrix_type::fp16, matrix_type::fp32},
40+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
41+ matrix_type::fp32, matrix_type::fp16},
42+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
43+ matrix_type::fp16, matrix_type::fp16},
44+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
45+ matrix_type::fp32, matrix_type::fp32},
46+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
47+ matrix_type::fp32, matrix_type::fp16},
48+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
49+ matrix_type::fp16, matrix_type::fp32},
50+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
51+ matrix_type::fp16, matrix_type::fp16},
52+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
53+ matrix_type::fp32, matrix_type::fp32},
54+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
55+ matrix_type::fp16, matrix_type::fp32},
56+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
57+ matrix_type::fp32, matrix_type::fp16},
58+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
59+ matrix_type::fp16, matrix_type::fp16},
60+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
61+ matrix_type::fp32, matrix_type::fp32},
62+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
63+ matrix_type::fp16, matrix_type::fp32},
64+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
65+ matrix_type::fp32, matrix_type::fp16},
66+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
67+ matrix_type::fp16, matrix_type::fp16},
68+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
69+ matrix_type::fp32, matrix_type::fp32},
70+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
71+ matrix_type::fp16, matrix_type::fp32},
72+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
73+ matrix_type::fp32, matrix_type::fp16},
74+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
75+ matrix_type::fp16, matrix_type::fp16},
76+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
77+ matrix_type::fp32, matrix_type::fp32},
78+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
79+ matrix_type::fp16, matrix_type::fp32},
80+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
81+ matrix_type::fp32, matrix_type::fp16},
82+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
83+ matrix_type::fp16, matrix_type::fp16},
84+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
85+ matrix_type::bf16 , matrix_type::bf16 },
86+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
87+ matrix_type::fp32, matrix_type::bf16 },
88+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
89+ matrix_type::bf16 , matrix_type::fp32},
3890 {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
3991 matrix_type::fp32, matrix_type::fp32},
4092 {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
4193 matrix_type::fp32, matrix_type::fp32},
94+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
95+ matrix_type::bf16 , matrix_type::fp32},
96+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
97+ matrix_type::fp32, matrix_type::bf16 },
98+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
99+ matrix_type::bf16 , matrix_type::bf16 },
42100 {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
43101 matrix_type::fp32, matrix_type::fp32},
102+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
103+ matrix_type::bf16 , matrix_type::fp32},
104+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
105+ matrix_type::fp32, matrix_type::bf16 },
106+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
107+ matrix_type::bf16 , matrix_type::bf16 },
44108 {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
45109 matrix_type::fp32, matrix_type::fp32},
110+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
111+ matrix_type::bf16 , matrix_type::fp32},
112+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
113+ matrix_type::fp32, matrix_type::bf16 },
114+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
115+ matrix_type::bf16 , matrix_type::bf16 },
116+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
117+ matrix_type::fp32, matrix_type::fp32},
118+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
119+ matrix_type::bf16 , matrix_type::fp32},
120+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
121+ matrix_type::fp32, matrix_type::bf16 },
122+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
123+ matrix_type::bf16 , matrix_type::bf16 },
124+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
125+ matrix_type::fp32, matrix_type::fp32},
126+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
127+ matrix_type::bf16 , matrix_type::fp32},
128+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
129+ matrix_type::fp32, matrix_type::bf16 },
130+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
131+ matrix_type::bf16 , matrix_type::bf16 },
46132 {8 , 0 , 0 , 0 , 16 , 8 , matrix_type::tf32, matrix_type::tf32,
47133 matrix_type::fp32, matrix_type::fp32},
48134 };
0 commit comments