File tree Expand file tree Collapse file tree 1 file changed +5
-21
lines changed
Expand file tree Collapse file tree 1 file changed +5
-21
lines changed Original file line number Diff line number Diff line change 11#include "common.h"
2- /* helper for the direct sgemm code written by Arjan van der Ven */
3-
4-
5-
2+ /* helper for the direct sgemm code adapted from Arjan van de Ven's x86_64 version */
63
74int CNAME (BLASLONG M , BLASLONG N , BLASLONG K )
85{
9- if (M < 3 || M % 2 == 1 ) return 0 ;
6+ if (M < 3 ) return 0 ;
107 unsigned long long mnk = M * N * K ;
11- /* large matrixes -> not performant */
12- if (mnk >= 28 * 512 * 512 )
13- return 0 ;
14-
15- /*
16- * if the B matrix is not a nice multiple if 4 we get many unaligned accesses,
17- * and the regular sgemm copy/realignment of data pays off much quicker
18- */
19- if ((N & 3 ) != 0 && (mnk >= 8 * 512 * 512 ))
20- return 0 ;
21-
22- #ifdef SMP
23- /* if we can run multithreaded, the threading changes the based threshold */
24- if (mnk > 2 * 350 * 512 && num_cpu_avail (3 )> 1 )
8+ /* benchmark performance on M4 peaks around 512 and crosses the graph of the NEON SGEMM at about 3100 */
9+ if (mnk >= 3100 * 3100 * 3100 )
2510 return 0 ;
26- #endif
27-
11+
2812 return 1 ;
2913}
3014
You can’t perform that action at this time.
0 commit comments