11import subprocess
22import os
3- import datetime
43from operator import itemgetter
54
65workGroupSizes = [2 ** (x + 3 ) for x in range (5 )]
76workPerThreads = [2 ** (x + 1 ) for x in range (7 )]
87
98matrixSize = 1024
10- kernels = ['k2' , ' k3' , 'k4' ]
9+ kernels = ['k3' , 'k4' ]
1110semiring = 'arithmetic'
1211numToRun = 10
13- platform = 'nvidia '
14- types = ['mt-byte' , 'mt-int' , 'mt- float32' , 'mt-float64 ' ]
12+ platform = 'anygpu '
13+ types = ['mt-float32' ]
1514
1615out_directory = 'tuning_results'
1716
1817if not os .path .exists (out_directory ):
1918 os .makedirs (out_directory )
2019
2120for kernel in kernels :
22- for matrixType in types :
21+ for matrixType in types :
2322 res = []
2423 print (f'Tuning for { matrixType } and kernel { kernel } started.' )
2524 for wgs in workGroupSizes :
2625 for wpt in workPerThreads :
27- try :
26+ if wgs < wpt :
27+ continue
28+ try :
2829 cmd = f'dotnet ./src/MatrixMultiplication/bin/Release/net9.0/MatrixMultiplication.dll --platform { platform } --kernel { kernel } --matrixsize { matrixSize } --matrixtype { matrixType } --semiring { semiring } --numtorun { numToRun } --workperthread { wpt } --workgroupsize { wgs } '
2930 output = subprocess .check_output ([cmd ],shell = True )
3031 output = output .decode ("utf-8" )
3536 except BaseException : ()
3637
3738 res = sorted (res , key = itemgetter (2 ))
38- f = open (os .path .join (out_directory ,f'{ kernel } _{ platform } _{ matrixType } _{ matrixSize } _{ semiring } _ { datetime . datetime . now () } .log' ),'a' )
39+ f = open (os .path .join (out_directory ,f'{ kernel } _{ platform } _{ matrixType } _{ matrixSize } _{ semiring } .log' ),'a' )
3940 for r in res :
4041 print (r )
4142 f .write (f'{ r [0 ]} , { r [1 ]} , { r [2 ]} \n ' )
42- f .close ()
43+ f .close ()
0 commit comments