2727ROCPROF = '/opt/rocm/bin/rocprofv3'
2828MIOPENDRIVER = '/opt/rocm/bin/MIOpenDriver'
2929BENCHMARKING_RESULT_FILE_NAME = 'results'
30- BENCHMARKING_STATS_FILE_NAME = 'pmc_1/ results_kernel_stats.csv'
31- BENCHMARKING_METRICS_FILE_NAME = 'pmc_1/ results_counter_collection.csv'
30+ BENCHMARKING_STATS_FILE_NAME = 'results_kernel_stats.csv'
31+ BENCHMARKING_METRICS_FILE_NAME = 'results_counter_collection.csv'
3232ROCMLIR_INPUT_METRICS_FILE_NAME = 'rocmlir_metrics.txt'
3333DIRECTIONS = ['-F 1' , '-F 2' , '-F 4' ]
3434DATA_TYPES = ['conv' , 'convfp16' , 'convbfp16' , 'convfp8' , 'convint8' ]
@@ -158,6 +158,13 @@ def getNanoSeconds(fileName):
158158 csv_file .close ()
159159 return result
160160
def getProfilerOutputPath(arch, baseOutPath):
    """Return the path under which the profiler output file is looked up.

    The chip name is extracted from ``arch`` with ``GFX_CHIP_RE``. For
    gfx942 ``baseOutPath`` is returned unchanged; for every other chip it
    is placed under the 'pmc_1' subdirectory.
    """
    chipName = GFX_CHIP_RE.search(arch).group(0)
    # TODO (gfx950): check if gfx950 needs this
    if chipName == "gfx942":
        return baseOutPath
    return os.path.join('pmc_1', baseOutPath)
167+
161168def getMetricArgsForRocprof (arch ):
162169 chip = GFX_CHIP_RE .search (arch ).group (0 )
163170 current_dir = os .path .dirname (os .path .abspath (__file__ ))
@@ -346,7 +353,7 @@ def computeTFlops(self, ns):
346353
347354 def tableEntry (self , nanoSeconds ):
348355 # Future(kdrewnia): This can just be a dict literal on Python 3.7+
349- bankConflict = getBankConflict (BENCHMARKING_METRICS_FILE_NAME )
356+ bankConflict = getBankConflict (getProfilerOutputPath ( self . arch , BENCHMARKING_METRICS_FILE_NAME ) )
350357 result = OrderedDict ()
351358 values = [self .direction , self .dataType , self .chip , self .numCU , self .filterLayout , self .inputLayout , self .outputLayout ,
352359 self .n , self .c , self .hi , self .wi , self .k , self .y , self .x , self .dilationH , self .dilationW ,
@@ -543,7 +550,8 @@ def __init__(self, dtype: str, direction: str, filterLayout: str, inputLayout:st
543550
544551 @classmethod
545552 def benchmarkExternal (cls , commandLine , paths : Paths , arch , numCU ):
546- os .system ("rm -f " + BENCHMARKING_METRICS_FILE_NAME )
553+ if os .path .exists (getProfilerOutputPath (arch , BENCHMARKING_METRICS_FILE_NAME )):
554+ os .remove (getProfilerOutputPath (arch , BENCHMARKING_METRICS_FILE_NAME ))
547555 config = cls .fromCommandLine (commandLine , arch , numCU )
548556 MIOpenDriverCommand = [MIOPENDRIVER , * commandLine , '-V' , '0' , '-t' , '1' ]
549557 print ("Running MIOpen Benchmark: " , ' ' .join (commandLine ))
@@ -724,7 +732,7 @@ def computeTFlops(self, ns):
724732
725733 def tableEntry (self , nanoSeconds ):
726734 # Future(kdrewnia): This can just be a dict literal on Python 3.7+
727- bankConflict = getBankConflict (BENCHMARKING_METRICS_FILE_NAME )
735+ bankConflict = getBankConflict (getProfilerOutputPath ( self . arch , BENCHMARKING_METRICS_FILE_NAME ) )
728736 result = OrderedDict ()
729737 values = [self .dataType , self .outDataType , self .chip , self .numCU , self .transA , self .transB , \
730738 self .g , self .m , self .k , self .n , self .perfConfig , bankConflict , self .computeTFlops (nanoSeconds )]
@@ -1378,7 +1386,8 @@ def benchmarkExternal(cls, commandLine, paths: Paths, arch, numCU):
13781386 raise ValueError ("rocblas-benchmark-driver not built" )
13791387 benchmarkArgs = config .generateMlirDriverCommandLine ("" )
13801388 # remove the result file generated by rocprof in previous benchmarking
1381- os .system ("rm -f " + BENCHMARKING_STATS_FILE_NAME )
1389+ if os .path .exists (getProfilerOutputPath (arch , BENCHMARKING_STATS_FILE_NAME )):
1390+ os .remove (getProfilerOutputPath (arch , BENCHMARKING_STATS_FILE_NAME ))
13821391 print (f"Running rocBLAS benchmark { config !r} " )
13831392 profilerCommand = [paths .mlir_paths .rocblas_benchmark_driver_path ] + \
13841393 benchmarkArgs .split ()
@@ -1410,7 +1419,8 @@ def benchmarkExternal(cls, commandLine, paths: Paths, arch, numCU):
14101419
14111420def runConfigWithMLIR (config : PerfConfiguration , paths : Paths , arch , rocmlir_gen_flags , debug = True ):
14121421 # remove the result file generated by rocprof in previous benchmarking
1413- os .system ("rm -f " + BENCHMARKING_STATS_FILE_NAME )
1422+ if os .path .exists (getProfilerOutputPath (arch , BENCHMARKING_STATS_FILE_NAME )):
1423+ os .remove (getProfilerOutputPath (arch , BENCHMARKING_STATS_FILE_NAME ))
14141424 commandLineOptions = config .generateMlirDriverCommandLine (rocmlir_gen_flags )
14151425 if debug :
14161426 print ("Running MLIR Benchmark: " , repr (config ))
@@ -1433,7 +1443,7 @@ def benchmarkMLIR(commandLine, confClass, paths: Paths, arch, numCU, tuningDb: M
14331443
14341444 runConfigWithMLIR (config , paths , arch , rocmlir_gen_flags )
14351445 # get nanoseconds from rocprof output.
1436- nanoSeconds = getNanoSeconds (BENCHMARKING_STATS_FILE_NAME )
1446+ nanoSeconds = getNanoSeconds (getProfilerOutputPath ( arch , BENCHMARKING_STATS_FILE_NAME ) )
14371447 return config .tableEntry (nanoSeconds )
14381448
14391449#Generate MLIR vs. MIOpen or rocBLAS performance results
@@ -1535,31 +1545,32 @@ def findRunCommand(filename):
15351545
15361546# Extract testVector and test function name from the test file
15371547def getFusionTestInfo (filename , paths : Paths ):
1548+ chip = getChip ()
15381549 testEntry = {}
15391550 rocmlirCommand , futName = findRunCommand (filename )
15401551 if not rocmlirCommand :
15411552 return testEntry
15421553 # rocmlir-gen -fut test -arch gfx90a --clone-harness
1543- rocmlirgenCommand = [paths .mlir_paths .rocmlir_gen_path , '-fut' , futName , '-arch' , getChip () , '--clone-harness' , filename ]
1554+ rocmlirgenCommand = [paths .mlir_paths .rocmlir_gen_path , '-fut' , futName , '-arch' , chip , '--clone-harness' , filename ]
15441555 p0 = subprocess .Popen (rocmlirgenCommand , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15451556 if "-migraphx-to-tosa" in rocmlirCommand :
15461557 rocmlirOptCommand = [paths .mlir_paths .rocmlir_opt_path , '-migraphx-to-tosa' ]
1547- rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'highlevel' , '-targets' , getChip () ]
1558+ rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'highlevel' , '-targets' , chip ]
15481559 # rocmlir-opt -migraphx-to-tosa ../mlir/test/fusion/resnet50-e2e/mixr-resnet-fusion-case-1.mlir
15491560 p1 = subprocess .Popen (rocmlirOptCommand , stdin = p0 .stdout , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15501561 # pipe to rocmlir-driver -host-pipeline highlevel -targets gfx90a
15511562 p2 = subprocess .Popen (rocmlirDriverCommand , stdin = p1 .stdout , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15521563 p1 .stdout .close ()
15531564 elif "migraphx" in rocmlirCommand :
15541565 rocmlirMigraphxCommand = [paths .mlir_paths .rocmlir_driver_path , '-kernel-pipeline' , 'migraphx' ]
1555- rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'migraphx,highlevel' , '-targets' , getChip () ]
1566+ rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'migraphx,highlevel' , '-targets' , chip ]
15561567 # rocmlir-driver -kernel-pipeline migraphx ../mlir/test/fusion/resnet50-e2e/mixr-resnet-fusion-case-1.mlir
15571568 p1 = subprocess .Popen (rocmlirMigraphxCommand , stdin = p0 .stdout , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15581569 # pipe to rocmlir-driver -host-pipeline highlevel -targets gfx90a
15591570 p2 = subprocess .Popen (rocmlirDriverCommand , stdin = p1 .stdout , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15601571 p1 .stdout .close ()
15611572 else :
1562- rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'highlevel' , '-targets' , getChip () ]
1573+ rocmlirDriverCommand = [paths .mlir_paths .rocmlir_driver_path , '-host-pipeline' , 'highlevel' , '-targets' , chip ]
15631574 # rocmlir-driver -host-pipeline highlevel -targets gfx90a
15641575 p2 = subprocess .Popen (rocmlirDriverCommand , stdin = p0 .stdout , stdout = subprocess .PIPE , stderr = subprocess .DEVNULL )
15651576
@@ -1573,33 +1584,36 @@ def getFusionTestInfo(filename, paths: Paths):
15731584 return testEntry
15741585
def runFusionKernel(filename, rocmlirGenArgs, paths: Paths):
    """Build and run the profiled pipeline for one fusion test file.

    The command list is assembled in this order and handed to runPipeline:
      1. rocmlir-gen with '-fut <name> -arch <chip> --clone-harness <filename>'
      2. an optional migraphx lowering step, chosen from the test's run command
      3. rocmlir-driver with the 'highlevel' host pipeline for the chip
      4. rocmlir-gen with the caller-supplied ``rocmlirGenArgs``
      5. rocmlir-driver with '-host-pipeline mhal,runner -kernel-pipeline full'
      6. rocprof wrapping the cpu runner, writing output named
         BENCHMARKING_RESULT_FILE_NAME

    Args:
        filename: path of the fusion test file.
        rocmlirGenArgs: extra arguments for the second rocmlir-gen invocation.
        paths: holder of the tool and library paths (``paths.mlir_paths``).
    """
    arch = getArch()
    chip = getChip()
    # Drop the stats file left by a previous profiling run so stale numbers are
    # never read back. Removing directly and ignoring a missing file avoids the
    # check-then-remove race and computes the path only once.
    statsPath = getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME)
    try:
        os.remove(statsPath)
    except FileNotFoundError:
        pass

    rocmlirCommand, futName = findRunCommand(filename)

    # rocmlir-gen -fut test -arch gfx90a --clone-harness
    rocmlirgenCommand = [paths.mlir_paths.rocmlir_gen_path, '-fut', futName, '-arch', chip, '--clone-harness', filename]
    commands = [rocmlirgenCommand]
    if "-migraphx-to-tosa" in rocmlirCommand:
        rocmlirOptCommand = [paths.mlir_paths.rocmlir_opt_path, '-migraphx-to-tosa', filename]
        commands.append(rocmlirOptCommand)
        rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
        commands.append(rocmlirDriverCommand)
    elif "migraphx" in rocmlirCommand:
        rocmlirMigraphxCommand = [paths.mlir_paths.rocmlir_driver_path, '-kernel-pipeline', 'migraphx']
        commands.append(rocmlirMigraphxCommand)
        rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'migraphx,highlevel', '-targets', chip]
        commands.append(rocmlirDriverCommand)
    else:
        rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
        commands.append(rocmlirDriverCommand)

    rocmlirGenCommand = [paths.mlir_paths.rocmlir_gen_path] + rocmlirGenArgs
    commands.append(rocmlirGenCommand)
    kernelPipelineCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'mhal,runner', '-kernel-pipeline', 'full']
    commands.append(kernelPipelineCommand)
    mlir_cpu_runner_args = [f'--shared-libs={paths.mlir_paths.libmlir_rocm_runtime_path},{paths.mlir_paths.libconv_validation_wrappers_path},{paths.mlir_paths.libmlir_runtime_utils_path},{paths.mlir_paths.libmlir_c_runner_utils_path}', '--entry-point-result=void']
    # The profiler wraps the final runner stage of the pipeline.
    profilerCommand = [ROCPROF] + getMetricArgsForRocprof(chip) + ['--kernel-trace', '--stats', '-o', BENCHMARKING_RESULT_FILE_NAME] + ['--', paths.mlir_paths.cpu_runner_path] + mlir_cpu_runner_args
    commands.append(profilerCommand)
    runPipeline(commands)
16051619
@@ -1668,7 +1682,7 @@ def benchmarkFusionKernels(test_dir, paths: Paths, arch, numCU, tuningDb: MaybeT
16681682 rocmlirGenArgs = ['-ph' , '-fut=' + futName + '_wrapper' , '--perf_config=' + bestPerf , '-' ]
16691683 runFusionKernel (filename , rocmlirGenArgs , paths )
16701684 # Get nanoseconds of fusion test
1671- nanoSeconds = getNanoSeconds (BENCHMARKING_STATS_FILE_NAME )
1685+ nanoSeconds = getNanoSeconds (getProfilerOutputPath ( arch , BENCHMARKING_STATS_FILE_NAME ) )
16721686 oneEntry = config .tableEntry (nanoSeconds )
16731687 # Keep the best performance
16741688 if testVector in perfResults and oneEntry ['TFlops' ] <= perfResults [testVector ]['TFlops' ]:
@@ -1677,7 +1691,7 @@ def benchmarkFusionKernels(test_dir, paths: Paths, arch, numCU, tuningDb: MaybeT
16771691 # Run gemm or conv op with the same configuration
16781692 runConfigWithMLIR (config , paths , arch , '' )
16791693 # Get nanoseconds of gemm/conv
1680- nanoSeconds = getNanoSeconds (BENCHMARKING_STATS_FILE_NAME )
1694+ nanoSeconds = getNanoSeconds (getProfilerOutputPath ( arch , BENCHMARKING_STATS_FILE_NAME ) )
16811695 oneEntry ['MLIR TFlops' ] = config .computeTFlops (nanoSeconds )
16821696 oneEntry ['Fusion/MLIR' ] = oneEntry ['TFlops' ]/ oneEntry ['MLIR TFlops' ]
16831697 oneEntry ['FileName' ] = filename
0 commit comments