Skip to content

Commit dbbab51

Browse files
authored
Fix output file path for rocprofv3 (#1883)
1 parent 3a275a4 commit dbbab51

File tree

2 files changed

+35
-21
lines changed

2 files changed

+35
-21
lines changed

mlir/utils/performance/perfRunner.py

Lines changed: 34 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@
2727
ROCPROF = '/opt/rocm/bin/rocprofv3'
2828
MIOPENDRIVER = '/opt/rocm/bin/MIOpenDriver'
2929
BENCHMARKING_RESULT_FILE_NAME = 'results'
30-
BENCHMARKING_STATS_FILE_NAME = 'pmc_1/results_kernel_stats.csv'
31-
BENCHMARKING_METRICS_FILE_NAME = 'pmc_1/results_counter_collection.csv'
30+
BENCHMARKING_STATS_FILE_NAME = 'results_kernel_stats.csv'
31+
BENCHMARKING_METRICS_FILE_NAME = 'results_counter_collection.csv'
3232
ROCMLIR_INPUT_METRICS_FILE_NAME = 'rocmlir_metrics.txt'
3333
DIRECTIONS = ['-F 1', '-F 2', '-F 4']
3434
DATA_TYPES = ['conv', 'convfp16', 'convbfp16', 'convfp8', 'convint8']
@@ -158,6 +158,13 @@ def getNanoSeconds(fileName):
158158
csv_file.close()
159159
return result
160160

161+
def getProfilerOutputPath(arch, baseOutPath):
    """Return the location of a profiler result file for the given architecture.

    The chip name is taken from the first GFX_CHIP_RE match in ``arch``.
    For gfx942 the file name is returned unchanged; for every other chip
    it is placed under a 'pmc_1' directory component.

    Args:
        arch: architecture string containing the chip name.
        baseOutPath: result file name (or relative path) to resolve.

    Returns:
        ``baseOutPath`` itself for gfx942, otherwise ``pmc_1/<baseOutPath>``.

    Note:
        If GFX_CHIP_RE does not match ``arch``, the ``.group(0)`` call fails
        on the ``None`` result (assuming GFX_CHIP_RE is a compiled regex).
    """
    # Chips for which the result files are read without the 'pmc_1' prefix.
    # TODO (gfx950): check if gfx950 needs this
    chipsWithoutPmcDir = ("gfx942",)

    chipName = GFX_CHIP_RE.search(arch).group(0)
    if chipName in chipsWithoutPmcDir:
        return baseOutPath
    return os.path.join('pmc_1', baseOutPath)
167+
161168
def getMetricArgsForRocprof(arch):
162169
chip = GFX_CHIP_RE.search(arch).group(0)
163170
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -346,7 +353,7 @@ def computeTFlops(self, ns):
346353

347354
def tableEntry(self, nanoSeconds):
348355
# Future(kdrewnia): This can just be a dict literal on Python 3.7+
349-
bankConflict = getBankConflict(BENCHMARKING_METRICS_FILE_NAME)
356+
bankConflict = getBankConflict(getProfilerOutputPath(self.arch, BENCHMARKING_METRICS_FILE_NAME))
350357
result = OrderedDict()
351358
values = [self.direction, self.dataType, self.chip, self.numCU, self.filterLayout, self.inputLayout, self.outputLayout,
352359
self.n, self.c, self.hi, self.wi, self.k, self.y, self.x, self.dilationH, self.dilationW,
@@ -543,7 +550,8 @@ def __init__(self, dtype: str, direction: str, filterLayout: str, inputLayout:st
543550

544551
@classmethod
545552
def benchmarkExternal(cls, commandLine, paths: Paths, arch, numCU):
546-
os.system("rm -f "+BENCHMARKING_METRICS_FILE_NAME)
553+
if os.path.exists(getProfilerOutputPath(arch, BENCHMARKING_METRICS_FILE_NAME)):
554+
os.remove(getProfilerOutputPath(arch, BENCHMARKING_METRICS_FILE_NAME))
547555
config = cls.fromCommandLine(commandLine, arch, numCU)
548556
MIOpenDriverCommand = [MIOPENDRIVER, *commandLine, '-V', '0', '-t', '1']
549557
print("Running MIOpen Benchmark: ", ' '.join(commandLine))
@@ -724,7 +732,7 @@ def computeTFlops(self, ns):
724732

725733
def tableEntry(self, nanoSeconds):
726734
# Future(kdrewnia): This can just be a dict literal on Python 3.7+
727-
bankConflict = getBankConflict(BENCHMARKING_METRICS_FILE_NAME)
735+
bankConflict = getBankConflict(getProfilerOutputPath(self.arch, BENCHMARKING_METRICS_FILE_NAME))
728736
result = OrderedDict()
729737
values = [self.dataType, self.outDataType, self.chip, self.numCU, self.transA, self.transB, \
730738
self.g, self.m, self.k, self.n, self.perfConfig, bankConflict, self.computeTFlops(nanoSeconds)]
@@ -1378,7 +1386,8 @@ def benchmarkExternal(cls, commandLine, paths: Paths, arch, numCU):
13781386
raise ValueError("rocblas-benchmark-driver not built")
13791387
benchmarkArgs = config.generateMlirDriverCommandLine("")
13801388
# remove the result file generated by rocprof in previous benchmarking
1381-
os.system("rm -f "+BENCHMARKING_STATS_FILE_NAME)
1389+
if os.path.exists(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME)):
1390+
os.remove(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
13821391
print(f"Running rocBLAS benchmark {config!r}")
13831392
profilerCommand = [paths.mlir_paths.rocblas_benchmark_driver_path] + \
13841393
benchmarkArgs.split()
@@ -1410,7 +1419,8 @@ def benchmarkExternal(cls, commandLine, paths: Paths, arch, numCU):
14101419

14111420
def runConfigWithMLIR(config: PerfConfiguration, paths: Paths, arch, rocmlir_gen_flags, debug=True):
14121421
# remove the result file generated by rocprof in previous benchmarking
1413-
os.system("rm -f "+BENCHMARKING_STATS_FILE_NAME)
1422+
if os.path.exists(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME)):
1423+
os.remove(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
14141424
commandLineOptions = config.generateMlirDriverCommandLine(rocmlir_gen_flags)
14151425
if debug:
14161426
print("Running MLIR Benchmark: ", repr(config))
@@ -1433,7 +1443,7 @@ def benchmarkMLIR(commandLine, confClass, paths: Paths, arch, numCU, tuningDb: M
14331443

14341444
runConfigWithMLIR(config, paths, arch, rocmlir_gen_flags)
14351445
# get nanoseconds from rocprof output.
1436-
nanoSeconds = getNanoSeconds(BENCHMARKING_STATS_FILE_NAME)
1446+
nanoSeconds = getNanoSeconds(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
14371447
return config.tableEntry(nanoSeconds)
14381448

14391449
#Generate MLIR vs. MIOpen or rocBLAS performance results
@@ -1535,31 +1545,32 @@ def findRunCommand(filename):
15351545

15361546
# Extract testVector and test function name from the test file
15371547
def getFusionTestInfo(filename, paths: Paths):
1548+
chip = getChip()
15381549
testEntry = {}
15391550
rocmlirCommand, futName = findRunCommand(filename)
15401551
if not rocmlirCommand:
15411552
return testEntry
15421553
# rocmlir-gen -fut test -arch gfx90a --clone-harness
1543-
rocmlirgenCommand = [paths.mlir_paths.rocmlir_gen_path, '-fut', futName, '-arch', getChip(), '--clone-harness', filename]
1554+
rocmlirgenCommand = [paths.mlir_paths.rocmlir_gen_path, '-fut', futName, '-arch', chip, '--clone-harness', filename]
15441555
p0 = subprocess.Popen(rocmlirgenCommand, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15451556
if "-migraphx-to-tosa" in rocmlirCommand:
15461557
rocmlirOptCommand = [paths.mlir_paths.rocmlir_opt_path, '-migraphx-to-tosa']
1547-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', getChip()]
1558+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
15481559
# rocmlir-opt -migraphx-to-tosa ../mlir/test/fusion/resnet50-e2e/mixr-resnet-fusion-case-1.mlir
15491560
p1 = subprocess.Popen(rocmlirOptCommand, stdin=p0.stdout, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15501561
# pipe to rocmlir-driver -host-pipeline highlevel -targets gfx90a
15511562
p2 = subprocess.Popen(rocmlirDriverCommand, stdin=p1.stdout, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15521563
p1.stdout.close()
15531564
elif "migraphx" in rocmlirCommand:
15541565
rocmlirMigraphxCommand = [paths.mlir_paths.rocmlir_driver_path, '-kernel-pipeline', 'migraphx']
1555-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'migraphx,highlevel', '-targets', getChip()]
1566+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'migraphx,highlevel', '-targets', chip]
15561567
# rocmlir-driver -kernel-pipeline migraphx ../mlir/test/fusion/resnet50-e2e/mixr-resnet-fusion-case-1.mlir
15571568
p1 = subprocess.Popen(rocmlirMigraphxCommand, stdin=p0.stdout, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15581569
# pipe to rocmlir-driver -host-pipeline highlevel -targets gfx90a
15591570
p2 = subprocess.Popen(rocmlirDriverCommand, stdin=p1.stdout, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15601571
p1.stdout.close()
15611572
else:
1562-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', getChip()]
1573+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
15631574
# rocmlir-driver -host-pipeline highlevel -targets gfx90a
15641575
p2 = subprocess.Popen(rocmlirDriverCommand, stdin=p0.stdout, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
15651576

@@ -1573,33 +1584,36 @@ def getFusionTestInfo(filename, paths: Paths):
15731584
return testEntry
15741585

15751586
def runFusionKernel(filename, rocmlirGenArgs, paths: Paths):
1576-
os.system("rm -f "+BENCHMARKING_STATS_FILE_NAME)
1587+
arch = getArch()
1588+
chip = getChip()
1589+
if os.path.exists(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME)):
1590+
os.remove(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
15771591

15781592
rocmlirCommand, futName = findRunCommand(filename)
15791593

15801594
# rocmlir-gen -fut test -arch gfx90a --clone-harness
1581-
rocmlirgenCommand = [paths.mlir_paths.rocmlir_gen_path, '-fut', futName, '-arch', getChip(), '--clone-harness', filename]
1595+
rocmlirgenCommand = [paths.mlir_paths.rocmlir_gen_path, '-fut', futName, '-arch', chip, '--clone-harness', filename]
15821596
commands = [rocmlirgenCommand]
15831597
if "-migraphx-to-tosa" in rocmlirCommand:
15841598
rocmlirOptCommand = [paths.mlir_paths.rocmlir_opt_path, '-migraphx-to-tosa', filename]
15851599
commands.append(rocmlirOptCommand)
1586-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', getChip()]
1600+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
15871601
commands.append(rocmlirDriverCommand)
15881602
elif "migraphx" in rocmlirCommand:
15891603
rocmlirMigraphxCommand = [paths.mlir_paths.rocmlir_driver_path, '-kernel-pipeline', 'migraphx']
15901604
commands.append(rocmlirMigraphxCommand)
1591-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'migraphx,highlevel', '-targets', getChip()]
1605+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'migraphx,highlevel', '-targets', chip]
15921606
commands.append(rocmlirDriverCommand)
15931607
else:
1594-
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', getChip()]
1608+
rocmlirDriverCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'highlevel', '-targets', chip]
15951609
commands.append(rocmlirDriverCommand)
15961610

15971611
rocmlirGenCommand = [paths.mlir_paths.rocmlir_gen_path] + rocmlirGenArgs
15981612
commands.append(rocmlirGenCommand)
15991613
kernelPipelineCommand = [paths.mlir_paths.rocmlir_driver_path, '-host-pipeline', 'mhal,runner', '-kernel-pipeline', 'full']
16001614
commands.append(kernelPipelineCommand)
16011615
mlir_cpu_runner_args = [f'--shared-libs={paths.mlir_paths.libmlir_rocm_runtime_path},{paths.mlir_paths.libconv_validation_wrappers_path},{paths.mlir_paths.libmlir_runtime_utils_path},{paths.mlir_paths.libmlir_c_runner_utils_path}', '--entry-point-result=void']
1602-
profilerCommand = [ROCPROF] + getMetricArgsForRocprof(getChip()) + ['--kernel-trace', '--stats', '-o', BENCHMARKING_RESULT_FILE_NAME] + ['--', paths.mlir_paths.cpu_runner_path] + mlir_cpu_runner_args
1616+
profilerCommand = [ROCPROF] + getMetricArgsForRocprof(chip) + ['--kernel-trace', '--stats', '-o', BENCHMARKING_RESULT_FILE_NAME] + ['--', paths.mlir_paths.cpu_runner_path] + mlir_cpu_runner_args
16031617
commands.append(profilerCommand)
16041618
runPipeline(commands)
16051619

@@ -1668,7 +1682,7 @@ def benchmarkFusionKernels(test_dir, paths: Paths, arch, numCU, tuningDb: MaybeT
16681682
rocmlirGenArgs = ['-ph', '-fut='+futName+'_wrapper', '--perf_config='+bestPerf, '-']
16691683
runFusionKernel(filename, rocmlirGenArgs, paths)
16701684
# Get nanoseconds of fusion test
1671-
nanoSeconds = getNanoSeconds(BENCHMARKING_STATS_FILE_NAME)
1685+
nanoSeconds = getNanoSeconds(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
16721686
oneEntry = config.tableEntry(nanoSeconds)
16731687
# Keep the best performance
16741688
if testVector in perfResults and oneEntry['TFlops'] <= perfResults[testVector]['TFlops']:
@@ -1677,7 +1691,7 @@ def benchmarkFusionKernels(test_dir, paths: Paths, arch, numCU, tuningDb: MaybeT
16771691
# Run gemm or conv op with the same configuration
16781692
runConfigWithMLIR(config, paths, arch, '')
16791693
# Get nanoseconds of gemm/conv
1680-
nanoSeconds = getNanoSeconds(BENCHMARKING_STATS_FILE_NAME)
1694+
nanoSeconds = getNanoSeconds(getProfilerOutputPath(arch, BENCHMARKING_STATS_FILE_NAME))
16811695
oneEntry['MLIR TFlops'] = config.computeTFlops(nanoSeconds)
16821696
oneEntry['Fusion/MLIR'] = oneEntry['TFlops']/oneEntry['MLIR TFlops']
16831697
oneEntry['FileName'] = filename

mlir/utils/performance/tuningRunner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def verifyKernelWithPerfConfig(perfConfig, config, paths: Paths, options: Option
8787
p3.kill()
8888
outs, errs = p3.communicate()
8989
return np.nan
90-
nanoSeconds = perfRunner.getNanoSeconds(perfRunner.BENCHMARKING_STATS_FILE_NAME)
90+
nanoSeconds = perfRunner.getNanoSeconds(perfRunner.getProfilerOutputPath(options.arch, perfRunner.BENCHMARKING_STATS_FILE_NAME))
9191
finally:
9292
os.chdir(prevdir)
9393
return nanoSeconds

0 commit comments

Comments
 (0)