54
54
import argparse
55
55
import zipfile
56
56
from dataclasses import dataclass
57
+ from enum import Enum
57
58
from os import PathLike
58
59
from os .path import exists , basename
59
60
from pathlib import Path
@@ -302,9 +303,9 @@ def __init__(self, vm: NativeImageVM, bm_suite: BenchmarkSuite | NativeImageBenc
302
303
for option in self .extra_agentlib_options :
303
304
if option .startswith ('config-output-dir' ):
304
305
mx .abort ("config-output-dir must not be set in the extra_agentlib_options." )
305
- # Do not strip the run arguments if safepoint-sampler or pgo_sampler_only configuration is active
306
+ # Do not strip the run arguments if safepoint-sampler configuration is active or we want pgo samples (either from instrumentation or perf)
306
307
self .extra_profile_run_args = bm_suite .extra_profile_run_arg (self .benchmark_name , args , list (image_run_args ),
307
- not (vm .safepoint_sampler or vm .pgo_sampler_only ))
308
+ not (vm .safepoint_sampler or vm .pgo_sampler_only or vm . pgo_use_perf ))
308
309
self .extra_agent_profile_run_args = bm_suite .extra_agent_profile_run_arg (self .benchmark_name , args ,
309
310
list (image_run_args ))
310
311
self .params = ['extra-image-build-argument' , 'extra-jvm-arg' , 'extra-run-arg' , 'extra-agent-run-arg' ,
@@ -320,10 +321,16 @@ def __init__(self, vm: NativeImageVM, bm_suite: BenchmarkSuite | NativeImageBenc
320
321
self .output_dir : Path = output_dir
321
322
self .final_image_name = self .executable_name + '-' + vm .config_name ()
322
323
self .profile_path : Path = self .output_dir / f"{ self .executable_name } .iprof"
324
+ self .source_mappings_path : Path = self .output_dir / f"{ self .executable_name } .sourceMappings.json"
325
+ self .perf_script_path : Path = self .output_dir / f"{ self .executable_name } .perf.script.out"
326
+ self .perf_data_path : Path = self .output_dir / f"{ self .executable_name } .perf.data"
323
327
self .config_dir : Path = self .output_dir / "config"
324
328
self .log_dir : Path = self .output_dir
325
329
self .ml_log_dump_path : Path = self .output_dir / f"{ self .executable_name } .ml.log.csv"
326
- base_image_build_args = ['--no-fallback' , '-g' ]
330
+ base_image_build_args = ['--no-fallback' ]
331
+ if not vm .pgo_use_perf :
332
+ # Can only have debug info when not using perf, [GR-66850]
333
+ base_image_build_args .append ('-g' )
327
334
base_image_build_args += ['-H:+VerifyGraalGraphs' , '-H:+VerifyPhases' ,
328
335
'--diagnostics-mode' ] if vm .is_gate else []
329
336
base_image_build_args += ['-H:+ReportExceptionStackTraces' ]
@@ -712,9 +719,13 @@ class NativeImageVM(GraalVm):
712
719
def __init__ (self , name , config_name , extra_java_args = None , extra_launcher_args = None ):
713
720
super ().__init__ (name , config_name , extra_java_args , extra_launcher_args )
714
721
self .vm_args = None
722
+ # When this is set, run the instrumentation-image and instrumentation-run stages.
723
+ # Does not necessarily do instrumentation.
715
724
self .pgo_instrumentation = False
716
725
self .pgo_exclude_conditional = False
717
726
self .pgo_sampler_only = False
727
+ self .pgo_use_perf = False
728
+ self .pgo_perf_invoke_profile_collection_strategy : Optional [PerfInvokeProfileCollectionStrategy ] = None
718
729
self .is_gate = False
719
730
self .is_quickbuild = False
720
731
self .layered = False
@@ -803,6 +814,10 @@ def config_name(self):
803
814
and self .force_profile_inference is False \
804
815
and self .profile_inference_feature_extraction is False :
805
816
config += ["pgo" ]
817
+ if self .pgo_use_perf :
818
+ config += ["perf-sampler" ]
819
+ if self .pgo_perf_invoke_profile_collection_strategy is not None :
820
+ config += [str (self .pgo_perf_invoke_profile_collection_strategy )]
806
821
if self .analysis_context_sensitivity is not None :
807
822
sensitivity = self .analysis_context_sensitivity
808
823
if sensitivity .startswith ("_" ):
@@ -848,7 +863,7 @@ def _configure_from_name(self, config_name):
848
863
# Note: the order of entries here must match the order of statements in NativeImageVM.config_name()
849
864
rule = r'^(?P<native_architecture>native-architecture-)?(?P<string_inlining>string-inlining-)?(?P<otw>otw-)?(?P<compacting_gc>compacting-gc-)?(?P<preserve_all>preserve-all-)?(?P<preserve_classpath>preserve-classpath-)?' \
850
865
r'(?P<future_defaults_all>future-defaults-all-)?(?P<gate>gate-)?(?P<upx>upx-)?(?P<quickbuild>quickbuild-)?(?P<layered>layered-)?(?P<graalos>graalos-)?(?P<gc>g1gc-)?' \
851
- r'(?P<llvm>llvm-)?(?P<pgo>pgo-|pgo-sampler-)?(?P<inliner>inline-)?' \
866
+ r'(?P<llvm>llvm-)?(?P<pgo>pgo-|pgo-sampler-|pgo-perf-sampler-invoke-multiple-|pgo-perf-sampler-invoke-|pgo-perf-sampler- )?(?P<inliner>inline-)?' \
852
867
r'(?P<analysis_context_sensitivity>insens-|allocsens-|1obj-|2obj1h-|3obj2h-|4obj3h-)?(?P<jdk_profiles>jdk-profiles-collect-|adopted-jdk-pgo-)?' \
853
868
r'(?P<profile_inference>profile-inference-feature-extraction-|profile-inference-call-count-|profile-inference-pgo-|profile-inference-debug-)?(?P<sampler>safepoint-sampler-|async-sampler-)?(?P<optimization_level>O0-|O1-|O2-|O3-|Os-)?(default-)?(?P<edition>ce-|ee-)?$'
854
869
@@ -926,6 +941,17 @@ def _configure_from_name(self, config_name):
926
941
elif pgo_mode == "pgo-sampler" :
927
942
self .pgo_instrumentation = True
928
943
self .pgo_sampler_only = True
944
+ elif pgo_mode == "pgo-perf-sampler" :
945
+ self .pgo_instrumentation = True
946
+ self .pgo_use_perf = True
947
+ elif pgo_mode == "pgo-perf-sampler-invoke" :
948
+ self .pgo_instrumentation = True
949
+ self .pgo_use_perf = True
950
+ self .pgo_perf_invoke_profile_collection_strategy = PerfInvokeProfileCollectionStrategy .ALL
951
+ elif pgo_mode == "pgo-perf-sampler-invoke-multiple" :
952
+ self .pgo_instrumentation = True
953
+ self .pgo_use_perf = True
954
+ self .pgo_perf_invoke_profile_collection_strategy = PerfInvokeProfileCollectionStrategy .MULTIPLE_CALLEES
929
955
else :
930
956
mx .abort (f"Unknown pgo mode: { pgo_mode } " )
931
957
@@ -1330,8 +1356,11 @@ def image_build_statistics_rules(self, benchmarks):
1330
1356
return rules
1331
1357
1332
1358
def image_build_timers_rules (self , benchmarks ):
1333
- measured_phases = ['total' , 'setup' , 'classlist' , 'analysis' , 'universe' , 'compile' , 'layout' , 'dbginfo' ,
1359
+ measured_phases = ['total' , 'setup' , 'classlist' , 'analysis' , 'universe' , 'compile' , 'layout' ,
1334
1360
'image' , 'write' ]
1361
+ if not self .pgo_use_perf :
1362
+ # No debug info with perf, [GR-66850]
1363
+ measured_phases .append ('dbginfo' )
1335
1364
rules = []
1336
1365
for i in range (0 , len (measured_phases )):
1337
1366
phase = measured_phases [i ]
@@ -1434,8 +1463,12 @@ def run_stage_agent(self):
1434
1463
1435
1464
def run_stage_instrument_image (self ):
1436
1465
executable_name_args = ['-o' , str (self .config .instrumented_image_path )]
1437
- instrument_args = ['--pgo-sampling' ] if self .pgo_sampler_only else ['--pgo-instrument' ]
1438
- instrument_args += [f"-R:ProfilesDumpFile={ self .config .profile_path } " ]
1466
+ instrument_args = []
1467
+ if self .pgo_use_perf :
1468
+ instrument_args += svm_experimental_options ([f'-H:PGOPerfSourceMappings={ self .config .source_mappings_path } ' ])
1469
+ else :
1470
+ instrument_args += ['--pgo-sampling' if self .pgo_sampler_only else '--pgo-instrument' , f"-R:ProfilesDumpFile={ self .config .profile_path } " ]
1471
+
1439
1472
if self .jdk_profiles_collect :
1440
1473
instrument_args += svm_experimental_options (['-H:+AOTPriorityInline' , '-H:-SamplingCollect' ,
1441
1474
f'-H:ProfilingPackagePrefixes={ self .generate_profiling_package_prefixes ()} ' ])
@@ -1484,13 +1517,48 @@ def _ensureSamplesAreInProfile(self, profile_path: PathLike):
1484
1517
assert sample ["records" ][
1485
1518
0 ] > 0 , f"Sampling profiles seem to have a 0 in records in file { profile_path } "
1486
1519
1520
+ def _collect_perf_results_into_iprof (self ):
1521
+ with open (self .config .perf_script_path , 'w' ) as outfile :
1522
+ mx .log (f"Started perf script at { self .get_stage_runner ().get_timestamp ()} " )
1523
+ exit_code = mx .run (['perf' , 'script' , f'--input={ self .config .perf_data_path } ' , '--max-stack=2048' ], out = outfile )
1524
+ if exit_code == 0 :
1525
+ mx .log (f"Finished perf script at { self .get_stage_runner ().get_timestamp ()} " )
1526
+ mx .log (f"Perf compressed data file size: { os .path .getsize (self .config .perf_data_path )} bytes" )
1527
+ mx .log (f"Perf script file size: { os .path .getsize (self .config .perf_script_path )} bytes" )
1528
+ else :
1529
+ mx .abort (f"Perf script failed with exit code: { exit_code } " )
1530
+ mx .log (f"Started generating iprof at { self .get_stage_runner ().get_timestamp ()} " )
1531
+ nic_command = [os .path .join (self .home (), 'bin' , 'native-image-configure' ), 'generate-iprof-from-perf' , f'--perf={ self .config .perf_script_path } ' , f'--source-mappings={ self .config .source_mappings_path } ' , f'--output-file={ self .config .profile_path } ' ]
1532
+ if self .pgo_perf_invoke_profile_collection_strategy == PerfInvokeProfileCollectionStrategy .ALL :
1533
+ nic_command += ["--enable-experimental-option=SampledVirtualInvokeProfilesAll" ]
1534
+ elif self .pgo_perf_invoke_profile_collection_strategy == PerfInvokeProfileCollectionStrategy .MULTIPLE_CALLEES :
1535
+ nic_command += ["--enable-experimental-option=SampledVirtualInvokeProfilesMultipleCallees" ]
1536
+
1537
+ mx .run (nic_command )
1538
+ mx .log (f"Finished generating iprof at { self .get_stage_runner ().get_timestamp ()} " )
1539
+ os .remove (self .config .perf_script_path )
1540
+ os .remove (self .config .perf_data_path )
1541
+ os .remove (self .config .source_mappings_path )
1542
+
1487
1543
def run_stage_instrument_run (self ):
1488
1544
image_run_cmd = [str (self .config .instrumented_image_path )]
1489
1545
image_run_cmd += self .config .extra_jvm_args
1490
1546
image_run_cmd += self .config .extra_profile_run_args
1547
+ if self .pgo_use_perf :
1548
+ image_run_cmd = ['perf' , 'record' , '-o' , f'{ self .config .perf_data_path } ' , '--call-graph' , 'fp,2048' , '--freq=999' ] + image_run_cmd
1549
+
1491
1550
with self .get_stage_runner () as s :
1492
- exit_code = s .execute_command (self , image_run_cmd )
1551
+ if self .pgo_use_perf :
1552
+ mx .log (f"Started perf record at { self .get_stage_runner ().get_timestamp ()} " )
1553
+ exit_code = s .execute_command (self , image_run_cmd )
1554
+ mx .log (f"Finished perf record at { self .get_stage_runner ().get_timestamp ()} " )
1555
+ else :
1556
+ exit_code = s .execute_command (self , image_run_cmd )
1557
+
1493
1558
if exit_code == 0 :
1559
+ if self .pgo_use_perf :
1560
+ self ._collect_perf_results_into_iprof ()
1561
+
1494
1562
if not self .config .profile_path .exists ():
1495
1563
# The shutdown hook does not trigger for certain apps (GR-60456)
1496
1564
mx .abort (
@@ -1543,6 +1611,13 @@ def get_layered_build_args(self) -> List[str]:
1543
1611
def run_stage_image (self ):
1544
1612
executable_name_args = ['-o' , self .config .final_image_name ]
1545
1613
pgo_args = [f"--pgo={ self .config .profile_path } " ]
1614
+ if self .pgo_use_perf :
1615
+ # -g is already set in base_image_build_args if we're not using perf. When using perf, if debug symbols
1616
+ # are present they will interfere with sample decoding using source mappings.
1617
+ # We still set -g for the optimized build to stay consistent with the other configs.
1618
+ # [GR-66850] would allow enabling -g during instrument-image even with perf.
1619
+ executable_name_args = ['-g' ] + executable_name_args
1620
+ pgo_args += svm_experimental_options (['-H:+PGOPrintProfileQuality' , '-H:+PGOIgnoreVersionCheck' ])
1546
1621
if self .adopted_jdk_pgo :
1547
1622
# choose appropriate profiles
1548
1623
jdk_version = mx_sdk_vm .get_jdk_version_for_profiles ()
@@ -3636,6 +3711,17 @@ def _strip_arg_with_number_gen(_strip_arg, _args):
3636
3711
result = _strip_arg_with_number_gen (strip_arg , result )
3637
3712
return list (result )
3638
3713
3714
class PerfInvokeProfileCollectionStrategy(Enum):
    """
    Selects how virtual invoke method profiles are extracted from perf sampling data.

    Members:
        ALL: a profile is generated for each callsite.
        MULTIPLE_CALLEES: a profile is generated only for callsites that have at least
            2 different sampled targets.

    The string form of a member is its value.
    """

    ALL = "invoke"
    MULTIPLE_CALLEES = "invoke-multiple"

    def __str__(self):
        # Render the member as its plain value instead of the default "ClassName.MEMBER".
        member_value = self.value
        return member_value
3639
3725
3640
3726
class StagesInfo :
3641
3727
"""
0 commit comments