[GR-61337] Add mx jvm configs for profiling using perf during the instrumentation-run stage

David Hua · David Hua · commit 4e5a87c3060e · 2025-07-18T05:59:03.000Z
PullRequest: graal/21332
diff --git a/sdk/mx.sdk/mx_sdk_benchmark.py b/sdk/mx.sdk/mx_sdk_benchmark.py
@@ -54,6 +54,7 @@
 import argparse
 import zipfile
 from dataclasses import dataclass
+from enum import Enum
 from os import PathLike
 from os.path import exists, basename
 from pathlib import Path
@@ -302,9 +303,9 @@ def __init__(self, vm: NativeImageVM, bm_suite: BenchmarkSuite | NativeImageBenc
         for option in self.extra_agentlib_options:
             if option.startswith('config-output-dir'):
                 mx.abort("config-output-dir must not be set in the extra_agentlib_options.")
-        # Do not strip the run arguments if safepoint-sampler or pgo_sampler_only configuration is active
+        # Do not strip the run arguments if safepoint-sampler configuration is active or we want pgo samples (either from instrumentation or perf)
         self.extra_profile_run_args = bm_suite.extra_profile_run_arg(self.benchmark_name, args, list(image_run_args),
-                                                                     not (vm.safepoint_sampler or vm.pgo_sampler_only))
+                                                                     not (vm.safepoint_sampler or vm.pgo_sampler_only or vm.pgo_use_perf))
         self.extra_agent_profile_run_args = bm_suite.extra_agent_profile_run_arg(self.benchmark_name, args,
                                                                                  list(image_run_args))
         self.params = ['extra-image-build-argument', 'extra-jvm-arg', 'extra-run-arg', 'extra-agent-run-arg',
@@ -320,10 +321,16 @@ def __init__(self, vm: NativeImageVM, bm_suite: BenchmarkSuite | NativeImageBenc
         self.output_dir: Path = output_dir
         self.final_image_name = self.executable_name + '-' + vm.config_name()
         self.profile_path: Path = self.output_dir / f"{self.executable_name}.iprof"
+        self.source_mappings_path: Path = self.output_dir / f"{self.executable_name}.sourceMappings.json"
+        self.perf_script_path: Path = self.output_dir / f"{self.executable_name}.perf.script.out"
+        self.perf_data_path: Path = self.output_dir / f"{self.executable_name}.perf.data"
         self.config_dir: Path = self.output_dir / "config"
         self.log_dir: Path = self.output_dir
         self.ml_log_dump_path: Path = self.output_dir / f"{self.executable_name}.ml.log.csv"
-        base_image_build_args = ['--no-fallback', '-g']
+        base_image_build_args = ['--no-fallback']
+        if not vm.pgo_use_perf:
+            # Can only have debug info when not using perf, [GR-66850]
+            base_image_build_args.append('-g')
         base_image_build_args += ['-H:+VerifyGraalGraphs', '-H:+VerifyPhases',
                                   '--diagnostics-mode'] if vm.is_gate else []
         base_image_build_args += ['-H:+ReportExceptionStackTraces']
@@ -712,9 +719,13 @@ class NativeImageVM(GraalVm):
     def __init__(self, name, config_name, extra_java_args=None, extra_launcher_args=None):
         super().__init__(name, config_name, extra_java_args, extra_launcher_args)
         self.vm_args = None
+        # When this is set, run the instrumentation-image and instrumentation-run stages.
+        # Does not necessarily do instrumentation.
         self.pgo_instrumentation = False
         self.pgo_exclude_conditional = False
         self.pgo_sampler_only = False
+        self.pgo_use_perf = False
+        self.pgo_perf_invoke_profile_collection_strategy: Optional[PerfInvokeProfileCollectionStrategy] = None
         self.is_gate = False
         self.is_quickbuild = False
         self.layered = False
@@ -803,6 +814,10 @@ def config_name(self):
                 and self.force_profile_inference is False \
                 and self.profile_inference_feature_extraction is False:
             config += ["pgo"]
+        if self.pgo_use_perf:
+            config += ["perf-sampler"]
+            if self.pgo_perf_invoke_profile_collection_strategy is not None:
+                config += [str(self.pgo_perf_invoke_profile_collection_strategy)]
         if self.analysis_context_sensitivity is not None:
             sensitivity = self.analysis_context_sensitivity
             if sensitivity.startswith("_"):
@@ -848,7 +863,7 @@ def _configure_from_name(self, config_name):
         # Note: the order of entries here must match the order of statements in NativeImageVM.config_name()
         rule = r'^(?P<native_architecture>native-architecture-)?(?P<string_inlining>string-inlining-)?(?P<otw>otw-)?(?P<compacting_gc>compacting-gc-)?(?P<preserve_all>preserve-all-)?(?P<preserve_classpath>preserve-classpath-)?' \
                r'(?P<future_defaults_all>future-defaults-all-)?(?P<gate>gate-)?(?P<upx>upx-)?(?P<quickbuild>quickbuild-)?(?P<layered>layered-)?(?P<graalos>graalos-)?(?P<gc>g1gc-)?' \
-               r'(?P<llvm>llvm-)?(?P<pgo>pgo-|pgo-sampler-)?(?P<inliner>inline-)?' \
+               r'(?P<llvm>llvm-)?(?P<pgo>pgo-|pgo-sampler-|pgo-perf-sampler-invoke-multiple-|pgo-perf-sampler-invoke-|pgo-perf-sampler-)?(?P<inliner>inline-)?' \
                r'(?P<analysis_context_sensitivity>insens-|allocsens-|1obj-|2obj1h-|3obj2h-|4obj3h-)?(?P<jdk_profiles>jdk-profiles-collect-|adopted-jdk-pgo-)?' \
                r'(?P<profile_inference>profile-inference-feature-extraction-|profile-inference-call-count-|profile-inference-pgo-|profile-inference-debug-)?(?P<sampler>safepoint-sampler-|async-sampler-)?(?P<optimization_level>O0-|O1-|O2-|O3-|Os-)?(default-)?(?P<edition>ce-|ee-)?$'
 
@@ -926,6 +941,17 @@ def _configure_from_name(self, config_name):
             elif pgo_mode == "pgo-sampler":
                 self.pgo_instrumentation = True
                 self.pgo_sampler_only = True
+            elif pgo_mode == "pgo-perf-sampler":
+                self.pgo_instrumentation = True
+                self.pgo_use_perf = True
+            elif pgo_mode == "pgo-perf-sampler-invoke":
+                self.pgo_instrumentation = True
+                self.pgo_use_perf = True
+                self.pgo_perf_invoke_profile_collection_strategy = PerfInvokeProfileCollectionStrategy.ALL
+            elif pgo_mode == "pgo-perf-sampler-invoke-multiple":
+                self.pgo_instrumentation = True
+                self.pgo_use_perf = True
+                self.pgo_perf_invoke_profile_collection_strategy = PerfInvokeProfileCollectionStrategy.MULTIPLE_CALLEES
             else:
                 mx.abort(f"Unknown pgo mode: {pgo_mode}")
 
@@ -1330,8 +1356,11 @@ def image_build_statistics_rules(self, benchmarks):
         return rules
 
     def image_build_timers_rules(self, benchmarks):
-        measured_phases = ['total', 'setup', 'classlist', 'analysis', 'universe', 'compile', 'layout', 'dbginfo',
+        measured_phases = ['total', 'setup', 'classlist', 'analysis', 'universe', 'compile', 'layout',
                            'image', 'write']
+        if not self.pgo_use_perf:
+            # No debug info with perf, [GR-66850]
+            measured_phases.append('dbginfo')
         rules = []
         for i in range(0, len(measured_phases)):
             phase = measured_phases[i]
@@ -1434,8 +1463,12 @@ def run_stage_agent(self):
 
     def run_stage_instrument_image(self):
         executable_name_args = ['-o', str(self.config.instrumented_image_path)]
-        instrument_args = ['--pgo-sampling'] if self.pgo_sampler_only else ['--pgo-instrument']
-        instrument_args += [f"-R:ProfilesDumpFile={self.config.profile_path}"]
+        instrument_args = []
+        if self.pgo_use_perf:
+            instrument_args += svm_experimental_options([f'-H:PGOPerfSourceMappings={self.config.source_mappings_path}'])
+        else:
+            instrument_args += ['--pgo-sampling' if self.pgo_sampler_only else '--pgo-instrument', f"-R:ProfilesDumpFile={self.config.profile_path}"]
+
         if self.jdk_profiles_collect:
             instrument_args += svm_experimental_options(['-H:+AOTPriorityInline', '-H:-SamplingCollect',
                                                          f'-H:ProfilingPackagePrefixes={self.generate_profiling_package_prefixes()}'])
@@ -1484,13 +1517,48 @@ def _ensureSamplesAreInProfile(self, profile_path: PathLike):
                     assert sample["records"][
                                0] > 0, f"Sampling profiles seem to have a 0 in records in file {profile_path}"
 
+    def _collect_perf_results_into_iprof(self):
+        with open(self.config.perf_script_path, 'w') as outfile:
+            mx.log(f"Started perf script at {self.get_stage_runner().get_timestamp()}")
+            exit_code = mx.run(['perf', 'script', f'--input={self.config.perf_data_path}', '--max-stack=2048'], out=outfile)
+            if exit_code == 0:
+                mx.log(f"Finished perf script at {self.get_stage_runner().get_timestamp()}")
+                mx.log(f"Perf compressed data file size: {os.path.getsize(self.config.perf_data_path)} bytes")
+                mx.log(f"Perf script file size: {os.path.getsize(self.config.perf_script_path)} bytes")
+            else:
+                mx.abort(f"Perf script failed with exit code: {exit_code}")
+        mx.log(f"Started generating iprof at {self.get_stage_runner().get_timestamp()}")
+        nic_command = [os.path.join(self.home(), 'bin', 'native-image-configure'), 'generate-iprof-from-perf', f'--perf={self.config.perf_script_path}', f'--source-mappings={self.config.source_mappings_path}', f'--output-file={self.config.profile_path}']
+        if self.pgo_perf_invoke_profile_collection_strategy == PerfInvokeProfileCollectionStrategy.ALL:
+            nic_command += ["--enable-experimental-option=SampledVirtualInvokeProfilesAll"]
+        elif self.pgo_perf_invoke_profile_collection_strategy == PerfInvokeProfileCollectionStrategy.MULTIPLE_CALLEES:
+            nic_command += ["--enable-experimental-option=SampledVirtualInvokeProfilesMultipleCallees"]
+
+        mx.run(nic_command)
+        mx.log(f"Finished generating iprof at {self.get_stage_runner().get_timestamp()}")
+        os.remove(self.config.perf_script_path)
+        os.remove(self.config.perf_data_path)
+        os.remove(self.config.source_mappings_path)
+
     def run_stage_instrument_run(self):
         image_run_cmd = [str(self.config.instrumented_image_path)]
         image_run_cmd += self.config.extra_jvm_args
         image_run_cmd += self.config.extra_profile_run_args
+        if self.pgo_use_perf:
+            image_run_cmd = ['perf', 'record', '-o', f'{self.config.perf_data_path}', '--call-graph', 'fp,2048', '--freq=999'] + image_run_cmd
+
         with self.get_stage_runner() as s:
-            exit_code = s.execute_command(self, image_run_cmd)
+            if self.pgo_use_perf:
+                mx.log(f"Started perf record at {self.get_stage_runner().get_timestamp()}")
+                exit_code = s.execute_command(self, image_run_cmd)
+                mx.log(f"Finished perf record at {self.get_stage_runner().get_timestamp()}")
+            else:
+                exit_code = s.execute_command(self, image_run_cmd)
+
             if exit_code == 0:
+                if self.pgo_use_perf:
+                    self._collect_perf_results_into_iprof()
+
                 if not self.config.profile_path.exists():
                     # The shutdown hook does not trigger for certain apps (GR-60456)
                     mx.abort(
@@ -1543,6 +1611,13 @@ def get_layered_build_args(self) -> List[str]:
     def run_stage_image(self):
         executable_name_args = ['-o', self.config.final_image_name]
         pgo_args = [f"--pgo={self.config.profile_path}"]
+        if self.pgo_use_perf:
+            # -g is already set in base_image_build_args if we're not using perf. When using perf, if debug symbols
+            # are present they will interfere with sample decoding using source mappings.
+            # We still set -g for the optimized build to stay consistent with the other configs.
+            # [GR-66850] would allow enabling -g during instrument-image even with perf.
+            executable_name_args = ['-g'] + executable_name_args
+            pgo_args += svm_experimental_options(['-H:+PGOPrintProfileQuality', '-H:+PGOIgnoreVersionCheck'])
         if self.adopted_jdk_pgo:
             # choose appropriate profiles
             jdk_version = mx_sdk_vm.get_jdk_version_for_profiles()
@@ -3636,6 +3711,17 @@ def _strip_arg_with_number_gen(_strip_arg, _args):
         result = _strip_arg_with_number_gen(strip_arg, result)
     return list(result)
 
+class PerfInvokeProfileCollectionStrategy(Enum):
+    """
+    The strategy for extracting virtual invoke method profiles from perf sampling data.
+    ALL: Generate a profile for each callsite.
+    MULTIPLE_CALLEES: Only generate profiles for callsites with at least 2 different sampled targets.
+    """
+    ALL = "invoke"
+    MULTIPLE_CALLEES = "invoke-multiple"
+
+    def __str__(self):
+        return self.value
 
 class StagesInfo:
     """