[GR-69564] [GR-69910] [GR-69767] Implement iteration summaries in the Polybench harness

Andrija Kolic · Andrija Kolic · commit cbe5cbd148ce · 2025-09-29T16:00:33.000Z
PullRequest: graal/22204
diff --git a/graal-common.json b/graal-common.json
@@ -1,6 +1,6 @@
 {
   "README": "This file contains definitions that are useful for the jsonnet CI files of the graal and graal-enterprise repositories.",
   "ci": {
-    "overlay": "49f8376ffc0773e2069aea7be211e06eafc1088b"
+    "overlay": "aecf4e4409bde4bfcc1009e5b8c895e150500dfa"
   }
 }
diff --git a/truffle/mx.truffle/mx_polybench/model.py b/truffle/mx.truffle/mx_polybench/model.py
@@ -520,7 +520,8 @@ def rules(self, output, benchmarks, bmSuiteArgs):
         if metric_name == "time":
-            # For metric "time", two metrics are reported:
+            # For metric "time", three metrics are reported:
             # - "warmup" (per-iteration data for "warmup" and "run" iterations)
-            # - "time" (per-iteration data for only the "run" iterations)
+            # - "time-sample" (per-iteration data for only the "run" iterations)
+            # - "time" (aggregation of per-iteration data for the "run" iterations after outlier removal)
             rules += [
                 mx_benchmark.StdOutRule(
                     r"\[.*\] iteration ([0-9]*): (?P<value>.*) (?P<unit>.*)",
@@ -540,7 +541,7 @@ def rules(self, output, benchmarks, bmSuiteArgs):
                     {
                         "benchmark": benchmark_name,
                         "metric.better": "lower",
-                        "metric.name": "time",
+                        "metric.name": "time-sample",
                         "metric.unit": ("<unit>", str),
                         "metric.value": ("<value>", float),
                         "metric.type": "numeric",
@@ -549,6 +550,19 @@ def rules(self, output, benchmarks, bmSuiteArgs):
                     },
                     startPattern=r"::: Running :::",
                 ),
+                ExcludeWarmupRule(
+                    r"\[.*\] run aggregate summary: (?P<value>.*) (?P<unit>.*)",
+                    {
+                        "benchmark": benchmark_name,
+                        "metric.better": "lower",
+                        "metric.name": "time",
+                        "metric.unit": ("<unit>", str),
+                        "metric.value": ("<value>", float),
+                        "metric.type": "numeric",
+                        "metric.score-function": "id",
+                    },
+                    startPattern=r"::: Running :::",
+                ),
             ]
         elif metric_name in ("allocated-memory", "metaspace-memory", "application-memory", "instructions"):
             rules += [
diff --git a/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/AverageSummary.java b/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/AverageSummary.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.polybench;
+
+import java.util.Optional;
+
+/**
+ * Summarizes the results of a polybench benchmark with the arithmetic mean of the iteration
+ * datapoints.
+ */
+class AverageSummary implements Summary {
+    /**
+     * Computes the arithmetic mean of {@code results}.
+     *
+     * @param results per-iteration datapoints; the array is only read, never modified
+     * @return the mean of all datapoints, or an empty {@link Optional} when {@code results} has no
+     *         elements
+     */
+    @Override
+    public Optional<Double> postprocess(double[] results) {
+        if (results.length == 0) {
+            return Optional.empty();
+        }
+        double sum = 0;
+        // Iterate with the primitive type: a boxed loop variable allocates a Double per datapoint.
+        for (double result : results) {
+            sum += result;
+        }
+        return Optional.of(sum / results.length);
+    }
+
+    @Override
+    public String toString() {
+        return "AverageSummary{}";
+    }
+}
diff --git a/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/Config.java b/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/Config.java
@@ -26,6 +26,7 @@
 
 import org.graalvm.polyglot.Value;
 
+import java.io.InvalidObjectException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -44,6 +45,7 @@ public class Config {
     int iterations;
     Metric metric;
     boolean evalSourceOnlyDefault;
+    Summary summary;
 
     final List<String> unrecognizedArguments = new ArrayList<>();
 
@@ -79,7 +81,7 @@ public MultiEngineConfig initMultiEngine() {
         return multiEngine;
     }
 
-    public void parseBenchSpecificDefaults(Value benchmark) {
+    public void parseBenchSpecificDefaults(Value benchmark) throws InvalidObjectException {
         if (warmupIterations == UNINITIALIZED_ITERATIONS) {
             if (benchmark.hasMember("warmupIterations")) {
                 Value warmupIterationsMember = benchmark.getMember("warmupIterations");
@@ -96,6 +98,37 @@ public void parseBenchSpecificDefaults(Value benchmark) {
                 iterations = DEFAULT_ITERATIONS;
             }
         }
+        parseBenchSpecificSummary(benchmark);
+    }
+
+    /**
+     * Reads the optional 'summary' member of the benchmark and, when present, stores the matching
+     * {@link Summary} implementation in {@link #summary}.
+     *
+     * @param benchmark the evaluated benchmark value that may expose a 'summary' member
+     * @throws InvalidObjectException if the 'summary' member has no 'get' member, names an
+     *             unrecognized summary, or provides thresholds that do not fit into a double
+     */
+    private void parseBenchSpecificSummary(Value benchmark) throws InvalidObjectException {
+        if (!benchmark.hasMember("summary")) {
+            // Nothing to configure: the benchmark does not declare a summary.
+            return;
+        }
+
+        // The member may be either the summary object itself or a function producing it.
+        Value summarySpec = benchmark.getMember("summary");
+        if (summarySpec.canExecute()) {
+            summarySpec = summarySpec.execute();
+        }
+        if (!summarySpec.hasMember("get")) {
+            throw new InvalidObjectException("Failed at parsing the 'summary' benchmark member due to it missing a 'get' member!");
+        }
+
+        Value getter = summarySpec.getMember("get");
+        String requestedName = getter.execute("name").asString();
+
+        if (AverageSummary.class.getSimpleName().equals(requestedName)) {
+            summary = new AverageSummary();
+            return;
+        }
+        if (!OutlierRemovalAverageSummary.class.getSimpleName().equals(requestedName)) {
+            throw new InvalidObjectException("Failed at parsing the 'summary' benchmark member due to unrecognized name of '" + requestedName + "'!");
+        }
+
+        // Only the outlier-removal summary is parameterized; both thresholds are looked up first.
+        Value lower = getter.execute("lower-threshold");
+        Value upper = getter.execute("upper-threshold");
+        if (!(lower.fitsInDouble() && upper.fitsInDouble())) {
+            String prefix = "Failed at parsing 'summary' benchmark member with name of '" + requestedName + "'";
+            throw new InvalidObjectException(prefix + " because 'lower-threshold' or 'upper-threshold' is not present, or does not fit into a double!");
+        }
+        summary = new OutlierRemovalAverageSummary(lower.asDouble(), upper.asDouble());
     }
 
     @Override
@@ -107,7 +140,10 @@ public String toString() {
                         "iterations:        " + (iterations == UNINITIALIZED_ITERATIONS ? "default" : iterations + "\n");
         if (multiEngine != null) {
             config += "runs:              " + multiEngine.numberOfRuns + "\n" +
-                            "shared engine:     " + multiEngine.sharedEngine;
+                            "shared engine:     " + multiEngine.sharedEngine + "\n";
+        }
+        if (summary != null) {
+            config += "summary:           " + summary + "\n";
         }
         return config;
     }
diff --git a/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/OutlierRemovalAverageSummary.java b/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/OutlierRemovalAverageSummary.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.polybench;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+/**
+ * Summarizes the results of a polybench benchmark with an average of the iteration datapoints
+ * whose rank, after sorting in ascending order, lies between the {@code lowerThreshold} and
+ * {@code upperThreshold} fractions of the datapoint count.
+ */
+class OutlierRemovalAverageSummary extends AverageSummary {
+    private final double lowerThreshold;
+    private final double upperThreshold;
+
+    /**
+     * Creates a summary that keeps the sorted datapoints in the index range
+     * {@code [ceil(lowerThreshold * n), floor(upperThreshold * n))}, where {@code n} is the number
+     * of datapoints.
+     *
+     * @param lowerThreshold fraction of the datapoint count below which sorted datapoints are
+     *            discarded
+     * @param upperThreshold fraction of the datapoint count from which sorted datapoints are
+     *            discarded
+     * @throws IllegalArgumentException unless
+     *             {@code 0 <= lowerThreshold <= upperThreshold <= 1}
+     */
+    OutlierRemovalAverageSummary(double lowerThreshold, double upperThreshold) {
+        // Fail fast on thresholds outside [0, 1]: a negative lower bound would make
+        // Arrays.copyOfRange throw, and an upper bound above 1 would make it pad the slice with
+        // zeros that silently skew the average. The negated conjunction also rejects NaN.
+        if (!(0.0 <= lowerThreshold && lowerThreshold <= upperThreshold && upperThreshold <= 1.0)) {
+            throw new IllegalArgumentException(
+                            "Expected 0 <= lowerThreshold <= upperThreshold <= 1, but got lowerThreshold=" + lowerThreshold + ", upperThreshold=" + upperThreshold);
+        }
+        this.lowerThreshold = lowerThreshold;
+        this.upperThreshold = upperThreshold;
+    }
+
+    /**
+     * Averages the datapoints that remain after discarding the lowest and highest ranked ones.
+     *
+     * @param results per-iteration datapoints; the array is left unmodified
+     * @return the average of the retained datapoints, or an empty {@link Optional} when
+     *         {@code results} is empty or the thresholds retain no datapoint
+     */
+    @Override
+    public Optional<Double> postprocess(double[] results) {
+        int n = results.length;
+        if (n == 0) {
+            return Optional.empty();
+        }
+
+        int fromIndex = (int) Math.ceil(lowerThreshold * n);
+        int toIndex = (int) Math.floor(upperThreshold * n);
+        if (fromIndex >= toIndex) {
+            return Optional.empty();
+        }
+
+        // Sort a private copy so that the caller's array keeps its iteration order.
+        double[] sorted = results.clone();
+        Arrays.sort(sorted);
+        return super.postprocess(Arrays.copyOfRange(sorted, fromIndex, toIndex));
+    }
+
+    @Override
+    public String toString() {
+        return "OutlierRemovalAverageSummary{lowerThreshold=" + lowerThreshold + ", upperThreshold=" + upperThreshold + "}";
+    }
+}
diff --git a/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/PolyBenchLauncher.java b/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/PolyBenchLauncher.java
@@ -489,12 +489,12 @@ private void runHarness(Context.Builder contextBuilder, boolean evalSourceOnly,
                 Workload workload = lookup(context, evalResult.languageId, evalResult.value, "run");
 
                 log("::: Running warmup :::");
-                repeatIterations(context, workload, evalResult.sourceName, true, config.warmupIterations);
+                repeatIterations(context, workload, evalResult.sourceName, true, config.warmupIterations, config.summary);
                 log("");
 
                 log("::: Running :::");
                 config.metric.reset();
-                repeatIterations(context, workload, evalResult.sourceName, false, config.iterations);
+                repeatIterations(context, workload, evalResult.sourceName, false, config.iterations, config.summary);
                 log("");
             }
 
@@ -515,9 +515,10 @@ private static String round(double v) {
         return String.format("%.2f", v);
     }
 
-    private void repeatIterations(Context context, Workload workload, String name, boolean warmup, int iterations) {
+    private void repeatIterations(Context context, Workload workload, String name, boolean warmup, int iterations, Summary summary) {
         // Enter explicitly to avoid context switches for each iteration.
         context.enter();
+        double[] iterationResults = new double[iterations];
         try {
             for (int i = 0; i < iterations; i++) {
                 config.metric.beforeIteration(warmup, i, config);
@@ -531,14 +532,19 @@ private void repeatIterations(Context context, Workload workload, String name, b
                 final Optional<Double> value = config.metric.reportAfterIteration(config);
                 if (value.isPresent()) {
                     log("[" + name + "] iteration " + i + ": " + round(value.get()) + " " + config.metric.unit());
+                    iterationResults[i] = value.get();
                 }
             }
 
+            log("------");
             final Optional<Double> value = config.metric.reportAfterAll();
             if (value.isPresent()) {
-                log("------");
                 log("[" + name + "] " + (warmup ? "after warmup: " : "after run: ") + round(value.get()) + " " + config.metric.unit());
             }
+            Optional<Double> summaryAggregate = summary != null ? summary.postprocess(iterationResults) : Optional.empty();
+            if (summaryAggregate.isPresent()) {
+                log("[" + name + "] " + (warmup ? "warmup" : "run") + " aggregate summary: " + round(summaryAggregate.get()) + " " + config.metric.unit());
+            }
         } finally {
             context.leave();
         }
diff --git a/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/Summary.java b/truffle/src/org.graalvm.polybench/src/org/graalvm/polybench/Summary.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.polybench;
+
+import java.util.Optional;
+
+/**
+ * Summarizes the results of a polybench benchmark with an aggregate metric value computed from
+ * iteration datapoints.
+ */
+interface Summary {
+    /**
+     * Computes the aggregate value for a sequence of iteration datapoints.
+     *
+     * @param results the datapoints to aggregate
+     * @return the aggregate value, or an empty {@link Optional} when no aggregate is produced
+     *         (for example when there are no datapoints to aggregate)
+     */
+    Optional<Double> postprocess(double[] results);
+}
diff --git a/vm/ci/ci_common/common-bench.jsonnet b/vm/ci/ci_common/common-bench.jsonnet
@@ -69,13 +69,14 @@ local repo_config = import '../../../ci/repo-configuration.libsonnet';
     # Extends the provided polybench command with common arguments used in CI. We want the command at the call site
     # to be simple (i.e., a flat array of string literals) so it can be easily copied and run locally; using this
     # wrapper allows us to inject CI-specific fields without specifying them in the command.
+    hwloc_command_prefix:: if std.length(std.find('bench', self.targets)) > 0 then ["hwloc-bind", "--cpubind", "node:0", "--membind", "node:0", "--"] else [],
     polybench_wrap(command)::
       assert command[0] == 'mx' : "polybench command should start with 'mx'";
       // Dynamically import /truffle-enterprise when running on enterprise.
       local extra_imports = if is_enterprise then ['--dy', '/truffle-enterprise'] else [];
-      ['mx'] + extra_imports + command[1:] + ['--mx-benchmark-args', '--results-file', self.result_file] +
+      self.hwloc_command_prefix + ['mx'] + extra_imports + command[1:] + ['--mx-benchmark-args', '--results-file', self.result_file] +
       (if (fail_fast) then ['--fail-fast'] else []),
-    notify_groups:: ['polybench'],
+    notify_groups:: ['polybench']
   },
 
   polybench_vm_hpc_common: self.polybench_vm_common('linux', 'amd64', skip_machine=true) + self.polybench_hpc_linux_common(shape='e4_8_64') + {
@@ -99,9 +100,9 @@ local repo_config = import '../../../ci/repo-configuration.libsonnet';
   },
 
   build_polybenchmarks: [
-      ['mx', '--env', '${VM_ENV}', '--dy', 'polybenchmarks', 'sforceimports'],
-      ['mx', '-p', '../../polybenchmarks', 'build_benchmarks'],
-      ['mx', '--dy', 'polybenchmarks', 'build', '--dependencies', 'POLYBENCHMARKS_BENCHMARKS']
+    ['mx', '--env', '${VM_ENV}', '--dy', 'polybenchmarks', 'sforceimports'],
+    ['mx', '-p', '../../polybenchmarks', 'build_benchmarks'],
+    ['mx', '--dy', 'polybenchmarks', 'build', '--dependencies', 'POLYBENCHMARKS_BENCHMARKS']
   ],
 
   js_bench_compilation_throughput(pgo): self.vm_bench_common + common.heap.default + {

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"README": "This file contains definitions that are useful for the jsonnet CI files of the graal and graal-enterprise repositories.",`
`3`	`3`	`"ci": {`
`4`		`- "overlay": "49f8376ffc0773e2069aea7be211e06eafc1088b"`
	`4`	`+ "overlay": "aecf4e4409bde4bfcc1009e5b8c895e150500dfa"`
`5`	`5`	`}`
`6`	`6`	`}`