Add --drop-exec option to filter warmup samples

aemerson · aemerson · commit e1ff5cf36827 · 2025-10-24T22:14:07.000-07:00
This commit introduces a new --drop-exec option that allows users to
drop the first N execution samples when running with --exec-multisample.
This is useful for mitigating warmup effects that can skew performance
measurements.

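The option accepts an integer N specifying how many initial samples to
drop; N must be at least 1 and smaller than the --exec-multisample count.
It works with all execution modes (normal, --exec, and
--exec-interleaved-builds) and is rejected when combined with
--only-compile or --build.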
diff --git a/lnt/tests/test_suite.py b/lnt/tests/test_suite.py
@@ -308,6 +308,23 @@ def run_test(self, opts):
             self._fatal("--single-result must be given a single test name, "
                         "not a directory name")
 
+        # Parse and validate --drop-exec option
+        if opts.drop_exec is not None:
+            # Check for incompatible options
+            if opts.only_compile:
+                self._fatal("--drop-exec cannot be used with --only-compile")
+            if opts.build:
+                self._fatal("--drop-exec cannot be used with --build")
+            if opts.exec_multisample <= 1:
+                self._fatal("--drop-exec requires --exec-multisample > 1")
+
+            # opts.drop_exec is already an integer from click
+            if opts.drop_exec < 1:
+                self._fatal("--drop-exec must be at least 1")
+
+            if opts.drop_exec >= opts.exec_multisample:
+                self._fatal("--drop-exec would drop all %d samples" % opts.exec_multisample)
+
         opts.cppflags = ' '.join(opts.cppflags)
         opts.cflags = ' '.join(opts.cflags)
         opts.cxxflags = ' '.join(opts.cxxflags)
@@ -428,6 +445,9 @@ def run_test(self, opts):
                 reports.append(run_report)
                 json_reports.append(json_data)
 
+        # Filter execution samples if --drop-exec was specified
+        reports, json_reports = self._filter_exec_samples(reports, json_reports)
+
         report = self._create_merged_report(reports)
 
         # Write the report out so it can be read by the submission tool.
@@ -537,8 +557,14 @@ def _run_interleaved_builds(self, opts):
             build_dir = build_info['build_dir']
             logger.info("Writing report for build: %s" % build_dir)
 
+            # Filter execution samples if --drop-exec was specified
+            build_reports, build_json_reports = self._filter_exec_samples(
+                reports_by_build[build_idx],
+                json_by_build[build_idx]
+            )
+
             # Merge reports for this build
-            merged_report = self._create_merged_report(reports_by_build[build_idx])
+            merged_report = self._create_merged_report(build_reports)
 
             # Write JSON report to build directory
             report_path = os.path.join(build_dir, 'report.json')
@@ -548,13 +574,13 @@ def _run_interleaved_builds(self, opts):
 
             # Write xUnit XML to build directory
             xml_path = os.path.join(build_dir, 'test-results.xunit.xml')
-            str_template = _lit_json_to_xunit_xml(json_by_build[build_idx])
+            str_template = _lit_json_to_xunit_xml(build_json_reports)
             with open(xml_path, 'w') as fd:
                 fd.write(str_template)
 
             # Write CSV to build directory
             csv_path = os.path.join(build_dir, 'test-results.csv')
-            str_template = _lit_json_to_csv(json_by_build[build_idx])
+            str_template = _lit_json_to_csv(build_json_reports)
             with open(csv_path, 'w') as fd:
                 fd.write(str_template)
 
@@ -607,6 +633,32 @@ def _create_merged_report(self, reports):
         test_samples = sum([r.tests for r in reports], [])
         return lnt.testing.Report(machine, run, test_samples)
 
+    def _filter_exec_samples(self, reports, json_reports):
+        """Discard leading reports as requested by the --drop-exec option.
+
+        The first ``self.opts.drop_exec`` entries are sliced off both lists;
+        both are returned as given when the option is None or 0, or when
+        ``reports`` is empty. Returns a (reports, json_reports) pair.
+        """
+        n_drop = self.opts.drop_exec
+        if not n_drop or not reports:
+            return reports, json_reports
+
+        kept_reports = reports[n_drop:]
+        kept_json = json_reports[n_drop:]
+
+        # Announce which leading iterations were discarded.
+        if n_drop == 1:
+            dropped_msg = "Dropping first execution sample (iteration 0)"
+        else:
+            fmt = "Dropping first %d execution samples (iterations 0-%d)"
+            dropped_msg = fmt % (n_drop, n_drop - 1)
+        logger.info(dropped_msg)
+
+        logger.info("Kept %d of %d execution samples after --drop-exec filtering" %
+                    (len(kept_reports), len(reports)))
+        return kept_reports, kept_json
+
     def _test_suite_dir(self):
         return self.opts.test_suite_root
 
@@ -1341,6 +1393,10 @@ def diagnose(self):
 @click.option("--compile-multisample", "compile_multisample",
               help="Accumulate compile test data from multiple runs",
               type=int, default=1, metavar="N")
+@click.option("--drop-exec", "drop_exec",
+              help="Drop the first N execution samples to mitigate warmup effects. "
+                   "Used with --exec-multisample.",
+              type=int, default=None, metavar="N")
 @click.option("-d", "--diagnose", "diagnose",
               help="Produce a diagnostic report for a particular "
                    "test, this will not run all the tests.  Must be"
diff --git a/tests/runtest/test_suite-drop-exec.shtest b/tests/runtest/test_suite-drop-exec.shtest
@@ -0,0 +1,128 @@
+# Check --drop-exec feature for filtering execution samples
+# This test verifies that --drop-exec correctly drops first N execution samples
+
+# Test 1: --drop-exec without --exec-multisample should fail
+# RUN: not lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-ERR1 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --drop-exec 1 \
+# RUN:     > %t.err1.log 2> %t.err1.err
+# RUN: filecheck --check-prefix CHECK-ERR1 < %t.err1.err %s
+# CHECK-ERR1: --drop-exec requires --exec-multisample > 1
+
+# Test 2: --drop-exec with --only-compile should fail
+# RUN: not lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-ERR2 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --only-compile \
+# RUN:     --drop-exec 1 \
+# RUN:     > %t.err2.log 2> %t.err2.err
+# RUN: filecheck --check-prefix CHECK-ERR2 < %t.err2.err %s
+# CHECK-ERR2: --drop-exec cannot be used with --only-compile
+
+# Test 3: --drop-exec with --build should fail
+# RUN: not lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-ERR3 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --build \
+# RUN:     --drop-exec 1 \
+# RUN:     > %t.err3.log 2> %t.err3.err
+# RUN: filecheck --check-prefix CHECK-ERR3 < %t.err3.err %s
+# CHECK-ERR3: --drop-exec cannot be used with --build
+
+# Test 4: --drop-exec dropping all samples should fail
+# RUN: not lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-ERR4 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --exec-multisample 2 \
+# RUN:     --drop-exec 2 \
+# RUN:     > %t.err4.log 2> %t.err4.err
+# RUN: filecheck --check-prefix CHECK-ERR4 < %t.err4.err %s
+# CHECK-ERR4: --drop-exec would drop all 2 samples
+
+# Test 5: --drop-exec 1 with 3 samples should work
+# RUN: rm -rf %t.SANDBOX-DROP1
+# RUN: lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-DROP1 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --exec-multisample 3 \
+# RUN:     --drop-exec 1 \
+# RUN:     --output %t.drop1.json \
+# RUN:     > %t.drop1.log 2> %t.drop1.err
+# RUN: filecheck --check-prefix CHECK-DROP1 < %t.drop1.err %s
+# CHECK-DROP1: Dropping first execution sample (iteration 0)
+# CHECK-DROP1: Kept 2 of 3 execution samples after --drop-exec filtering
+
+# Test 6: --drop-exec 2 with 5 samples should work
+# RUN: rm -rf %t.SANDBOX-DROP2
+# RUN: lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-DROP2 \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --exec-multisample 5 \
+# RUN:     --drop-exec 2 \
+# RUN:     --output %t.drop2.json \
+# RUN:     > %t.drop2.log 2> %t.drop2.err
+# RUN: filecheck --check-prefix CHECK-DROP2 < %t.drop2.err %s
+# CHECK-DROP2: Dropping first 2 execution samples (iterations 0-1)
+# CHECK-DROP2: Kept 3 of 5 execution samples after --drop-exec filtering
+
+# Test 7: --drop-exec when executing from a prebuilt build directory (--exec --build-dir)
+# First build
+# RUN: rm -rf %t.SANDBOX-BUILD-PREBUILT
+# RUN: lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-BUILD-PREBUILT \
+# RUN:     --no-timestamp \
+# RUN:     --test-suite %S/Inputs/test-suite-cmake \
+# RUN:     --cc %{shared_inputs}/FakeCompilers/clang-r154331 \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-make %S/Inputs/test-suite-cmake/fake-make \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --build \
+# RUN:     > %t.build-prebuilt.log 2> %t.build-prebuilt.err
+
+# Now test prebuilt with --drop-exec
+# RUN: rm -rf %t.SANDBOX-PREBUILT-TEST
+# RUN: lnt runtest test-suite \
+# RUN:     --sandbox %t.SANDBOX-PREBUILT-TEST \
+# RUN:     --no-timestamp \
+# RUN:     --exec \
+# RUN:     --build-dir %t.SANDBOX-BUILD-PREBUILT/build \
+# RUN:     --use-cmake %S/Inputs/test-suite-cmake/fake-cmake \
+# RUN:     --use-lit %S/Inputs/test-suite-cmake/fake-lit \
+# RUN:     --exec-multisample 3 \
+# RUN:     --drop-exec 1 \
+# RUN:     --output %t.prebuilt-drop.json \
+# RUN:     > %t.prebuilt-drop.log 2> %t.prebuilt-drop.err
+# RUN: filecheck --check-prefix CHECK-PREBUILT-DROP < %t.prebuilt-drop.err %s
+# CHECK-PREBUILT-DROP: Dropping first execution sample (iteration 0)
+# CHECK-PREBUILT-DROP: Kept 2 of 3 execution samples after --drop-exec filtering