[benchmark] BenchmarkDoctor checks setup time

palimondo · palimondo · commit a24d0ff7a50c · 2018-10-15T09:06:38.000+02:00
Add a check against unreasonably long setup times for benchmarks that do their initialization work in the `setUpFunction`. Given that typical benchmark measurements last about 1 second, it’s reasonable to expect the setup to take at most 20% extra on top of that: 200 ms.

The `DictionaryKeysContains*` benchmarks are instances of this mistake. The setup of `DictionaryKeysContainsNative` takes 3 seconds on my machine, to prepare a dictionary for the run function, whose typical runtime is 90 μs. The setup of the Cocoa version takes 8 seconds!!! It is trivial to rewrite these with much smaller dictionaries that demonstrate the point of these benchmarks perfectly well, without the need to wait for ages to set up these benchmarks.
diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
@@ -296,6 +296,7 @@ class BenchmarkDoctor(object):
             self._name_matches_capital_words_convention,
             self._name_is_at_most_40_chars_long,
             self._no_setup_overhead,
+            self._reasonable_setup_time,
             self._optimized_runtime_in_range,
             self._constant_memory_use
         ]
@@ -383,6 +384,17 @@ class BenchmarkDoctor(object):
                 'Move initialization of benchmark data to the `setUpFunction` '
                 'registered in `BenchmarkInfo`.')
 
+    @staticmethod
+    def _reasonable_setup_time(measurements):
+        setup = min([result.setup
+                     for result in BenchmarkDoctor._select(measurements)])
+        if 200000 < setup:  # 200 ms
+            BenchmarkDoctor.log_runtime.error(
+                "'%s' setup took at least %d μs.",
+                measurements['name'], setup)
+            BenchmarkDoctor.log_runtime.info(
+                'The `setUpFunction` should take no more than 200 ms.')
+
     @staticmethod
     def _constant_memory_use(measurements):
         select = BenchmarkDoctor._select
diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py
@@ -421,9 +421,9 @@ def test_no_prefix_for_base_logging(self):
         self.assertEquals(f.format(lr), 'INFO Hi!')
 
 
-def _PTR(min=700, mem_pages=1000):
+def _PTR(min=700, mem_pages=1000, setup=None):
     """Create PerformanceTestResult Stub."""
-    return Stub(min=min, mem_pages=mem_pages)
+    return Stub(min=min, mem_pages=mem_pages, setup=setup)
 
 
 def _run(test, num_samples=None, num_iters=None, verbose=None,
@@ -621,6 +621,29 @@ def test_benchmark_has_no_significant_setup_overhead(self):
             ["Move initialization of benchmark data to the `setUpFunction` "
              "registered in `BenchmarkInfo`."], self.logs['info'])
 
+    def test_benchmark_setup_takes_reasonable_time(self):
+        """Setup < 200 ms (20% extra on top of the typical 1 s measurement)"""
+        with captured_output() as (out, _):
+            doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([]))
+            doctor.analyze({
+                'name': 'NormalSetup',
+                'NormalSetup O i1a': _PTR(setup=199999),
+                'NormalSetup O i2a': _PTR(setup=200001)})
+            doctor.analyze({
+                'name': 'LongSetup',
+                'LongSetup O i1a': _PTR(setup=200001),
+                'LongSetup O i2a': _PTR(setup=200002)})
+        output = out.getvalue()
+
+        self.assertIn('runtime: ', output)
+        self.assertNotIn('NormalSetup', output)
+        self.assert_contains(
+            ["'LongSetup' setup took at least 200001 μs."],
+            self.logs['error'])
+        self.assert_contains(
+            ["The `setUpFunction` should take no more than 200 ms."],
+            self.logs['info'])
+
     def test_benchmark_has_constant_memory_use(self):
         """Benchmark's memory footprint must not vary with num-iters."""
         with captured_output() as (out, _):