
Commit a02aa36

Misc improvements to benchmarking infrastructure (#130)
This PR makes a couple of minor improvements to the benchmarking code:

- Actually check that the tests being run pass.
- Add more tests from the Chromium unittests and remove the Chromium browser_tests that won't run headlessly.
- Add a new utility, filter_tests.py, that runs through all of the tests in a test suite description and keeps only those that pass.
- Fix list_gtests.py (a single-character fix: main did not take a parameter before; see the sketch after the diff stats below).
- Add information on the new utility to the documentation.
- Add debugging info in benchmark_report_converter: when two benchmark sets differ, it now prints out the exact difference.
- Remove the advisor flag from the list of required flags when building the LLVM test suite, as it isn't required when the no-compile flags are set.
1 parent 0fb5861 commit a02aa36

13 files changed, +40377 -15731 lines
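The list_gtests.py change itself is not shown in the diffs below, but the failure mode follows from how absl dispatches the entry point: app.run(main) invokes main with the parsed argv list, so a zero-parameter main raises a TypeError at startup. A minimal sketch of the before/after shape (the script body here is hypothetical):

from absl import app

# Before the fix, the entry point presumably looked like `def main():`;
# since absl's app.run(main) calls main(argv), Python raised
# "TypeError: main() takes 0 positional arguments but 1 was given".
def main(_):  # the underscore accepts and discards argv
  print('listing gtests...')  # hypothetical body

if __name__ == '__main__':
  app.run(main)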

compiler_opt/benchmark/benchmark_chromium.py

Lines changed: 9 additions & 3 deletions
@@ -57,10 +57,16 @@
 
 FLAGS = flags.FLAGS
 
+test_prefix = './compiler_opt/benchmark/chromium_test_descriptions/'
+
+test_description_files = [
+    'base_perftests.json', 'browser_tests.json', 'components_perftests.json',
+    'base_unittests.json', 'cc_unittests.json', 'components_unittests.json',
+    'content_unittests.json'
+]
+
 default_test_descriptions = [
-    './compiler_opt/tools/chromium_test_descriptions/base_perftests.json',
-    './compiler_opt/tools/chromium_test_descriptions/browser_tests.json',
-    './compiler_opt/tools/chromium_test_descriptions/components_perftests.json'
+    f'{test_prefix}{test_dsc}' for test_dsc in test_description_files
 ]
 
 flags.DEFINE_multi_string(
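For reference, the comprehension above just prepends the shared directory to each file name; a quick illustration with two entries:

test_prefix = './compiler_opt/benchmark/chromium_test_descriptions/'
test_description_files = ['base_perftests.json', 'browser_tests.json']
default_test_descriptions = [
    f'{test_prefix}{test_dsc}' for test_dsc in test_description_files
]
# ['./compiler_opt/benchmark/chromium_test_descriptions/base_perftests.json',
#  './compiler_opt/benchmark/chromium_test_descriptions/browser_tests.json']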

compiler_opt/benchmark/benchmark_llvm_test_suite.py

Lines changed: 0 additions & 1 deletion
@@ -104,7 +104,6 @@
     'perf_counter', [], 'A perf counter to be used (may be defined more than'
     'once).')
 
-flags.mark_flag_as_required('advisor')
 flags.mark_flag_as_required('output_path')
 
 
compiler_opt/benchmark/benchmark_report.py

Lines changed: 10 additions & 1 deletion
@@ -24,6 +24,8 @@
 from typing import List
 from typing import Tuple
 
+from absl import logging
+
 # For each benchmark, and for each counter, capture the recorded values.
 PerBenchmarkResults = Dict[str, Dict[str, List[float]]]
 
@@ -130,9 +132,16 @@ class BenchmarkComparison:
   """Analysis of 2 benchmark runs."""
 
   def __init__(self, base_report: BenchmarkReport, exp_report: BenchmarkReport):
+    base_names_set = set(base_report.names())
+    exp_names_set = set(exp_report.names())
     if base_report.suite_name() != exp_report.suite_name():
       raise ValueError('cannot compare different suites')
-    if set(base_report.names()) != set(exp_report.names()):
+    if base_names_set != exp_names_set:
+      diff_base_exp = base_names_set.difference(exp_names_set)
+      diff_exp_base = exp_names_set.difference(base_names_set)
+      diff_set = diff_base_exp.union(diff_exp_base)
+      logging.info('The following tests differ between the test suites: %s',
+                   diff_set)
       raise ValueError('suite runs have different benchmark names')
     if set(base_report.counters()) != set(exp_report.counters()):
       raise ValueError(
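One observation on the hunk above: the union of the two one-way set differences is exactly the symmetric difference, so the same diff_set could also be computed with Python's ^ operator. A minimal sketch:

base_names_set = {'bench_a', 'bench_b', 'bench_c'}
exp_names_set = {'bench_b', 'bench_c', 'bench_d'}
# Union of both one-way differences, as in the hunk above.
diff_set = base_names_set.difference(exp_names_set).union(
    exp_names_set.difference(base_names_set))
assert diff_set == base_names_set ^ exp_names_set  # symmetric difference
assert diff_set == {'bench_a', 'bench_d'}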

compiler_opt/benchmark/benchmark_report_converter.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 To run:
 python3 \
-compiler_opt/benchmark/benchmark_report_counter.py \
+compiler_opt/benchmark/benchmark_report_converter.py \
 --base=/tmp/base_report.json \
 --exp=/tmp/exp_report.json \
 --counters=INSTRUCTIONS \

compiler_opt/benchmark/chromium_test_descriptions/base_unittests.json

Lines changed: 5445 additions & 0 deletions
Large diffs are not rendered by default.

compiler_opt/benchmark/chromium_test_descriptions/browser_tests.json

Lines changed: 2 additions & 15711 deletions
Large diffs are not rendered by default.

compiler_opt/benchmark/chromium_test_descriptions/cc_unittests.json

Lines changed: 6145 additions & 0 deletions
Large diffs are not rendered by default.

compiler_opt/benchmark/chromium_test_descriptions/components_unittests.json

Lines changed: 23898 additions & 0 deletions
Large diffs are not rendered by default.

compiler_opt/benchmark/chromium_test_descriptions/content_unittests.json

Lines changed: 4731 additions & 0 deletions
Large diffs are not rendered by default.
compiler_opt/benchmark/filter_tests.py

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A script for filtering gtests based on whether they pass or fail.
+
+Within Chromium specifically, there are some test executables with many tests
+that are excellent for benchmarking, but running these test suites in their
+entirety can be problematic: some subsets of the tests in the executable
+might require certain hardware configurations, such as an X configuration
+with working graphics drivers, and we would prefer to avoid those tests. This
+executable goes through an entire test suite description and returns another
+test suite description containing only tests that pass.
+
+Usage:
+PYTHONPATH=$PYTHONPATH:. python3 \
+./compiler_opt/benchmark/filter_tests.py \
+--input_tests=./compiler_opt/benchmark/chromium_test_descriptions\
+/browser_tests.json \
+--output_tests=./browser_tests_filtered.json \
+--num_threads=32 \
+--executable_path=/chromium/src/out/Release/browser_tests
+"""
+
+import json
+import os
+
+from absl import flags
+from absl import app
+from absl import logging
+
+from compiler_opt.benchmark import gtest_executable_utils
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('input_tests', '',
+                    'The path to the test description JSON to filter.')
+flags.DEFINE_string(
+    'output_tests', '',
+    'The path to the JSON file to place the output test suite '
+    'description.')
+flags.DEFINE_integer(
+    'num_threads', 1, 'The number of threads to use for running tests in '
+    'parallel.')
+flags.DEFINE_string(
+    'executable_path', '',
+    'The path to the Chromium build directory where all the '
+    'test executables are stored.')
+
+
+def main(_):
+  if not os.path.exists(FLAGS.executable_path):
+    logging.fatal('Executable path does not exist.')
+  with open(FLAGS.input_tests, encoding='UTF-8') as test_description_file:
+    test_suite_description = json.load(test_description_file)
+  test_outputs = gtest_executable_utils.run_test_suite(
+      test_suite_description, FLAGS.executable_path, [], FLAGS.num_threads)
+  test_list = []
+  for test_output in test_outputs:
+    test_list.append(test_output['name'])
+  # Copy the old test suite and just replace the tests array.
+  new_test_suite_description = test_suite_description
+  new_test_suite_description['tests'] = test_list
+  with open(FLAGS.output_tests, 'w', encoding='UTF-8') as tests_output_file:
+    json.dump(new_test_suite_description, tests_output_file)
+
+
+if __name__ == '__main__':
+  app.run(main)
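To make the data flow concrete, here is a hypothetical input/output pair; everything about the description schema beyond the 'tests' key, and the 'name' key on run_test_suite results, is an assumption for illustration. Note also that new_test_suite_description = test_suite_description above binds an alias rather than making a copy, which is harmless here because the original dict is never reused.

# Hypothetical test suite description; 'executable' is an assumed field.
# Only 'tests' is actually touched by main() above.
test_suite_description = {
    'executable': 'browser_tests',
    'tests': ['FooTest.Passes', 'GpuTest.NeedsX11'],
}
# Suppose run_test_suite reports that only FooTest.Passes succeeds headlessly.
test_outputs = [{'name': 'FooTest.Passes'}]
# The script keeps the rest of the description and replaces only 'tests'.
test_suite_description['tests'] = [t['name'] for t in test_outputs]
assert test_suite_description['tests'] == ['FooTest.Passes']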
