Skip to content

Commit f26ceaf

Browse files
khokhlov962 authored and V8 LUCI CQ committed
[LoadLine] Output loading stage breakdown
Refine the breakdown query for the LoadLine benchmark:
* Drop the "init" stage since it's not useful
* Track both "network" and "process_launch" from the navigation start
* Make the query work for non-debug loadline (remove dependency on extra categories)

Also add the query to the non-debug config and print the breakdown along with the benchmark scores.

Change-Id: Ifb4c15d439bcd5a07730206e641655bd07cfd6b3
Reviewed-on: https://chromium-review.googlesource.com/c/crossbench/+/6633000
Commit-Queue: Mikhail Khokhlov <[email protected]>
Reviewed-by: Victor Vianna <[email protected]>
1 parent 0b90d88 commit f26ceaf

File tree

10 files changed

+189
-56
lines changed

10 files changed

+189
-56
lines changed

config/benchmark/loadline/probe_config.hjson

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
trace_processor: {
66
queries: [
77
"loadline/benchmark_score",
8+
"loadline/breakdown",
89
],
910
batch: false,
1011
},

config/benchmark/loadline/probe_config_experimental.hjson

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
trace_processor: {
44
queries: [
55
"loadline/benchmark_score",
6+
"loadline/breakdown",
67
"loadline/experimental/cpu",
78
"loadline/experimental/dom",
89
"loadline/experimental/interaction_latency",

crossbench/benchmarks/loadline/loadline.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,30 +33,46 @@ class LoadLineProbe(BenchmarkProbeMixin, Probe):
3333
BENCHMARK_NAME: str = "LoadLine"
3434
BENCHMARK_VERSION: str = ""
3535

36+
def __init__(self, *args, **kwargs):
37+
super().__init__(*args, **kwargs)
38+
self._scores_file: Optional[pth.LocalPath] = None
39+
self._breakdown_file: Optional[pth.LocalPath] = None
40+
3641
@override
3742
def log_browsers_result(self, group: BrowsersRunGroup) -> None:
38-
logging.info("-" * 80)
3943
logging.critical("%s Benchmark (%s)", self.BENCHMARK_NAME,
4044
self.BENCHMARK_VERSION)
41-
logging.critical("%s results:", self.BENCHMARK_NAME)
45+
logging.info("-" * 80)
46+
logging.critical("%s scores:", self.BENCHMARK_NAME)
47+
logging.critical(
48+
tabulate(
49+
pd.read_csv(self._scores_file), headers="keys", tablefmt="plain"))
4250
logging.info("- " * 40)
51+
logging.critical("%s breakdown (loading stage durations, in ms):",
52+
self.BENCHMARK_NAME)
4353
logging.critical(
4454
tabulate(
45-
pd.read_csv(
46-
group.get_local_probe_result_path(self).with_suffix(".csv")),
47-
headers="keys",
55+
pd.read_csv(self._breakdown_file), headers="keys",
4856
tablefmt="plain"))
4957

5058
@override
5159
def merge_browsers(self, group: BrowsersRunGroup) -> ProbeResult:
52-
csv_file = group.get_local_probe_result_path(self).with_suffix(".csv")
53-
self._compute_score(group).to_csv(csv_file)
54-
return LocalProbeResult(csv=(csv_file,))
60+
self._scores_file = group.get_local_probe_result_path(self).with_name(
61+
"benchmark_score.csv")
62+
self._compute_score(group).to_csv(self._scores_file)
63+
self._breakdown_file = group.get_local_probe_result_path(self).with_name(
64+
"breakdown.csv")
65+
self._compute_breakdown(group).to_csv(self._breakdown_file)
66+
return LocalProbeResult(csv=(self._scores_file, self._breakdown_file))
5567

5668
@abc.abstractmethod
5769
def _compute_score(self, group: BrowsersRunGroup) -> pd.DataFrame:
5870
pass
5971

72+
@abc.abstractmethod
73+
def _compute_breakdown(self, group: BrowsersRunGroup) -> pd.DataFrame:
74+
pass
75+
6076

6177
class LoadLinePageFilter(LoadingPageFilter):
6278
"""LoadLine benchmark for phone/tablet."""

crossbench/benchmarks/loadline/loadline_1.py

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from typing import TYPE_CHECKING, Type
88

9+
import logging
910
import numpy as np
1011
import pandas as pd
1112
from typing_extensions import override
@@ -29,6 +30,30 @@
2930
VERSION_STRING = "1.3.0"
3031

3132

33+
def process_scores(df: pd.DataFrame) -> pd.DataFrame:
34+
df = df.groupby(["cb_browser",
35+
"cb_story"])["score"].mean().reset_index().pivot(
36+
columns=["cb_story"],
37+
index=["cb_browser"],
38+
values=["score"])
39+
df = df.droplevel(0, axis=1)
40+
df["TOTAL_SCORE"] = np.exp(np.log(df).mean(axis=1))
41+
df.index.rename("browser", inplace=True)
42+
df = df.reindex(
43+
columns=(["TOTAL_SCORE"] +
44+
sorted(list(c for c in df.columns if c != "TOTAL_SCORE"))))
45+
return df
46+
47+
48+
def process_breakdown(df: pd.DataFrame) -> pd.DataFrame:
49+
df["os"] = df[["network", "process_launch"]].max(axis=1)
50+
df = df.groupby(["cb_browser", "cb_story"
51+
])[["os", "renderer", "compositor", "gpu",
52+
"surfaceflinger"]].mean()
53+
df.index.names = ["browser", "story"]
54+
return df
55+
56+
3257
class LoadLine1Probe(LoadLineProbe):
3358
NAME = "loadline_probe"
3459
BENCHMARK_NAME = "LoadLine"
@@ -38,29 +63,30 @@ class LoadLine1Probe(LoadLineProbe):
3863
def get_context_cls(self,) -> Type[LoadLine1ProbeContext]:
3964
return LoadLine1ProbeContext
4065

41-
@override
42-
def _compute_score(self, group: BrowsersRunGroup) -> pd.DataFrame:
66+
def _load_query_result(self, group: BrowsersRunGroup,
67+
query: str) -> pd.DataFrame:
4368
all_results = group.results.get_by_name(TraceProcessorProbe.NAME).csv_list
44-
loadline_result: pth.LocalPath | None = None
69+
query_result: pth.LocalPath | None = None
4570
for result in all_results:
46-
# Look for the trace processor query result.
47-
if result.name == "loadline_benchmark_score.csv":
48-
loadline_result = result
71+
if result.stem == query:
72+
query_result = result
4973
break
50-
assert loadline_result is not None, f"{self.NAME}: query result not found"
51-
52-
df = pd.read_csv(loadline_result)
53-
df = df.groupby(["cb_browser",
54-
"cb_story"])["score"].mean().reset_index().pivot(
55-
columns=["cb_story"],
56-
index=["cb_browser"],
57-
values=["score"])
58-
df = df.droplevel(0, axis=1)
59-
df["TOTAL_SCORE"] = np.exp(np.log(df).mean(axis=1))
60-
df.index.rename("browser", inplace=True)
61-
return df.reindex(
62-
columns=(["TOTAL_SCORE"] +
63-
sorted(list(c for c in df.columns if c != "TOTAL_SCORE"))))
74+
assert query_result is not None, f"{self.NAME}: {query} result not found"
75+
return pd.read_csv(query_result)
76+
77+
@override
78+
def _compute_score(self, group: BrowsersRunGroup) -> pd.DataFrame:
79+
df = self._load_query_result(group, "loadline_benchmark_score")
80+
return process_scores(df)
81+
82+
@override
83+
def _compute_breakdown(self, group: BrowsersRunGroup) -> pd.DataFrame:
84+
df = self._load_query_result(group, "loadline_breakdown")
85+
if any(df["network"] > df["process_launch"]):
86+
logging.warning("Some runs were affected by network latency. "
87+
"Results can be non-representative.")
88+
return process_breakdown(df)
89+
6490

6591

6692
class LoadLine1ProbeContext(ProbeContext[LoadLine1Probe]):

crossbench/benchmarks/loadline/loadline_2.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ def _compute_score(self, group: BrowsersRunGroup) -> pd.DataFrame:
5757
columns=(["TOTAL_SCORE"] +
5858
sorted(list(c for c in total.columns if c != "TOTAL_SCORE"))))
5959

60+
@override
61+
def _compute_breakdown(self, group: BrowsersRunGroup) -> pd.DataFrame:
62+
# TODO(crbug.com/425325733): Implement breakdown for LoadLine 2.
63+
return pd.DataFrame(index=pd.Index([], name="Not implemented"))
64+
6065

6166
class LoadLine2ProbeContext(ProbeContext[LoadLine2Probe]):
6267

crossbench/probes/perfetto/trace_processor/modules/ext/loadline_stages.sql

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,31 @@
1+
-- Create tables with page loading breakdown into stages for the LoadLine
2+
-- benchmark.
3+
-- TODO(crbug.com/425325733): Support LoadLine 2 as well.
4+
15
INCLUDE PERFETTO MODULE ext.loadline_benchmark;
26

37
DROP VIEW IF EXISTS loadline_presentation;
48
CREATE VIEW loadline_presentation AS
59
SELECT
610
first_navigation_start() + 60e9 / loadline_benchmark_score() AS presentation;
711

12+
-- Finds the "Commit sent" moment which is the time when the browser gets the
13+
-- response from the network stack.
814
DROP VIEW IF EXISTS loadline_request;
915
CREATE VIEW loadline_request AS
10-
SELECT ts AS start_request, ts + dur AS end_request
16+
SELECT MIN(ts) AS end_request
1117
FROM slice
1218
WHERE
13-
name = 'WillStartRequest'
14-
AND ts >= first_navigation_start()
15-
ORDER BY ts
16-
LIMIT 1;
19+
name = 'CommitSentToFirstSubresourceLoadStart'
20+
AND ts >= first_navigation_start();
1721

1822
DROP VIEW IF EXISTS loadline_renderer_ready;
1923
CREATE VIEW loadline_renderer_ready AS
20-
SELECT ts + dur AS renderer_ready
24+
SELECT MIN(ts) AS renderer_ready
2125
FROM slice
2226
WHERE
23-
name = 'ReadyToCommitNavigation'
24-
AND ts >= first_navigation_start()
25-
ORDER BY ts
26-
LIMIT 1;
27+
name = 'DocumentLoader::CommitNavigation'
28+
AND ts >= first_navigation_start();
2729

2830
-- Find the frame in the pipeline which was chosen as the "loading complete"
2931
-- moment for the purpose of LoadLine score. The exact end timestamp might
@@ -69,7 +71,6 @@ DROP VIEW IF EXISTS loadline_stages;
6971
CREATE VIEW loadline_stages AS
7072
SELECT
7173
first_navigation_start() AS navigation_start,
72-
start_request,
7374
end_request,
7475
renderer_ready,
7576
frame_commit,

crossbench/probes/perfetto/trace_processor/queries/loadline/breakdown.sql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@ INCLUDE PERFETTO MODULE ext.loadline_stages;
33
-- Reports durations of loadline stages in milliseconds.
44
-- Stages approximately correspond to the Chrome subsystem which is most
55
-- important for the page loading performance.
6+
-- Note that "network" and "process_launch" stages happen in parallel, so page
7+
-- load is only blocked on the longer of the two.
68
-- For more info on page loading process in Chrome, see the following docs:
79
-- https://chromium.googlesource.com/chromium/src/+/main/docs/navigation.md
810
-- https://chromium.googlesource.com/chromium/src/+/main/docs/life_of_a_frame.md
911
-- https://chromium.googlesource.com/chromium/src/+/main/components/page_load_metrics/
1012
SELECT
11-
(start_request - navigation_start) / 1e6 AS init,
12-
(end_request - start_request) / 1e6 AS network,
13-
(renderer_ready - end_request) / 1e6 AS launch,
14-
(frame_commit - renderer_ready) / 1e6 AS renderer,
13+
(end_request - navigation_start) / 1e6 AS network,
14+
(renderer_ready - navigation_start) / 1e6 AS process_launch,
15+
(frame_commit - MAX(renderer_ready, end_request)) / 1e6 AS renderer,
1516
(submit_compositor_frame - frame_commit) / 1e6 AS compositor,
1617
(frame_swap - submit_compositor_frame) / 1e6 AS gpu,
1718
(presentation - frame_swap) / 1e6 AS surfaceflinger

tests/crossbench/benchmarks/test_loadline.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,19 @@
1111
import datetime as dt
1212
from typing import Sequence
1313

14+
import pandas as pd
1415
from typing_extensions import override
1516

1617
from crossbench.action_runner.default_action_runner import DefaultActionRunner
1718
from crossbench.benchmarks.loadline import (LoadLine1PhoneBenchmark,
1819
LoadLine1TabletBenchmark)
20+
from crossbench.benchmarks.loadline import loadline_1
1921
from crossbench.benchmarks.loadline.loadline import LoadLinePageFilter
2022
from crossbench.benchmarks.loading.playback_controller import \
2123
PlaybackController
2224
from crossbench.benchmarks.loading.tab_controller import TabController
2325
from tests import test_helper
24-
from tests.crossbench.base import BaseCliTestCase
26+
from tests.crossbench.base import BaseCliTestCase, BaseCrossbenchTestCase
2527
from tests.crossbench.benchmarks.helper import SubStoryTestCase
2628

2729

@@ -100,8 +102,49 @@ def test_run_default_tablet(self):
100102
pass
101103

102104

105+
class TestLoadLine1Helpers(BaseCrossbenchTestCase):
106+
107+
def test_process_scores(self):
108+
query_result = pd.DataFrame(
109+
columns=["score", "cb_browser", "cb_story", "cb_temperature", "cb_run"],
110+
data=[[4, "chrome", "story1", 0, 0], [6, "chrome", "story1", 0, 1],
111+
[19, "chrome", "story2", 0, 0], [21, "chrome", "story2", 0, 1]])
112+
scores = loadline_1.process_scores(query_result)
113+
114+
self.assertEqual(scores.shape, (1, 3))
115+
self.assertAlmostEqual(scores["TOTAL_SCORE"][0], 10)
116+
self.assertAlmostEqual(scores["story1"][0], 5)
117+
self.assertAlmostEqual(scores["story2"][0], 20)
118+
119+
def test_process_breakdown(self):
120+
query_result = pd.DataFrame(
121+
columns=[
122+
"network", "process_launch", "renderer", "compositor", "gpu",
123+
"surfaceflinger", "cb_browser", "cb_story", "cb_temperature",
124+
"cb_run"
125+
],
126+
data=[[5, 3, 9, 11, 10, 10, "chrome", "story1", 0, 0],
127+
[5, 3, 11, 9, 10, 10, "chrome", "story1", 0, 1],
128+
[7, 10, 19, 21, 20, 20, "chrome", "story2", 0, 0],
129+
[7, 10, 21, 19, 20, 20, "chrome", "story2", 0, 1]])
130+
breakdown = loadline_1.process_breakdown(query_result)
131+
132+
self.assertEqual(breakdown.shape, (2, 5))
133+
self.assertAlmostEqual(breakdown["os"][0], 5)
134+
self.assertAlmostEqual(breakdown["os"][1], 10)
135+
self.assertAlmostEqual(breakdown["renderer"][0], 10)
136+
self.assertAlmostEqual(breakdown["renderer"][1], 20)
137+
self.assertAlmostEqual(breakdown["compositor"][0], 10)
138+
self.assertAlmostEqual(breakdown["compositor"][1], 20)
139+
self.assertAlmostEqual(breakdown["gpu"][0], 10)
140+
self.assertAlmostEqual(breakdown["gpu"][1], 20)
141+
self.assertAlmostEqual(breakdown["surfaceflinger"][0], 10)
142+
self.assertAlmostEqual(breakdown["surfaceflinger"][1], 20)
143+
144+
103145
# Don't expose abstract base test cases.
104146
del BaseLoadLineBenchmarkTestCase
147+
del BaseCrossbenchTestCase
105148
del BaseCliTestCase
106149
del SubStoryTestCase
107150

0 commit comments

Comments
 (0)