Skip to content

Commit 11856b0

Browse files
Merge pull request #798 from square/sedwards/add-high-freq-benchmark
Add High Frequency Event Tests and Benchmarks and cleanup
2 parents f8d97d9 + b81fd1f commit 11856b0

File tree

12 files changed

+745
-523
lines changed

12 files changed

+745
-523
lines changed

benchmarks/performance-poetry/complex-benchmark/src/main/java/com/squareup/benchmarks/performance/complex/poetry/benchmark/ComplexPoetryBenchmarks.kt

Lines changed: 146 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,9 @@ import org.junit.runner.RunWith
3030
* [benchmarkStartup] will measure startup times with different compilation modes.
3131
* The above can be run as tests using Full, Partial, or No aot compiling on the app.
3232
*
33-
* [benchmarkNodeAndRenderPassTraceSections] will measure the trace timings instrumented via the
34-
* [PerformanceTracingInterceptor] installed by default in the Workflow tree.
35-
*
36-
* [benchmarkLatencyTraceSections] will measure the time between a UI event to producing a
37-
* Rendering and the time between the Rendering and the Choreographer rendering the frame.
33+
* For the rest of the benchmarks, see individual kdoc.
3834
*/
35+
@OptIn(ExperimentalMetricApi::class)
3936
@RunWith(AndroidJUnit4::class)
4037
class ComplexPoetryBenchmarks {
4138
@get:Rule
@@ -83,31 +80,49 @@ class ComplexPoetryBenchmarks {
8380
* a Nexus 6.
8481
*/
8582
@Test fun benchmarkNodeAndRenderPassTraceSectionsFullAot() {
86-
benchmarkNodeAndRenderPassTraceSections(CompilationMode.Full())
83+
benchmarkNodeAndRenderPassTraceSections()
8784
}
8885

89-
@OptIn(ExperimentalMetricApi::class)
90-
private fun benchmarkNodeAndRenderPassTraceSections(compilationMode: CompilationMode) {
91-
val traceMetricsList = NODES_TO_TRACE.flatMap { node ->
92-
List(RENDER_PASSES + 1) { i ->
93-
val passNumber = i.toString()
94-
.padStart(PerformanceTracingInterceptor.RENDER_PASS_DIGITS, '0')
95-
val sectionName = "${passNumber}_Render_Pass_Node_${node.first}_"
96-
TraceSectionMetric(sectionName)
97-
}
98-
} + List(RENDER_PASSES + 1) { i ->
99-
val passNumber = i.toString()
86+
/**
87+
* If you thought the test above was long, this one is 10x as long because of how
88+
* many render pass sections we are tracing (though we try to sample only half of them).
89+
*/
90+
@Test fun benchmarkNodeAndRenderPassTraceSectionsFullAotHighFrequency() {
91+
benchmarkNodeAndRenderPassTraceSections(iterations = 5, useHighFrequencyEvents = true)
92+
}
93+
94+
private fun benchmarkNodeAndRenderPassTraceSections(
95+
iterations: Int = 10,
96+
useHighFrequencyEvents: Boolean = false
97+
) {
98+
val renderPasses =
99+
if (useHighFrequencyEvents) RENDER_PASSES_HIGH_FREQUENCY / 2 else RENDER_PASSES
100+
val traceMetricsList = List(renderPasses + 1) { i ->
101+
// Just sample every other render pass for high frequency.
102+
val index = if (useHighFrequencyEvents) i * 2 else i
103+
val passNumber = index.toString()
100104
.padStart(PerformanceTracingInterceptor.RENDER_PASS_DIGITS, '0')
101105
val sectionName = "${passNumber}_Render_Pass_"
102106
TraceSectionMetric(sectionName)
107+
}.toMutableList()
108+
if (!useHighFrequencyEvents) {
109+
// If we don't have high frequency events then trace each node.
110+
traceMetricsList += NODES_TO_TRACE.flatMap { node ->
111+
List(renderPasses + 1) { i ->
112+
val passNumber = i.toString()
113+
.padStart(PerformanceTracingInterceptor.RENDER_PASS_DIGITS, '0')
114+
val sectionName = "${passNumber}_Render_Pass_Node_${node.first}_"
115+
TraceSectionMetric(sectionName)
116+
}
117+
}
103118
}
104119

105120
benchmarkRule.measureRepeated(
106121
packageName = PACKAGE_NAME,
107122
metrics = traceMetricsList,
108-
iterations = 20,
123+
iterations = iterations,
109124
startupMode = StartupMode.WARM,
110-
compilationMode = compilationMode,
125+
compilationMode = CompilationMode.Full(),
111126
setupBlock = {
112127
pressHome()
113128
device.landscapeOrientation()
@@ -117,6 +132,12 @@ class ComplexPoetryBenchmarks {
117132
intent.apply {
118133
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_INITIALIZING, true)
119134
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_RENDERING, true)
135+
if (useHighFrequencyEvents) {
136+
putExtra(
137+
PerformancePoetryActivity.EXTRA_PERF_CONFIG_REPEAT,
138+
PerformancePoetryActivity.HIGH_FREQUENCY_REPEAT_COUNT
139+
)
140+
}
120141
}
121142
}
122143
device.landscapeOrientation()
@@ -125,15 +146,15 @@ class ComplexPoetryBenchmarks {
125146
}
126147
}
127148

128-
/**
129-
* Another LONG test.
130-
*/
131-
@Test fun benchmarkLatencyTraceSectionsFullAot() {
132-
benchmarkLatencyTraceSections(CompilationMode.Full())
133-
}
149+
@Test fun benchmarkActionTraceSectionsRegularFrequency() = benchmarkActionTraceSections()
150+
151+
@Test fun benchmarkActionTraceSectionsHighFrequency() = benchmarkActionTraceSections(
152+
highFrequency = true,
153+
iterations = 5
154+
)
134155

135156
/**
136-
* This test is focused on measuring the latency of Workflow's 'work'. By that we mean how much
157+
* This test is focused on measuring the length of Workflow's 'work'. By that we mean how much
137158
* time Workflow spends producing the next set of updates to the View's that can be passed
138159
* to Android to draw as a frame.
139160
*
@@ -152,28 +173,98 @@ class ComplexPoetryBenchmarks {
152173
* - "E-<Screen>-<Event>-XX"
153174
*
154175
* Where XX is the counted instance of that event's response.
176+
*
177+
* This only works to measure 'latency' if we handle each event -> render() cycle in one main
178+
* thread message as we are taking advantage of that fact in setting up the trace sections.
179+
* If this is not the case and we care most about latency itself, then use
180+
* [benchmarkLatencyWithFrameCallbacks()] as an alternate heuristic.
181+
*
182+
* However, if we just want to measure the total amount of time Workflow 'works' for the
183+
* scenario under test, then we can add up all these action handling sections as long
184+
* as they are all being recorded in the sections map - [ACTION_TRACE_SECTIONS]. Note that
185+
* this is an iterative process for the scenario and after any change to how the Workflow
186+
* runtime processes actions as we will need to record all the different action processing
187+
* labels and ensure they are included in the map. To validate we cover all of Workflow's work
188+
* it is a good idea to add in tracing for all main thread messages and take a look at the
189+
* Perfetto trace to see if there is any 'work' we are missing. We can do that by adding
190+
* [PerformancePoetryActivity.EXTRA_TRACE_ALL_MAIN_THREAD_MESSAGES].
155191
*/
156-
@OptIn(ExperimentalMetricApi::class)
157-
fun benchmarkLatencyTraceSections(compilationMode: CompilationMode) {
158-
fun addLatency(intent: Intent) {
192+
private fun benchmarkActionTraceSections(
193+
highFrequency: Boolean = false,
194+
iterations: Int = 10
195+
) {
196+
fun addActionTracing(intent: Intent) {
159197
intent.apply {
198+
if (highFrequency) {
199+
putExtra(
200+
PerformancePoetryActivity.EXTRA_PERF_CONFIG_REPEAT,
201+
PerformancePoetryActivity.HIGH_FREQUENCY_REPEAT_COUNT
202+
)
203+
}
160204
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_INITIALIZING, true)
161-
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_EVENT_LATENCY, true)
205+
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_ACTION_TRACING, true)
162206
}
163207
}
164208

209+
val traceSections =
210+
if (highFrequency) ACTION_TRACE_SECTIONS_HIGH_FREQUENCY else ACTION_TRACE_SECTIONS
211+
165212
benchmarkRule.measureRepeated(
166213
packageName = PACKAGE_NAME,
167-
metrics = LATENCY_TRACE_SECTIONS.map { TraceSectionMetric(it) },
168-
iterations = 20,
214+
metrics = traceSections.map { TraceSectionMetric(it) },
215+
iterations = iterations,
169216
startupMode = StartupMode.WARM,
170-
compilationMode = compilationMode,
217+
compilationMode = CompilationMode.Full(),
171218
setupBlock = {
172219
pressHome()
173220
device.landscapeOrientation()
174221
}
175222
) {
176-
startActivityAndWait(::addLatency)
223+
startActivityAndWait(::addActionTracing)
224+
device.landscapeOrientation()
225+
device.waitForIdle()
226+
227+
device.openRavenAndNavigate()
228+
}
229+
}
230+
231+
/**
232+
* This measures latency using the Papa method where for each rendering we trace from the start
233+
* of the interaction event to the time after Choreographer has painted the changes using a
234+
* frame callback. Note that these *include* the time to paint the first frame by Choreographer.
235+
*
236+
* These are still helpful though if we process in the Workflow runtime with multiple main thread
237+
* messages (e.g. processing multiple action selects).
238+
*
239+
* This also does not include the time that the Workflow runtime uses to handle the actual UI
240+
* interaction event and produce a new rendering. In practice for this application the render
241+
* pass times are < 1ms as we don't have a lot of render() pathologies and work. There isn't
242+
* anything we need to track there either as optimization for this is covered by reducing the
243+
* # of total render passes, and specifically for stale nodes.
244+
*
245+
* What this gives us is a high level look at whether processing multiple actions or other such
246+
* optimizations will increase the overall latency.
247+
*/
248+
@Test fun benchmarkLatencyWithFrameCallbacks() {
249+
fun addLatencyTracing(intent: Intent) {
250+
intent.apply {
251+
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_INITIALIZING, true)
252+
putExtra(PerformancePoetryActivity.EXTRA_PERF_CONFIG_FRAME_LATENCY, true)
253+
}
254+
}
255+
256+
benchmarkRule.measureRepeated(
257+
packageName = PACKAGE_NAME,
258+
metrics = FRAME_LATENCY_TRACE_SECTIONS.map { TraceSectionMetric(it) },
259+
iterations = 10,
260+
startupMode = StartupMode.WARM,
261+
compilationMode = CompilationMode.Full(),
262+
setupBlock = {
263+
pressHome()
264+
device.landscapeOrientation()
265+
}
266+
) {
267+
startActivityAndWait(::addLatencyTracing)
177268
device.landscapeOrientation()
178269
device.waitForIdle()
179270

@@ -182,37 +273,33 @@ class ComplexPoetryBenchmarks {
182273
}
183274

184275
companion object {
185-
const val RENDER_PASSES = 58
276+
const val RENDER_PASSES = 57
277+
const val RENDER_PASSES_HIGH_FREQUENCY = 181
186278
const val PACKAGE_NAME = "com.squareup.benchmarks.performance.complex.poetry"
187279

188-
val LATENCY_TRACE_SECTIONS = listOf(
280+
val ACTION_TRACE_SECTIONS = listOf(
189281
"E-PoemList-PoemSelected-00",
190282
"Worker-ComplexCallBrowser(2)-Finished-00",
191-
"Worker-PoemLoading-Finished-00",
192283
"E-StanzaList-StanzaSelected-00",
193-
"Worker-PoemLoading-Finished-01",
194-
"E-StanzaWorkflow-ShowNextStanza-00",
195-
"Worker-PoemLoading-Finished-02",
196-
"E-StanzaWorkflow-ShowNextStanza-01",
197-
"Worker-PoemLoading-Finished-03",
198-
"E-StanzaWorkflow-ShowNextStanza-02",
199-
"Worker-PoemLoading-Finished-04",
200-
"E-StanzaWorkflow-ShowNextStanza-03",
201-
"Worker-PoemLoading-Finished-05",
202-
"E-StanzaWorkflow-ShowNextStanza-04",
203-
"Worker-PoemLoading-Finished-06",
204-
"E-StanzaWorkflow-ShowPreviousStanza-00",
205-
"Worker-PoemLoading-Finished-07",
206-
"E-StanzaWorkflow-ShowPreviousStanza-01",
207-
"Worker-PoemLoading-Finished-08",
208-
"E-StanzaWorkflow-ShowPreviousStanza-02",
209-
"Worker-PoemLoading-Finished-09",
210-
"E-StanzaWorkflow-ShowPreviousStanza-03",
211-
"Worker-PoemLoading-Finished-10",
212-
"E-StanzaWorkflow-ShowPreviousStanza-04",
213-
"Worker-PoemLoading-Finished-11",
214284
"E-StanzaList-Exit-00",
215285
"Worker-ComplexCallBrowser(-1)-Finished-00",
216-
)
286+
) + (0..11).map {
287+
"Worker-PoemLoading-Finished-${it.pad()}"
288+
} + (0..4).map {
289+
"E-StanzaWorkflow-ShowNextStanza-${it.pad()}"
290+
} + (0..4).map {
291+
"E-StanzaWorkflow-ShowPreviousStanza-${it.pad()}"
292+
}
293+
294+
val ACTION_TRACE_SECTIONS_HIGH_FREQUENCY = ACTION_TRACE_SECTIONS + (0..250).map {
295+
"Worker-EventRepetition-Finished-${it.pad()}"
296+
}
297+
298+
val FRAME_LATENCY_TRACE_SECTIONS = (0..27).map {
299+
"Frame-Latency-${it.pad()}_"
300+
}
301+
302+
private fun Int.pad() =
303+
toString().padStart(3, '0')
217304
}
218305
}

0 commit comments

Comments
 (0)