From fc41882dc775b71e702a9ec3e2dcf17275d9e84c Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Wed, 12 Mar 2025 14:36:22 -0700 Subject: [PATCH 1/3] Try to cool down --- .../benchmark/android-llm-device-farm-test-spec.yml.j2 | 4 ++++ .../main/java/org/pytorch/minibench/BenchmarkActivity.java | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 index 1ed5ede738c..74d874b2661 100644 --- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 +++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 @@ -109,6 +109,10 @@ phases: - | adb -s $DEVICEFARM_DEVICE_UDID shell am force-stop org.pytorch.minibench + adb -s $DEVICEFARM_DEVICE_UDID shell dumpsys deviceidle force-idle + adb -s $DEVICEFARM_DEVICE_UDID shell dumpsys deviceidle unforce + adb -s $DEVICEFARM_DEVICE_UDID shell sleep 10 + if [ -n "$BIN_FOUND" ]; then adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.LlmBenchmarkActivity \ --es "model_dir" "/data/local/tmp/minibench" \ diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 238e05f8c54..5e4c3bfc34a 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -85,11 +85,11 @@ protected void onPostExecute(Void aVoid) { // The list of metrics we have atm includes: // Avg inference latency after N iterations // Currently the result has large variance from outliers, so only use - // 80% samples in the middle (trimmean 0.2) + // 100% samples in the middle (trimmean 0) Collections.sort(stats.latency); int resultSize = stats.latency.size(); List usedLatencyResults = - stats.latency.subList(resultSize / 10, resultSize * 9 / 10); + stats.latency.subList(0, resultSize); results.add( new BenchmarkMetric( From 6fc599dd2dca90003d87074908714bb0483e467d Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Wed, 12 Mar 2025 16:04:44 -0700 Subject: [PATCH 2/3] Update --- .../java/org/pytorch/minibench/BenchmarkActivity.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 5e4c3bfc34a..f0223941462 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -85,18 +85,25 @@ protected void onPostExecute(Void aVoid) { // The list of metrics we have atm includes: // Avg inference latency after N iterations // Currently the result has large variance from outliers, so only use - // 100% samples in the middle (trimmean 0) + // 80% samples in the middle (trimmean 0.2) Collections.sort(stats.latency); int resultSize = stats.latency.size(); List usedLatencyResults = - stats.latency.subList(0, resultSize); + stats.latency.subList(resultSize / 10, resultSize * 9 / 10); results.add( new BenchmarkMetric( benchmarkModel, "avg_inference_latency(ms)", + stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), + 0.0f)); + results.add( + new BenchmarkMetric( + benchmarkModel, + "trimmean_inference_latency(ms)", usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); + // Model load time results.add( new BenchmarkMetric( From d08d4aed954ff4e5d42f33a17132643fee8732a6 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Wed, 12 Mar 2025 16:06:08 -0700 Subject: [PATCH 3/3] Update --- .../src/main/java/org/pytorch/minibench/BenchmarkActivity.java | 1 - 1 file changed, 1 deletion(-) diff --git a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index f0223941462..78830d5a54d 100644 --- a/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ b/extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -103,7 +103,6 @@ protected void onPostExecute(Void aVoid) { "trimmean_inference_latency(ms)", usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); - // Model load time results.add( new BenchmarkMetric(