Skip to content

Commit 5cb0eb6

Browse files
refactor(benchmark): Restructure benchmark infrastructure and address review feedback
- Move dataset generation to the `setup()` method as suggested by @lrhn
- Add support for variable input sizes via command-line arguments
- Split benchmark code into three modular files:
  * benchmark_utils.dart: Reusable infrastructure (SortBenchmarkBase, DatasetGenerators, result printing)
  * legacy_quicksort.dart: Previous implementation for performance comparison
  * sort_benchmark.dart: Clean benchmark implementation using utilities
- Rename "baseline" to "legacy" for clearer semantics
- Make printResultsAsMarkdownTable() generic with configurable algorithm names
- Fix table formatting with proper alignment and dynamic column widths
- Add DatasetGenerators class with centralized dataset creation methods
- Add "Nearly Sorted" benchmark pattern for more comprehensive testing
- Include standard deviation in BenchmarkResult for statistical analysis

This refactoring improves maintainability, reusability, and makes it easier to benchmark different sorting algorithms with various input patterns and sizes.

Addresses feedback from PR #922 review.
1 parent 3960d63 commit 5cb0eb6

File tree

4 files changed

+499
-322
lines changed

4 files changed

+499
-322
lines changed
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// Reusable utilities for benchmarking sorting algorithms.
6+
library;
7+
8+
import 'dart:math';
9+
import 'package:benchmark_harness/benchmark_harness.dart';
10+
11+
/// Global sink that benchmarks XOR sampled result values into, so that the
/// compiler cannot treat the sorted lists as unused and optimize the
/// benchmarked code away.
12+
int sink = 0;
13+
14+
/// Summary statistics for one benchmark, plus the raw samples behind them.
///
/// This is a plain value holder: it stores exactly what it is given and
/// performs no computation or validation of its own.
class BenchmarkResult {
  /// Creates a result from already-computed statistics and their samples.
  BenchmarkResult(this.mean, this.median, this.stdDev, this.allTimes);

  /// The mean of the recorded timings.
  final double mean;

  /// The median of the recorded timings.
  final int median;

  /// The standard deviation of the recorded timings.
  final double stdDev;

  /// Every individual timing that the statistics above summarize.
  final List<int> allTimes;
}
23+
24+
/// Base class for sorting benchmarks that sort copies of pre-generated lists.
///
/// Subclasses supply the input data through [generateDatasets] and the
/// operation under test through [performSort]. The datasets are built in
/// [setup], so dataset generation happens outside of [run].
abstract class SortBenchmarkBase extends BenchmarkBase {
  /// The input size this benchmark was configured with.
  ///
  /// Stored for subclasses; this class does not read it itself.
  final int size;

  // Deliberately not `final`: a `late final` field throws on its second
  // assignment, which would make every [setup] call after the first one
  // crash (for example when the same instance is benchmarked twice).
  late List<List<int>> _datasets;

  // Index into [_datasets] of the list the next [nextList] read returns.
  int _iteration = 0;

  // Running counter mixed into [sink] so successive updates differ.
  int _checksum = 0;

  /// Creates a benchmark called [name] configured for inputs of [size].
  SortBenchmarkBase(super.name, this.size);

  /// Generates the datasets for this benchmark condition.
  ///
  /// Called by [setup]. [nextList] cycles through the returned lists in
  /// order, so the result must contain at least one list if [nextList] is
  /// going to be used.
  List<List<int>> generateDatasets();

  /// Regenerates the datasets and restarts the [nextList] cycle.
  ///
  /// Safe to call more than once on the same instance.
  @override
  void setup() {
    _datasets = generateDatasets();
    _iteration = 0;
  }

  /// Returns a fresh copy of the next dataset, wrapping around at the end.
  ///
  /// A copy is returned so that sorting it leaves the stored dataset
  /// untouched for later rounds. Note that reading this getter advances the
  /// internal position.
  List<int> get nextList {
    final dataset = _datasets[_iteration];
    _iteration++;
    if (_iteration == _datasets.length) _iteration = 0;
    return dataset.toList();
  }

  /// Folds a sample of [list] into the global [sink].
  ///
  /// The first, last and middle elements are XORed together with a running
  /// counter. An empty [list] contributes only the counter.
  void updateChecksum(List<int> list) {
    // `first`/`last` throw on an empty list, so sample nothing in that case.
    final sample = list.isEmpty
        ? 0
        : list.first ^ list.last ^ list[list.length >> 1];
    sink ^= sample ^ _checksum++;
  }

  /// The core sorting operation to benchmark.
  void performSort();

  /// Runs a single round of the benchmark by delegating to [performSort].
  @override
  void run() => performSort();
}
60+
61+
/// Factory methods for the input patterns used by the sorting benchmarks.
///
/// Every method returns a list of datasets. The randomized patterns use a
/// fixed default seed, so repeated calls with the same arguments yield the
/// same data.
class DatasetGenerators {
  /// Returns [count] lists of [size] random integers in the range `[0, size)`.
  ///
  /// Uses [seed] for the random number generator, or `12345` when omitted.
  static List<List<int>> random(int size, {int count = 128, int? seed}) {
    final rng = Random(seed ?? 12345);
    List<int> buildList(int _) =>
        List<int>.generate(size, (_) => rng.nextInt(size));
    return List<List<int>>.generate(count, buildList);
  }

  /// Returns a single list holding `0, 1, ..., size - 1` in ascending order.
  static List<List<int>> sorted(int size) {
    final ascending = List<int>.generate(size, (index) => index);
    return [ascending];
  }

  /// Returns a single list holding `size - 1, ..., 1, 0` in descending order.
  static List<List<int>> reverse(int size) {
    final descending = List<int>.generate(size, (index) => size - 1 - index);
    return [descending];
  }

  /// Returns [count] lists of [size] random integers drawn from only
  /// [uniqueCount] distinct values, `[0, uniqueCount)`.
  ///
  /// Uses [seed] for the random number generator, or `67890` when omitted.
  static List<List<int>> fewUnique(int size,
      {int uniqueCount = 7, int count = 128, int? seed}) {
    final rng = Random(seed ?? 67890);
    List<int> buildList(int _) =>
        List<int>.generate(size, (_) => rng.nextInt(uniqueCount));
    return List<List<int>>.generate(count, buildList);
  }

  /// Returns a single list meant as a worst case for naive quicksort.
  ///
  /// The list holds the values at even indices of `0..size - 1` in ascending
  /// order, followed by the values at odd indices in descending order, e.g.
  /// `[0, 2, 4, 6, 7, 5, 3, 1]` for a [size] of 8.
  static List<List<int>> pathological(int size) {
    final ascending = List<int>.generate(size, (i) => i, growable: false);
    // The highest odd index below `size`.
    final lastOddIndex = size.isEven ? size - 1 : size - 2;

    final values = <int>[];
    for (var even = 0; even < size; even += 2) {
      values.add(ascending[even]);
    }
    for (var odd = lastOddIndex; odd >= 0; odd -= 2) {
      values.add(ascending[odd]);
    }
    return [values];
  }

  /// Returns [count] ascending lists of [size] elements that have each been
  /// disturbed by a number of random swaps.
  ///
  /// [swapPercent] is a fraction of [size], not a percentage: the default of
  /// `0.05` performs `(size * 0.05).round()` swaps per list. Both positions
  /// of a swap are picked at random, so a swap may leave the list unchanged.
  ///
  /// Uses [seed] for the random number generator, or `11111` when omitted.
  static List<List<int>> nearlySorted(int size,
      {double swapPercent = 0.05, int count = 128, int? seed}) {
    final rng = Random(seed ?? 11111);

    List<int> buildList(int _) {
      final values = List<int>.generate(size, (index) => index);
      final swapCount = (size * swapPercent).round();
      for (var done = 0; done < swapCount; done++) {
        final from = rng.nextInt(size);
        final to = rng.nextInt(size);
        final held = values[from];
        values[from] = values[to];
        values[to] = held;
      }
      return values;
    }

    return List<List<int>>.generate(count, buildList);
  }
}
118+
119+
/// Runs [benchmark] repeatedly and aggregates the measured timings.
///
/// Calls [SortBenchmarkBase.setup] once, performs three untimed warmup runs,
/// and then times [samples] individual calls to [SortBenchmarkBase.run] in
/// microseconds.
///
/// The returned [BenchmarkResult] contains:
///
/// * the arithmetic mean of the timings,
/// * the median, taken as the element at index `samples >> 1` of the sorted
///   timings (the upper of the two middle values for an even [samples]),
/// * the population standard deviation (the variance divides by [samples]),
/// * all timings, sorted in ascending order.
///
/// Throws an [ArgumentError] if [samples] is not positive, since no
/// statistics can be computed from zero measurements.
BenchmarkResult runBenchmark(SortBenchmarkBase benchmark, int samples) {
  // Fail fast, before any setup or warmup work is done.
  if (samples <= 0) {
    throw ArgumentError.value(samples, 'samples', 'Must be positive');
  }

  // Setup datasets
  benchmark.setup();

  // Warmup runs (not timed)
  for (var i = 0; i < 3; i++) {
    benchmark.run();
  }

  // Timed runs
  final times = <int>[];
  for (var i = 0; i < samples; i++) {
    final stopwatch = Stopwatch()..start();
    benchmark.run();
    stopwatch.stop();
    times.add(stopwatch.elapsedMicroseconds);
  }

  // Sorted so the median can be read off by index.
  times.sort();
  final mean = times.reduce((a, b) => a + b) / samples;
  final median = times[samples >> 1];

  // Population variance: mean of the squared deviations from the mean.
  final squaredDeviations =
      times.fold<double>(0, (sum, t) => sum + pow(t - mean, 2));
  final stdDev = sqrt(squaredDeviations / samples);

  return BenchmarkResult(mean, median, stdDev, times);
}
150+
151+
/// Prints benchmark results as a markdown table.
///
/// [results] maps each data condition (the row label) to a pair of results,
/// `(baseline, comparison)`. [size] is only shown in the title line.
/// [baselineName] and [comparisonName] are the labels for the two
/// implementations being compared (e.g., "Legacy", "pdqsort", "MergeSort").
///
/// The table has a "Mean" section followed by a "Median" section, each with
/// one row per entry of [results]. The "Improvement" column shows how much
/// smaller the comparison value is than the baseline value, as a percentage
/// of the baseline value.
///
/// The last column depends on [showStdDev]:
///
/// * `false` (the default): a "Winner" column naming the comparison
///   implementation when the improvement is positive, and the baseline
///   otherwise.
/// * `true`: a "StdDev" column showing `baseline/comparison` standard
///   deviations in the "Mean" section; it is left blank in the "Median"
///   section.
void printResultsAsMarkdownTable(
    Map<String, (BenchmarkResult, BenchmarkResult)> results, int size,
    {required String baselineName,
    required String comparisonName,
    bool showStdDev = false}) {
  const conditionColWidth = 19;
  const improvementColWidth = 11;

  // Column widths grow with the names; ' (µs)' adds five characters.
  final baselineColWidth = max(baselineName.length + 5, 13);
  final comparisonColWidth = max(comparisonName.length + 5, 13);

  // The last column is sized once, from its mode and widest possible cell,
  // so that header, divider, section and data rows all agree on it.
  final comparisonWinsLabel = '$comparisonName 🚀';
  final lastColWidth = showStdDev
      ? 13
      : max(15, max(baselineName.length, comparisonWinsLabel.length));

  // Formats one body row: the label is left-aligned, all values right-aligned.
  String bodyRow(String condition, String baseline, String comparison,
          String improvement, String last) =>
      '| ${condition.padRight(conditionColWidth)} '
      '| ${baseline.padLeft(baselineColWidth)} '
      '| ${comparison.padLeft(comparisonColWidth)} '
      '| ${improvement.padLeft(improvementColWidth)} '
      '| ${last.padLeft(lastColWidth)} |';

  // Prints a section title row followed by one row per data condition,
  // comparing the value that [metric] extracts from each result.
  void printSection(String title, num Function(BenchmarkResult) metric,
      {required bool includeStdDev}) {
    print(bodyRow(title, '', '', '', ''));

    for (final entry in results.entries) {
      final (baseline, comparison) = entry.value;
      final baselineValue = metric(baseline);
      final comparisonValue = metric(comparison);

      final improvement =
          (baselineValue - comparisonValue) / baselineValue * 100;
      final improvementString =
          '${improvement > 0 ? '+' : ''}${improvement.toStringAsFixed(2)}%';

      final String lastCell;
      if (!showStdDev) {
        lastCell = improvement > 0 ? comparisonWinsLabel : baselineName;
      } else if (includeStdDev) {
        lastCell = '${baseline.stdDev.round()}/${comparison.stdDev.round()}';
      } else {
        lastCell = '';
      }

      print(bodyRow(entry.key, '${baselineValue.round()}',
          '${comparisonValue.round()}', improvementString, lastCell));
    }
  }

  final separator = '=' * 100;
  print('\n$separator');
  print('Benchmark Results (Size: $size): $comparisonName vs. $baselineName');
  print(separator);

  // Header cells are left-aligned within their columns.
  final baselineHeader = '$baselineName (µs)'.padRight(baselineColWidth);
  final comparisonHeader = '$comparisonName (µs)'.padRight(comparisonColWidth);
  final lastHeader =
      (showStdDev ? 'StdDev' : 'Winner').padRight(lastColWidth);
  print('| ${'Data Condition'.padRight(conditionColWidth)} '
      '| $baselineHeader '
      '| $comparisonHeader '
      '| ${'Improvement'.padRight(improvementColWidth)} '
      '| $lastHeader |');

  // Markdown alignment row: first column left-aligned, the rest centered.
  print('| :${'-' * (conditionColWidth - 1)} '
      '| :${'-' * (baselineColWidth - 2)}: '
      '| :${'-' * (comparisonColWidth - 2)}: '
      '| :${'-' * (improvementColWidth - 2)}: '
      '| :${'-' * (lastColWidth - 2)}: |');

  printSection('**Mean**', (result) => result.mean, includeStdDev: true);
  printSection('**Median**', (result) => result.median,
      includeStdDev: false);

  print(separator);
}

pkgs/collection/benchmark/dataset_generator.dart

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)