Skip to content

Commit aeb7408

Browse files
authored
Improve benchmark tools (#159)
* Update the benchmark.json format to be more easily decodable. The new type can decode the previous format to enable comparisons with older versions. * Move existing benchmark tools to a local Swift package * Improve the benchmark diff calculations * Add additional benchmark tools to measure specific swift-docc commits * Remove fixed TODO * Add a basic tool to visualize trends across multiple benchmarks * Fix argument parsing issue with benchmark measure-commits tool * Add option to write benchmark diff results to a JSON file * Add flag to recompute output-size metrics missing in older checkouts * Remove outdated code comment * Update conceptual benchmarking documentation * Update benchmark.json OpenAPI spec * Update benchmark commands in conceptual documentation * Fix bug where the metrics were gathered for the wrong docc executable * Fix bug where change percentages weren't reported correctly. * Include trend bars of all the values when samples show signs of bias * Handle input bias special case where all values are practically the same * Silence a few warnings by explicitly specifying `self` * Fix formatting of percentage value * Flip order of compare-against-commit results in diff * Customize the names of the before and after columns in the diff table * Adapt test for legacy benchmark decoding to avoid locale differences * Rename compare-against-commit to `compare-to` * Rephrase warning about possible bias in input data to be actionable Also, add footnote for each conclusion that a human needs to check * Fix whitespace in license headers * Update bin/test to validate new location for benchmark tools * Use alternative formatting of durations for non-Darwin platforms * Remove benchmark check from bin/test The CI had issues with the local package * Add test that extra unknown metrics in legacy format are decoded. 
* Create missing intermediate directories before writing json output * Change docc arguments to `Option` to improve `measure-commits` CLI * Address code review feedback: - Rephrase statistical significance explanations - Give context to full precision values - Use bold text (where supported) to benchmark output - Use color (where supported) to emphasize diff changes * Remove restriction on number of allowed footnotes in the DiffResultsTable * Fix index-out-of-bounds access when diff displays many footnotes
1 parent 10994be commit aeb7408

38 files changed

+2324
-538
lines changed

Sources/SwiftDocC/Benchmark/Benchmark.swift

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,28 +57,33 @@ public class Benchmark: Encodable {
5757
case date, metrics, arguments, platform
5858
}
5959

60-
public func encode(to encoder: Encoder) throws {
61-
var container = encoder.container(keyedBy: CodingKeys.self)
60+
/// Prepares the gathered measurements into benchmark results.
61+
///
62+
/// The prepared benchmark results are sorted in a stable order that's suitable for presentation.
63+
///
64+
/// - Returns: The prepared benchmark results for all the gathered metrics.
65+
public func results() -> BenchmarkResults? {
66+
guard isEnabled else { return nil }
6267

63-
let dateFormatter = DateFormatter()
64-
dateFormatter.dateStyle = .medium
65-
dateFormatter.timeStyle = .medium
66-
try container.encode(dateFormatter.string(from: date), forKey: .date)
67-
68-
let arguments = Array(CommandLine.arguments.dropFirst())
69-
try container.encode(arguments, forKey: .arguments)
70-
71-
try container.encode(platform, forKey: .platform)
72-
73-
let metrics = self.metrics.compactMap { log -> BenchmarkResult? in
68+
let metrics = metrics.compactMap { log -> BenchmarkResults.Metric? in
7469
guard let result = log.result else {
7570
return nil
7671
}
7772
let id = (log as? DynamicallyIdentifiableMetric)?.identifier ?? type(of: log).identifier
7873
let displayName = (log as? DynamicallyIdentifiableMetric)?.displayName ?? type(of: log).displayName
79-
return BenchmarkResult(identifier: id, displayName: displayName, result: result)
74+
return .init(id: id, displayName: displayName, value: result)
8075
}
81-
try container.encode(metrics, forKey: .metrics)
76+
77+
return BenchmarkResults(
78+
platformName: platform,
79+
timestamp: date,
80+
doccArguments: Array(CommandLine.arguments.dropFirst()),
81+
unorderedMetrics: metrics
82+
)
83+
}
84+
85+
public func encode(to encoder: Encoder) throws {
86+
try results()?.encode(to: encoder)
8287
}
8388
}
8489

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2022 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import Foundation
12+
13+
/// The results of a single benchmark run.
14+
public struct BenchmarkResults: Codable {
15+
/// The name of the platform where the benchmark ran.
16+
public let platformName: String
17+
18+
/// The timestamp for when the benchmark started.
19+
public let timestamp: Date
20+
21+
/// The arguments that Swift-DocC ran with when gathering benchmark data.
22+
public let doccArguments: [String]
23+
24+
/// Creates a new benchmark result to gather measurement results into.
25+
///
26+
/// - Parameters:
27+
/// - platformName: The name of the platform that the benchmark ran on.
28+
/// - timestamp: The timestamp when the benchmark started.
29+
/// - doccArguments: The arguments that Swift-DocC ran with when gathering benchmark data.
30+
/// - unorderedMetrics: A list of unordered metrics for this benchmark.
31+
public init(
32+
platformName: String,
33+
timestamp: Date = Date(),
34+
doccArguments: [String] = Array(CommandLine.arguments.dropFirst()),
35+
unorderedMetrics: [Metric]
36+
) {
37+
self.platformName = platformName
38+
self.timestamp = timestamp
39+
self.doccArguments = doccArguments
40+
self.metrics = Self.sortedMetrics(unorderedMetrics)
41+
}
42+
43+
/// A private convenience method for sorting metrics in a stable order based on their kind of value and then their display name.
44+
fileprivate static func sortedMetrics(_ unorderedMetrics: [Metric]) -> [Metric] {
45+
// Sort by value type and then by name
46+
return unorderedMetrics.sorted { (lhs, rhs) in
47+
if lhs.value.kindSortOrder == rhs.value.kindSortOrder {
48+
return lhs.displayName < rhs.displayName
49+
} else {
50+
return lhs.value.kindSortOrder < rhs.value.kindSortOrder
51+
}
52+
}
53+
}
54+
55+
/// The list of metrics gathered in this benchmark.
56+
///
57+
/// - Note: The metrics are sorted based on presentation priority.
58+
public var metrics: [Metric]
59+
60+
/// A gathered metric.
61+
public struct Metric: Codable, Equatable {
62+
/// The ID for this gathered metric.
63+
///
64+
/// Use the ID to match up metrics across benchmarks to compare results.
65+
public var id: String
66+
/// The name that describes this gathered metric. Suitable for presentation.
67+
public var displayName: String
68+
/// The gathered value for this metric.
69+
public var value: Value
70+
71+
/// Creates a new metric to represent gathered measurements.
72+
/// - Parameters:
73+
/// - id: The ID for this gathered metric.
74+
/// - displayName: The name that describes this gathered metric.
75+
/// - value: The gathered value for this metric.
76+
public init(id: String, displayName: String, value: BenchmarkResults.Metric.Value) {
77+
self.id = id
78+
self.displayName = displayName
79+
self.value = value
80+
}
81+
82+
/// The gathered value for a metric.
83+
public enum Value: Codable, Equatable {
84+
/// A duration in seconds.
85+
case duration(Double)
86+
/// A number of bytes for a memory measurement.
87+
case bytesInMemory(Int64)
88+
/// A number of bytes for a storage measurement.
89+
case bytesOnDisk(Int64)
90+
/// A checksum.
91+
case checksum(String)
92+
93+
enum CodingKeys: CodingKey {
94+
case type, value
95+
}
96+
private enum ValueKind: String, Codable {
97+
case duration, bytesInMemory, bytesOnDisk, checksum
98+
}
99+
100+
public init(from decoder: Decoder) throws {
101+
let container = try decoder.container(keyedBy: CodingKeys.self)
102+
103+
switch try container.decode(ValueKind.self, forKey: .type) {
104+
case .bytesInMemory:
105+
self = try .bytesInMemory(container.decode(Int64.self, forKey: .value))
106+
case .bytesOnDisk:
107+
self = try .bytesOnDisk(container.decode(Int64.self, forKey: .value))
108+
case .duration:
109+
self = try .duration(container.decode(Double.self, forKey: .value))
110+
case .checksum:
111+
self = try .checksum(container.decode(String.self, forKey: .value))
112+
}
113+
}
114+
115+
public func encode(to encoder: Encoder) throws {
116+
var container = encoder.container(keyedBy: CodingKeys.self)
117+
118+
switch self {
119+
case .bytesInMemory(let value):
120+
try container.encode(ValueKind.bytesInMemory, forKey: .type)
121+
try container.encode(value, forKey: .value)
122+
case .bytesOnDisk(let value):
123+
try container.encode(ValueKind.bytesOnDisk, forKey: .type)
124+
try container.encode(value, forKey: .value)
125+
case .duration(let value):
126+
try container.encode(ValueKind.duration, forKey: .type)
127+
try container.encode(value, forKey: .value)
128+
case .checksum(let value):
129+
try container.encode(ValueKind.checksum, forKey: .type)
130+
try container.encode(value, forKey: .value)
131+
}
132+
}
133+
}
134+
}
135+
}
136+
137+
// MARK: Legacy format
138+
139+
extension BenchmarkResults {
140+
private enum LegacyCodingKeys: String, CodingKey {
141+
case platform, date, arguments, metrics
142+
}
143+
144+
private enum LegacyMetricCodingKeys: String, CodingKey {
145+
case identifier, displayName, result
146+
}
147+
148+
public enum CodingKeys: CodingKey {
149+
case platformName, timestamp, doccArguments, metrics
150+
}
151+
152+
public init(from decoder: Decoder) throws {
153+
let container = try decoder.container(keyedBy: CodingKeys.self)
154+
155+
if container.contains(.platformName) {
156+
self.platformName = try container.decode(String.self, forKey: .platformName)
157+
self.timestamp = try container.decode(Date.self, forKey: .timestamp)
158+
self.doccArguments = try container.decode([String].self, forKey: .doccArguments)
159+
self.metrics = try container.decode([Metric].self, forKey: .metrics)
160+
} else {
161+
// Legacy format
162+
let container = try decoder.container(keyedBy: LegacyCodingKeys.self)
163+
164+
self.platformName = try container.decode(String.self, forKey: .platform)
165+
166+
let dateFormatter = DateFormatter()
167+
dateFormatter.dateStyle = .medium
168+
dateFormatter.timeStyle = .medium
169+
guard let date = try dateFormatter.date(from: container.decode(String.self, forKey: .date)) else {
170+
throw DecodingError.dataCorruptedError(forKey: .date, in: container, debugDescription: "Unable to decode benchmark Date value from legacy benchmark.json format.")
171+
}
172+
173+
self.timestamp = date
174+
self.doccArguments = try container.decode([String].self, forKey: .arguments)
175+
176+
var metricsContainer = try container.nestedUnkeyedContainer(forKey: .metrics)
177+
var unsortedMetrics: [Metric] = []
178+
if let containerCount = metricsContainer.count {
179+
unsortedMetrics.reserveCapacity(containerCount)
180+
}
181+
while !metricsContainer.isAtEnd {
182+
let metricContainer = try metricsContainer.nestedContainer(keyedBy: LegacyMetricCodingKeys.self)
183+
let id = try metricContainer.decode(String.self, forKey: .identifier)
184+
let name = try metricContainer.decode(String.self, forKey: .displayName)
185+
186+
if name.hasSuffix(" (msec)") {
187+
let value = try metricContainer.decode(Double.self, forKey: .result)
188+
unsortedMetrics.append(.init(id: id, displayName: String(name.dropLast(7)), value: .duration(value / 1000.0)))
189+
continue
190+
} else if name.hasSuffix("memory footprint (bytes)") {
191+
let value = try metricContainer.decode(Int64.self, forKey: .result)
192+
unsortedMetrics.append(.init(id: id, displayName: String(name.dropLast(8)), value: .bytesInMemory(value)))
193+
continue
194+
} else if name.hasSuffix(" (bytes)") {
195+
let value = try metricContainer.decode(Int64.self, forKey: .result)
196+
unsortedMetrics.append(.init(id: id, displayName: String(name.dropLast(8)), value: .bytesOnDisk(value)))
197+
continue
198+
} else {
199+
let value = try metricContainer.decode(String.self, forKey: .result)
200+
unsortedMetrics.append(.init(id: id, displayName: name, value: .checksum(value)))
201+
continue
202+
}
203+
}
204+
205+
self.metrics = Self.sortedMetrics(unsortedMetrics)
206+
}
207+
}
208+
}
209+
210+
private extension BenchmarkResults.Metric.Value {
211+
var kindSortOrder: Int {
212+
switch self {
213+
case .duration:
214+
return 0
215+
case .bytesInMemory:
216+
return 1
217+
case .bytesOnDisk:
218+
return 2
219+
case .checksum:
220+
return 3
221+
}
222+
}
223+
}
224+

Sources/SwiftDocC/Benchmark/Metrics.swift

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,8 @@
1010

1111
import Foundation
1212

13-
/// An encodable value which is either a number or a string.
14-
///
15-
/// Using either number or string values allows benchmark reports
16-
/// to be compared without having a predefined list of possible value types.
17-
///
18-
/// For example when comparing two benchmark reports a delta can be produced
19-
/// for any numeric value without the understanding whether a number is a duration
20-
/// in seconds or megabytes.
21-
///
22-
/// Similarly, string values can be checked for equality without understanding
23-
/// what the metric represents.
24-
public enum MetricValue: Encodable {
25-
public func encode(to encoder: Encoder) throws {
26-
var container = encoder.singleValueContainer()
27-
28-
switch self {
29-
case .number(let num): try container.encode(num)
30-
case .integer(let integer): try container.encode(integer)
31-
case .string(let string): try container.encode(string)
32-
}
33-
}
34-
35-
/// A textual metric to produce match/no match deltas.
36-
case string(String)
37-
38-
/// A number metric which can be used to produce percentage delta changes.
39-
case number(Double)
40-
41-
/// An integer metric suitable for counters or other non-floating numbers.
42-
case integer(Int64)
43-
}
13+
/// A metric value which is either a duration, a number of bytes, or a checksum.
14+
public typealias MetricValue = BenchmarkResults.Metric.Value
4415

4516
/// A generic, named metric.
4617
public protocol BenchmarkMetric {
@@ -58,8 +29,7 @@ public protocol DynamicallyIdentifiableMetric: BenchmarkMetric {
5829
var displayName: String { get }
5930
}
6031

61-
/// A metric that runs over a period of time and needs
62-
/// to be started and stopped to produce its result.
32+
/// A metric that runs over a period of time and needs to be started and stopped to produce its result.
6333
public protocol BenchmarkBlockMetric: BenchmarkMetric {
6434
func begin() -> Void
6535
func end() -> Void

Sources/SwiftDocC/Benchmark/Metrics/Duration.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ extension Benchmark {
1919
public static let displayName = "Duration for an operation"
2020

2121
public var identifier: String { "duration-\(self.id)" }
22-
public var displayName: String { "Duration for '\(self.id)' (msec)" }
22+
public var displayName: String { "Duration for '\(self.id)'" }
2323

2424
public var result: MetricValue?
2525

@@ -46,15 +46,15 @@ extension Benchmark {
4646
// Store the elapsed time in seconds as a floating-point duration.
4747
// (Earlier versions stored whole milliseconds as an integer; the
4848
// legacy benchmark.json decoder converts those values to seconds.)
49-
result = .integer(Int64((ProcessInfo.processInfo.systemUptime - startTime) * 1000.0))
49+
result = .duration((ProcessInfo.processInfo.systemUptime - startTime))
5050
}
5151

5252
/// Convenience init to use when the duration is tracked elsewhere.
5353
/// - Parameter id: The id for the metric.
54-
/// - Parameter duration: The duration value in milliseconds to be logged.
54+
/// - Parameter duration: The duration value in seconds to be logged.
5555
public init(id: String, duration: TimeInterval) {
5656
self.id = id
57-
result = .integer(Int64(duration * 1000.0))
57+
result = .duration(duration)
5858
}
5959
}
6060
}

Sources/SwiftDocC/Benchmark/Metrics/ExternalTopicsHash.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ extension Benchmark {
3939
}).sorted().joined()
4040
+ context.externallyResolvedSymbols.map({ $0.absoluteString }).sorted().joined()
4141

42-
result = .string(Checksum.md5(of: Data(sourceString.utf8)))
42+
result = .checksum(Checksum.md5(of: Data(sourceString.utf8)))
4343
}
4444

4545
public var result: MetricValue?

Sources/SwiftDocC/Benchmark/Metrics/OutputSize.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ extension Benchmark {
1414
/// Measures the total output size of a DocC archive.
1515
public struct ArchiveOutputSize: BenchmarkMetric {
1616
public static let identifier = "total-archive-output-size"
17-
public static let displayName = "Total DocC archive size (bytes)"
17+
public static let displayName = "Total DocC archive size"
1818
public var result: MetricValue?
1919

2020
public init(archiveDirectory: URL) {
@@ -25,7 +25,7 @@ extension Benchmark {
2525
/// Measures the output size of the data subdirectory in a DocC archive.
2626
public struct DataDirectoryOutputSize: BenchmarkMetric {
2727
public static let identifier = "data-subdirectory-output-size"
28-
public static let displayName = "Data subdirectory size (bytes)"
28+
public static let displayName = "Data subdirectory size"
2929
public var result: MetricValue?
3030

3131
public init(dataDirectory: URL) {
@@ -36,7 +36,7 @@ extension Benchmark {
3636
/// Measures the output size of the index subdirectory in a DocC archive.
3737
public struct IndexDirectoryOutputSize: BenchmarkMetric {
3838
public static let identifier = "index-subdirectory-output-size"
39-
public static let displayName = "Index subdirectory size (bytes)"
39+
public static let displayName = "Index subdirectory size"
4040
public var result: MetricValue?
4141

4242
public init(indexDirectory: URL) {
@@ -67,6 +67,6 @@ extension MetricValue {
6767
bytes += Int64((try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0)
6868
}
6969

70-
self = .integer(bytes)
70+
self = .bytesOnDisk(bytes)
7171
}
7272
}

0 commit comments

Comments
 (0)