Skip to content

Commit 5d345e7

Browse files
authored
rfac: Performance improvement to recalculateBins (#113)
* Add performance tests for Histogram method `recalculateBins` and run `swift test --generate-linuxmain`. * rfac: Faster algorithm for `recalculateBins` which is one of the main culprits in slowing down the tests. * rfac: Add `isSorted` flag to `HistogramSeries` and improve the performance of `recalculateBins` This flag is set if `data` is sorted and unset if the user sets the `data` property on the series manually, it is used to choose the fastest algorithm for `recalculateBins`. * rfac: Clean up calculateSeriesData. * docs: Add comments explaining both algorithms and their performance. * chor: val -> value * rfac: use `max` function and in the second algorithm hoist it out of the loop for better performance. Improve/update comments. * Attempt to add Big-O notation to the algorithms... is this right? * doc: add comment * doc: update comment * change class back to struct and run `swift test --generate-linuxmain` * [docs][chor] improve comment on `isSorted` and change `true` to `binIndex < binEndIndex` as suggested in PR#113 * rfac: strip `recalculateBins` down to one algorithm that does not require `data` to be sorted and update tests * rfac: update tests manifest * rfac: don't sort the data in the initialiser * bfix: fix code that took advantage of the data being sorted without actually being sure of that * docs: Improve comments about workaround * rfac: Create new test suite for performance tests
1 parent 6111313 commit 5d345e7

File tree

5 files changed

+90
-18
lines changed

5 files changed

+90
-18
lines changed

Sources/SwiftPlot/Histogram.swift

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,25 @@ extension Histogram: HasGraphLayout {
9090
var results = DrawingData()
9191
var markers = PlotMarkers()
9292

93-
var minimumX = histogramSeries.data.first!
94-
var maximumX = histogramSeries.data.last!
93+
// FIXME: Handle `histogramSeries.data` being empty.
94+
// TODO: (Performance) Fix this because we are going through
95+
// the data twice right now for each series we have...
96+
/// We were depending on `histogram.data` being sorted to get the `minimumX` and `maximumX`
97+
/// by getting the `.first`and `.last` element. This led to bugs down the line where it is expected
98+
/// that `minimumX < maximumX`. So this is the workaround for now.
99+
///
100+
/// This was added as part of PR #113
101+
guard var minimumX = histogramSeries.data.min(),
102+
var maximumX = histogramSeries.data.max() else {
103+
fatalError("Histogram: (temporary) We are not handling the case where the user supplied an empty array.")
104+
}
105+
95106
for series in histogramStackSeries {
96-
minimumX = min(minimumX, series.data.first!)
97-
maximumX = max(maximumX, series.data.last!)
107+
guard let minX = series.data.min(), let maxX = series.data.max() else {
108+
fatalError("Histogram: (temporary) We are not handling the case where the user supplied an empty array.")
109+
}
110+
minimumX = min(minimumX, minX)
111+
maximumX = max(maximumX, maxX)
98112
}
99113
minimumX = T(roundFloor10(Float(minimumX)))
100114
maximumX = T(roundCeil10(Float(maximumX)))
@@ -308,31 +322,53 @@ extension Histogram: HasGraphLayout {
308322

309323
private extension Histogram {
310324

325+
/// If the data is not sorted, run through each value in `data`, binary search the right bin and increment its frequency.
326+
///
327+
/// Performance: O(n log(m)). n: `data.count`, m: `binFrequency.count`.
328+
/// This algorithm runs through `data` once and for each value in `data` a binary search is performed on the bins' lower x limits.
329+
/// It runs through `binFrequency` array once to get the maximum frequency.
330+
/// It get/sets `binFrequency` value `data.count` amount of times.
311331
func recalculateBins(series: HistogramSeries<T>,
312332
binStart: T,
313333
binEnd: T,
314334
binInterval: T) -> (binFrequency: [Float], maxFrequency: Float) {
315-
316-
var maximumFrequency = Float(0)
317-
var binFrequency = stride(from: Float(binStart), through: Float(binEnd), by: Float(binInterval)).map {
318-
start -> Float in
319-
let end = start + Float(binInterval)
320-
var count: Float = 0
321-
for d in series.data {
322-
if(d < T(end) && d >= T(start)) {
323-
count += 1
335+
var binFrequency = [Float](repeating: 0.0, count: series.bins)
336+
let lastIndex = binFrequency.endIndex - 1
337+
for value in series.data {
338+
var start = 0
339+
var end = lastIndex
340+
var current = start + (end - start) / 2
341+
while end - start > 1 {
342+
if value >= binStart + T(current) * binInterval {
343+
start = current
344+
} else {
345+
end = current
324346
}
347+
current = start + (end - start) / 2
325348
}
326-
maximumFrequency = max(count, maximumFrequency)
327-
return count
349+
350+
binFrequency[current] += 1
328351
}
352+
var maximumFrequency = binFrequency.max() ?? 0.0
353+
329354
if (isNormalized) {
330355
let factor = Float(series.data.count)*Float(binInterval)
331-
for index in 0..<binFrequency.count {
356+
for index in 0..<series.bins {
332357
binFrequency[index]/=factor
333358
}
334359
maximumFrequency/=factor
335360
}
336361
return (binFrequency, maximumFrequency)
337362
}
338363
}
364+
365+
// For testing private methods.
366+
367+
extension Histogram {
368+
func testRecalculateBins(series: HistogramSeries<T>,
369+
binStart: T,
370+
binEnd: T,
371+
binInterval: T) {
372+
let (_, _) = recalculateBins(series: series, binStart: binStart, binEnd: binEnd, binInterval: binInterval)
373+
}
374+
}

Sources/SwiftPlot/HistogramSeries.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public struct HistogramSeries<T> where T: Comparable {
1010
label: String,
1111
color: Color,
1212
histogramType: HistogramSeriesOptions.HistogramType) {
13-
self.data = data.sorted()
13+
self.data = data
1414
self.bins = bins
1515
self.label = label
1616
self.color = color
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import XCTest
2+
@testable import SwiftPlot
3+
//import SVGRenderer
4+
//#if canImport(AGGRenderer)
5+
//import AGGRenderer
6+
//#endif
7+
//#if canImport(QuartzRenderer)
8+
//import QuartzRenderer
9+
//#endif
10+
11+
extension PerformanceTests {
12+
13+
/// Performance tests for the `Histogram.recalculateBins`.
14+
func testPerformanceHistogramRecalculateBins() throws {
15+
let histogram = Histogram<Float>(isNormalized: false, enableGrid: false)
16+
17+
let histogramSeries = HistogramSeries(data: histogram_step_values, bins: 50, label: "HISTOGRAM PERFORMANCE `recalculateBins`", color: .black, histogramType: .bar)
18+
measure {
19+
histogram.testRecalculateBins(series: histogramSeries, binStart: 40, binEnd: 160, binInterval: (160-40)/Float(histogram.histogramSeries.bins))
20+
}
21+
}
22+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import XCTest
2+
3+
final class PerformanceTests: SwiftPlotTestCase {}

Tests/SwiftPlotTests/XCTestManifests.swift

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ extension AnnotationTests {
1717
static let __allTests__AnnotationTests = [
1818
("testAnnotationArrow", testAnnotationArrow),
1919
("testAnnotationArrowDart", testAnnotationArrowDart),
20-
("testAnnotationArrowWedge", testAnnotationArrowWedge),
2120
("testAnnotationArrowDoubleHeaded", testAnnotationArrowDoubleHeaded),
21+
("testAnnotationArrowWedge", testAnnotationArrowWedge),
2222
("testAnnotationText", testAnnotationText),
2323
("testAnnotationTextBoundingBox", testAnnotationTextBoundingBox),
2424
]
@@ -51,6 +51,7 @@ extension HistogramTests {
5151
static let __allTests__HistogramTests = [
5252
("testHistogram", testHistogram),
5353
("testHistogramMultiStacked", testHistogramMultiStacked),
54+
("testHistogramMultiStackedColorBleed", testHistogramMultiStackedColorBleed),
5455
("testHistogramMultiStackedStep", testHistogramMultiStackedStep),
5556
("testHistogramStacked", testHistogramStacked),
5657
("testHistogramStackedStep", testHistogramStackedStep),
@@ -85,6 +86,15 @@ extension LineChartTests {
8586
]
8687
}
8788

89+
extension PerformanceTests {
90+
// DO NOT MODIFY: This is autogenerated, use:
91+
// `swift test --generate-linuxmain`
92+
// to regenerate.
93+
static let __allTests__PerformanceTests = [
94+
("testPerformanceHistogramRecalculateBins", testPerformanceHistogramRecalculateBins),
95+
]
96+
}
97+
8898
extension ScatterPlotTests {
8999
// DO NOT MODIFY: This is autogenerated, use:
90100
// `swift test --generate-linuxmain`
@@ -110,6 +120,7 @@ public func __allTests() -> [XCTestCaseEntry] {
110120
testCase(BarchartTests.__allTests__BarchartTests),
111121
testCase(HistogramTests.__allTests__HistogramTests),
112122
testCase(LineChartTests.__allTests__LineChartTests),
123+
testCase(PerformanceTests.__allTests__PerformanceTests),
113124
testCase(ScatterPlotTests.__allTests__ScatterPlotTests),
114125
testCase(SubPlotTests.__allTests__SubPlotTests),
115126
]

0 commit comments

Comments
 (0)