From 806e5c41162b3e29e5aff08fcd55e54b860f93e3 Mon Sep 17 00:00:00 2001 From: Chris Randles Date: Wed, 17 Dec 2025 20:16:49 -0500 Subject: [PATCH] perf: [pkg/timeseries/dataset] refactor start and end calculation Signed-off-by: Chris Randles --- pkg/timeseries/dataset/dataset.go | 30 +------ pkg/timeseries/dataset/point.go | 56 +++++-------- pkg/timeseries/dataset/point_test.go | 115 +++++++++++++++++++-------- 3 files changed, 109 insertions(+), 92 deletions(-) diff --git a/pkg/timeseries/dataset/dataset.go b/pkg/timeseries/dataset/dataset.go index fbce5516c..9ccb04e88 100644 --- a/pkg/timeseries/dataset/dataset.go +++ b/pkg/timeseries/dataset/dataset.go @@ -146,18 +146,8 @@ func (ds *DataSet) CroppedClone(e timeseries.Extent) timeseries.Timeseries { sc := &Series{ Header: s.Header.Clone(), } - start, end, l := 0, -1, len(s.Points) - var iwg sync.WaitGroup - iwg.Add(2) - go func() { - start = s.Points.onOrJustAfter(startNS, 0, l-1) - iwg.Done() - }() - go func() { - end = s.Points.onOrJustBefore(endNS, 0, l-1) + 1 - iwg.Done() - }() - iwg.Wait() + l := len(s.Points) + start, end := s.Points.findRange(startNS, endNS, 0, l-1) if start < l && end <= l && end > start { sc.Points = s.Points.CloneRange(start, end) sc.PointSize = sc.Points.Size() @@ -369,20 +359,8 @@ func (ds *DataSet) DefaultRangeCropper(e timeseries.Extent) { index := j wg.Go(func() { - var ( - start, end, l = 0, -1, len(s.Points) - iwg sync.WaitGroup - ) - iwg.Add(2) - go func() { - start = s.Points.onOrJustAfter(startNS, 0, l-1) - iwg.Done() - }() - go func() { - end = s.Points.onOrJustBefore(endNS, 0, l-1) + 1 - iwg.Done() - }() - iwg.Wait() + l := len(s.Points) + start, end := s.Points.findRange(startNS, endNS, 0, l-1) if start < l && end <= l && end > start { s.Points = s.Points.CloneRange(start, end) s.PointSize = s.Points.Size() diff --git a/pkg/timeseries/dataset/point.go b/pkg/timeseries/dataset/point.go index c0fefe973..7a65d3298 100644 --- a/pkg/timeseries/dataset/point.go +++ b/pkg/timeseries/dataset/point.go @@ -20,6 +20,7 @@ package dataset import ( "slices" + "sort" "github.com/trickstercache/trickster/v2/pkg/timeseries/epoch" "github.com/trickstercache/trickster/v2/pkg/util/cmp" @@ -121,43 +122,30 @@ func (p Points) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -// binarySearchEpoch performs a binary search on Points based on epoch values -// with customizable comparison and adjustment logic -func (p Points) binarySearchEpoch(ts epoch.Epoch, s, e int, - baseCondition func(epoch.Epoch, epoch.Epoch) bool, - adjustment int, -) int { - if s >= e { - if baseCondition(p[s].Epoch, ts) { - return s + adjustment - } - return s - } - mid := (s + e) >> 1 - if p[mid].Epoch < ts { - return p.binarySearchEpoch(ts, mid+1, e, baseCondition, adjustment) +// findRange finds both the start and end indices for a time range that is between the start and end epochs. +func (p Points) findRange(startEpoch, endEpoch epoch.Epoch, s, e int) (int, int) { + if len(p) == 0 || s > e { + return 0, 0 } - return p.binarySearchEpoch(ts, s, mid, baseCondition, adjustment) -} -// onOrJustAfter returns the index of the element having a value of ts. if the value of ts -// is not in p, the index of the first element whose value is greater than ts is returned. -// onOrJustafter requires p to be sorted chronologically from earliest to latest epoch. -// This is a variation of justGreater found @ https://stackoverflow.com/a/56815151 -func (p Points) onOrJustAfter(ts epoch.Epoch, s, e int) int { - return p.binarySearchEpoch(ts, s, e, func(pEpoch, targetEpoch epoch.Epoch) bool { - return pEpoch < targetEpoch - }, 1) -} + // find start index (looking for the first index after s and before e where Epoch >= startEpoch) + idxStart := sort.Search((e-s)+1, func(i int) bool { + return p[s+i].Epoch >= startEpoch + }) + startPos := s + idxStart + if startPos > e { + return startPos, startPos + } -// onOrJustBefore returns the index of the element having a value of ts. if the value of ts -// is not in p, the index of the last element whose value is less than ts is returned. -// onOrJustBefore requires p to be sorted chronologically from earliest to latest epoch. -// This is a variation of justGreater found @ https://stackoverflow.com/a/56815151 -func (p Points) onOrJustBefore(ts epoch.Epoch, s, e int) int { - return p.binarySearchEpoch(ts, s, e, func(pEpoch, targetEpoch epoch.Epoch) bool { - return pEpoch > targetEpoch - }, -1) + // find end index (starting from e and going backwards to s, looking for the first index where Epoch <= endEpoch) + idxEnd := sort.Search((e-s)+1, func(i int) bool { + return p[e-i].Epoch <= endEpoch + }) + endPos := max( + // guard against empty range + e-idxEnd+1, startPos, + ) + return startPos, endPos } // sortAndDedupe sorts and deduplicates p in-place. Because deduplication can diff --git a/pkg/timeseries/dataset/point_test.go b/pkg/timeseries/dataset/point_test.go index 1d045961a..e867264a5 100644 --- a/pkg/timeseries/dataset/point_test.go +++ b/pkg/timeseries/dataset/point_test.go @@ -214,44 +214,95 @@ func TestPointsSort(t *testing.T) { } } -func TestOnOrJustAfter(t *testing.T) { - pts := testPoints() - i := pts.onOrJustAfter(0, 0, len(pts)-1) - if i != 0 { - t.Errorf("expected %d got %d", 0, i) - } - - i = pts.onOrJustAfter(epoch.Epoch(6*time.Second), 0, 0) - if i != 1 { - t.Errorf("expected %d got %d", 1, i) +func TestFindRange(t *testing.T) { + pts := Points{ + Point{Epoch: epoch.Epoch(1 * time.Second), Size: 1, Values: []any{1}}, + Point{Epoch: epoch.Epoch(3 * time.Second), Size: 1, Values: []any{2}}, + Point{Epoch: epoch.Epoch(5 * time.Second), Size: 1, Values: []any{3}}, + Point{Epoch: epoch.Epoch(7 * time.Second), Size: 1, Values: []any{4}}, + Point{Epoch: epoch.Epoch(9 * time.Second), Size: 1, Values: []any{5}}, } - i = pts.onOrJustAfter(epoch.Epoch(6*time.Second), 0, 1) - if i != 1 { - t.Errorf("expected %d got %d", 1, i) - } -} - -func TestOnOrJustBefore(t *testing.T) { - pts := testPoints() - i := pts.onOrJustBefore(0, 0, len(pts)-1) - if i != -1 { - t.Errorf("expected %d got %d", -1, i) + tests := []struct { + name string + startEpoch epoch.Epoch + endEpoch epoch.Epoch + wantStart int + wantEnd int + }{ + { + name: "exact match both ends", + startEpoch: epoch.Epoch(3 * time.Second), + endEpoch: epoch.Epoch(7 * time.Second), + wantStart: 1, + wantEnd: 4, + }, + { + name: "start before data, end in middle", + startEpoch: epoch.Epoch(0), + endEpoch: epoch.Epoch(5 * time.Second), + wantStart: 0, + wantEnd: 3, + }, + { + name: "start in middle, end after data", + startEpoch: epoch.Epoch(6 * time.Second), + endEpoch: epoch.Epoch(15 * time.Second), + wantStart: 3, + wantEnd: 5, + }, + { + name: "range entirely before data", + startEpoch: epoch.Epoch(-5 * time.Second), + endEpoch: epoch.Epoch(-1 * time.Second), + wantStart: 0, + wantEnd: 0, + }, + { + name: "range entirely after data", + startEpoch: epoch.Epoch(15 * time.Second), + endEpoch: epoch.Epoch(20 * time.Second), + wantStart: 5, + wantEnd: 5, + }, + { + name: "single point range", + startEpoch: epoch.Epoch(5 * time.Second), + endEpoch: epoch.Epoch(5 * time.Second), + wantStart: 2, + wantEnd: 3, + }, + { + name: "gap in data - start in gap", + startEpoch: epoch.Epoch(4 * time.Second), + endEpoch: epoch.Epoch(6 * time.Second), + wantStart: 2, + wantEnd: 3, + }, } - - i = pts.onOrJustBefore(epoch.Epoch(6*time.Second), 0, 0) - if i != 0 { - t.Errorf("expected %d got %d", 0, i) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotStart, gotEnd := pts.findRange(tt.startEpoch, tt.endEpoch, 0, len(pts)-1) + require.Equal(t, tt.wantStart, gotStart, "start value not expected") + require.Equal(t, tt.wantEnd, gotEnd, "end value not expected") + require.LessOrEqual(t, gotStart, gotEnd) + require.False(t, gotStart < 0 || gotStart > len(pts), "start index out of bounds") + require.False(t, gotEnd < 0 || gotEnd > len(pts), "end index out of bounds") + }) } - i = pts.onOrJustAfter(epoch.Epoch(15*time.Second), 0, 1) - if i != 2 { - t.Errorf("expected %d got %d", 2, i) - } + t.Run("empty points", func(t *testing.T) { + start, end := Points{}.findRange(epoch.Epoch(1*time.Second), epoch.Epoch(5*time.Second), 0, -1) + require.False(t, start != 0 || end != 0, "should return 0,0") + }) +} - i = pts.onOrJustAfter(epoch.Epoch(6*time.Second), 0, 1) - if i != 1 { - t.Errorf("expected %d got %d", 1, i) +func BenchmarkFindRange(b *testing.B) { + pts := genTestPoints(0, 10000) // Create a large dataset for meaningful benchmarks + startEpoch := epoch.Epoch(2500 * time.Second) + endEpoch := epoch.Epoch(7500 * time.Second) + for i := 0; i < b.N; i++ { + _, _ = pts.findRange(startEpoch, endEpoch, 0, len(pts)-1) } }