Skip to content

Commit 0bfb269

Browse files
authored
Merge pull request #8 from matter-labs/dedup-speedup
Optimizing hashing in dedup via fnv
2 parents e799ddc + 3fb16d4 commit 0bfb269

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

collectors/deduplicator.go

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package collectors
22

33
import (
4-
"sort"
5-
"strconv"
64
"time"
75

86
"github.com/prometheus-community/stackdriver_exporter/hash"
@@ -30,7 +28,7 @@ func (d *MetricDeduplicator) CheckAndMark(fqName string, labelKeys, labelValues
3028
return true // Duplicate detected
3129
}
3230
d.sentSignatures[signature] = struct{}{} // Mark as seen
33-
return false // Not a duplicate
31+
return false // Not a duplicate
3432
}
3533

3634
// hashLabelsTimestamp calculates a hash based on FQName, sorted labels, and timestamp.
@@ -39,37 +37,44 @@ func (d *MetricDeduplicator) hashLabelsTimestamp(fqName string, labelKeys, label
3937
dh = hash.Add(dh, fqName)
4038
dh = hash.AddByte(dh, hash.SeparatorByte)
4139

42-
// Create label pairs for stable sorting
43-
pairs := make([]struct {
44-
Key string
45-
Value string
46-
}, len(labelKeys))
47-
for i, key := range labelKeys {
48-
// Ensure we don't go out of bounds if labelValues is shorter (shouldn't happen in normal flow)
49-
val := ""
50-
if i < len(labelValues) {
51-
val = labelValues[i]
52-
}
53-
pairs[i] = struct {
54-
Key string
55-
Value string
56-
}{Key: key, Value: val}
40+
// Build an index permutation so labels can be ordered by key without copying the key/value strings
41+
indices := make([]int, len(labelKeys))
42+
for i := range indices {
43+
indices[i] = i
5744
}
5845

59-
// Sort pairs by key
60-
sort.Slice(pairs, func(i, j int) bool {
61-
return pairs[i].Key < pairs[j].Key
62-
})
46+
// Sort indices by key using a simple O(n^2) exchange sort (compare-and-swap over every pair; not an insertion sort)
47+
// This is faster for small slices than sort.Slice
48+
for i := 0; i < len(indices); i++ {
49+
for j := i + 1; j < len(indices); j++ {
50+
if labelKeys[indices[i]] > labelKeys[indices[j]] {
51+
indices[i], indices[j] = indices[j], indices[i]
52+
}
53+
}
54+
}
6355

6456
// Add sorted key-value pairs to hash
65-
for _, pair := range pairs {
66-
dh = hash.Add(dh, pair.Key)
57+
for _, idx := range indices {
58+
dh = hash.Add(dh, labelKeys[idx])
6759
dh = hash.AddByte(dh, hash.SeparatorByte)
68-
dh = hash.Add(dh, pair.Value)
60+
61+
// Ensure we don't go out of bounds if labelValues is shorter
62+
if idx < len(labelValues) {
63+
dh = hash.Add(dh, labelValues[idx])
64+
}
6965
dh = hash.AddByte(dh, hash.SeparatorByte)
7066
}
7167

72-
// Add timestamp (converted to string)
73-
dh = hash.Add(dh, strconv.FormatInt(ts.UnixNano(), 10))
68+
// Add timestamp using binary operations instead of string conversion
69+
tsNano := ts.UnixNano()
70+
71+
// Mix the whole 64-bit timestamp into the hash in a single XOR-and-multiply step (not byte by byte as in textbook FNV-1a)
72+
dh = hash.AddUint64(dh, uint64(tsNano))
73+
74+
// Mix in the upper 32 bits a second time. UnixNano exceeds 0xFFFFFFFF about 4.3 seconds after the Unix epoch, so this branch runs for every realistic timestamp.
75+
if tsNano > 0xFFFFFFFF {
76+
dh = hash.AddUint64(dh, uint64(tsNano>>32))
77+
}
78+
7479
return dh
7580
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package collectors
2+
3+
import (
4+
"testing"
5+
"time"
6+
)
7+
8+
func BenchmarkHashLabelsTimestamp(b *testing.B) {
9+
dedup := NewMetricDeduplicator()
10+
now := time.Now()
11+
fqName := "benchmark_metric"
12+
keys := []string{"region", "zone", "instance", "project", "service", "method", "version"}
13+
vals := []string{"us-central1", "us-central1-a", "instance-1", "my-project", "api-service", "get", "v1"}
14+
15+
b.ResetTimer()
16+
for i := 0; i < b.N; i++ {
17+
dedup.hashLabelsTimestamp(fqName, keys, vals, now)
18+
}
19+
}

hash/fnv.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,10 @@ func AddByte(h uint64, b byte) uint64 {
4343
h *= prime64
4444
return h
4545
}
46+
47+
// AddUint64 adds a uint64 to a fnv64a hash value, returning the updated hash.
48+
func AddUint64(h uint64, val uint64) uint64 {
49+
h ^= val
50+
h *= prime64
51+
return h
52+
}

0 commit comments

Comments
 (0)