Skip to content

Commit 0e0e6bf

Browse files
author
beorn7
committed
Rework the signature aka fingerprint functions.
Move everything over to a more expensive but less collision-prone way. However, keep the old fingerprinting under the name "FastFingerprint" to be used in the storage layer (where we need collision detection anyway).
1 parent 6efaf95 commit 0e0e6bf

File tree

4 files changed

+161
-56
lines changed

4 files changed

+161
-56
lines changed

model/metric.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ type Metric map[LabelName]LabelValue
2828

2929
// Equal compares the fingerprints of both metrics.
3030
func (m Metric) Equal(o Metric) bool {
31+
// TODO do an actual map comparison
3132
return m.Fingerprint().Equal(o.Fingerprint())
3233
}
3334

3435
// Before compares the fingerprints of both metrics.
3536
func (m Metric) Before(o Metric) bool {
37+
// TODO do an actual map comparison
3638
return m.Fingerprint().Less(o.Fingerprint())
3739
}
3840

@@ -67,6 +69,12 @@ func (m Metric) Fingerprint() Fingerprint {
6769
return metricToFingerprint(m)
6870
}
6971

72+
// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
73+
// algorithm, which is, however, more susceptible to hash collisions.
74+
func (m Metric) FastFingerprint() Fingerprint {
75+
return metricToFastFingerprint(m)
76+
}
77+
7078
// Clone returns a copy of the Metric.
7179
func (m Metric) Clone() Metric {
7280
clone := Metric{}

model/metric_test.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,47 +17,56 @@ import "testing"
1717

1818
func testMetric(t testing.TB) {
1919
var scenarios = []struct {
20-
input Metric
21-
fingerprint Fingerprint
20+
input Metric
21+
fingerprint Fingerprint
22+
fastFingerprint Fingerprint
2223
}{
2324
{
24-
input: Metric{},
25-
fingerprint: 14695981039346656037,
25+
input: Metric{},
26+
fingerprint: 14695981039346656037,
27+
fastFingerprint: 14695981039346656037,
2628
},
2729
{
2830
input: Metric{
2931
"first_name": "electro",
3032
"occupation": "robot",
3133
"manufacturer": "westinghouse",
3234
},
33-
fingerprint: 11310079640881077873,
35+
fingerprint: 5911716720268894962,
36+
fastFingerprint: 11310079640881077873,
3437
},
3538
{
3639
input: Metric{
3740
"x": "y",
3841
},
39-
fingerprint: 13948396922932177635,
42+
fingerprint: 8241431561484471700,
43+
fastFingerprint: 13948396922932177635,
4044
},
4145
{
4246
input: Metric{
4347
"a": "bb",
4448
"b": "c",
4549
},
46-
fingerprint: 3198632812309449502,
50+
fingerprint: 3016285359649981711,
51+
fastFingerprint: 3198632812309449502,
4752
},
4853
{
4954
input: Metric{
5055
"a": "b",
5156
"bb": "c",
5257
},
53-
fingerprint: 5774953389407657638,
58+
fingerprint: 7122421792099404749,
59+
fastFingerprint: 5774953389407657638,
5460
},
5561
}
5662

5763
for i, scenario := range scenarios {
5864
if scenario.fingerprint != scenario.input.Fingerprint() {
5965
t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint())
6066
}
67+
if scenario.fastFingerprint != scenario.input.FastFingerprint() {
68+
t.Errorf("%d. expected %d, got %d", i, scenario.fastFingerprint, scenario.input.FastFingerprint())
69+
}
6170
}
6271
}
6372

model/signature.go

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"bytes"
1818
"hash"
1919
"hash/fnv"
20+
"sort"
2021
"sync"
2122
)
2223

@@ -46,30 +47,37 @@ func getHashAndBuf() *hashAndBuf {
4647
}
4748

4849
func putHashAndBuf(hb *hashAndBuf) {
50+
hb.h.Reset()
51+
hb.b.Reset()
4952
hashAndBufPool.Put(hb)
5053
}
5154

52-
// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given
53-
// label set.
55+
// LabelsToSignature returns a quasi-unique signature (i.e., fingerprint) for a
56+
// given label set. (Collisions are possible but unlikely if the number of label
57+
// sets the function is applied to is small.)
5458
func LabelsToSignature(labels map[string]string) uint64 {
5559
if len(labels) == 0 {
5660
return emptyLabelSignature
5761
}
5862

59-
var result uint64
63+
labelNames := make([]string, 0, len(labels))
64+
for labelName := range labels {
65+
labelNames = append(labelNames, labelName)
66+
}
67+
sort.Strings(labelNames)
68+
6069
hb := getHashAndBuf()
6170
defer putHashAndBuf(hb)
6271

63-
for labelName, labelValue := range labels {
72+
for _, labelName := range labelNames {
6473
hb.b.WriteString(labelName)
6574
hb.b.WriteByte(SeparatorByte)
66-
hb.b.WriteString(labelValue)
75+
hb.b.WriteString(labels[labelName])
76+
hb.b.WriteByte(SeparatorByte)
6777
hb.h.Write(hb.b.Bytes())
68-
result ^= hb.h.Sum64()
69-
hb.h.Reset()
7078
hb.b.Reset()
7179
}
72-
return result
80+
return hb.h.Sum64()
7381
}
7482

7583
// metricToFingerprint works exactly as LabelsToSignature but takes a Metric as
@@ -79,6 +87,34 @@ func metricToFingerprint(m Metric) Fingerprint {
7987
return Fingerprint(emptyLabelSignature)
8088
}
8189

90+
labelNames := make(LabelNames, 0, len(m))
91+
for labelName := range m {
92+
labelNames = append(labelNames, labelName)
93+
}
94+
sort.Sort(labelNames)
95+
96+
hb := getHashAndBuf()
97+
defer putHashAndBuf(hb)
98+
99+
for _, labelName := range labelNames {
100+
hb.b.WriteString(string(labelName))
101+
hb.b.WriteByte(SeparatorByte)
102+
hb.b.WriteString(string(m[labelName]))
103+
hb.b.WriteByte(SeparatorByte)
104+
hb.h.Write(hb.b.Bytes())
105+
hb.b.Reset()
106+
}
107+
return Fingerprint(hb.h.Sum64())
108+
}
109+
110+
// metricToFastFingerprint works similarly to metricToFingerprint but uses a
111+
// faster and less allocation-heavy hash function, which is more susceptible to
112+
// hash collisions. Therefore, collision detection should be applied.
113+
func metricToFastFingerprint(m Metric) Fingerprint {
114+
if len(m) == 0 {
115+
return Fingerprint(emptyLabelSignature)
116+
}
117+
82118
var result uint64
83119
hb := getHashAndBuf()
84120
defer putHashAndBuf(hb)
@@ -97,26 +133,27 @@ func metricToFingerprint(m Metric) Fingerprint {
97133

98134
// SignatureForLabels works like LabelsToSignature but takes a Metric as
99135
// parameter (rather than a label map) and only includes the labels with the
100-
// specified LabelNames into the signature calculation.
136+
// specified LabelNames into the signature calculation. The labels passed in
137+
// will be sorted in place by this function.
101138
func SignatureForLabels(m Metric, labels LabelNames) uint64 {
102139
if len(m) == 0 || len(labels) == 0 {
103140
return emptyLabelSignature
104141
}
105142

106-
var result uint64
143+
sort.Sort(labels)
144+
107145
hb := getHashAndBuf()
108146
defer putHashAndBuf(hb)
109147

110148
for _, label := range labels {
111149
hb.b.WriteString(string(label))
112150
hb.b.WriteByte(SeparatorByte)
113151
hb.b.WriteString(string(m[label]))
152+
hb.b.WriteByte(SeparatorByte)
114153
hb.h.Write(hb.b.Bytes())
115-
result ^= hb.h.Sum64()
116-
hb.h.Reset()
117154
hb.b.Reset()
118155
}
119-
return result
156+
return hb.h.Sum64()
120157
}
121158

122159
// SignatureWithoutLabels works like LabelsToSignature but takes a Metric as
@@ -127,24 +164,27 @@ func SignatureWithoutLabels(m Metric, labels map[LabelName]struct{}) uint64 {
127164
return emptyLabelSignature
128165
}
129166

130-
var result uint64
167+
labelNames := make(LabelNames, 0, len(m))
168+
for labelName := range m {
169+
if _, exclude := labels[labelName]; !exclude {
170+
labelNames = append(labelNames, labelName)
171+
}
172+
}
173+
if len(labelNames) == 0 {
174+
return emptyLabelSignature
175+
}
176+
sort.Sort(labelNames)
177+
131178
hb := getHashAndBuf()
132179
defer putHashAndBuf(hb)
133180

134-
for labelName, labelValue := range m {
135-
if _, exclude := labels[labelName]; exclude {
136-
continue
137-
}
181+
for _, labelName := range labelNames {
138182
hb.b.WriteString(string(labelName))
139183
hb.b.WriteByte(SeparatorByte)
140-
hb.b.WriteString(string(labelValue))
184+
hb.b.WriteString(string(m[labelName]))
185+
hb.b.WriteByte(SeparatorByte)
141186
hb.h.Write(hb.b.Bytes())
142-
result ^= hb.h.Sum64()
143-
hb.h.Reset()
144187
hb.b.Reset()
145188
}
146-
if result == 0 {
147-
return emptyLabelSignature
148-
}
149-
return result
189+
return hb.h.Sum64()
150190
}

0 commit comments

Comments
 (0)