Skip to content

Commit 3cf50db

Browse files
committed
metricvec: handle hash collision for labeled metrics
While hash collisions are quite rare, the current state of the client library carries a risk of merging two separate label values into a single metric bucket. The effects are near impossible to detect and the result will be missing or incorrect counters. This changeset handles hash collisions by falling back to collision resolution if multiple label values hash to the same value. This works similar to separate chaining using a slice. Extra storage is minimized to only the value key slice to that metrics can be differentiated within a bucket. In general, the cost of handling collisions is completely minimized under normal operation. Performance does show slight increases in certain areas, but these are more likely statistically anomalies. More importantly, zero allocation behavior for metrics is preserved on the fast path. Minimal allocations may be made during collision handling but this has minimal effect. Benchmark comparisons with and without collision resolution follow. ``` benchmark old ns/op new ns/op delta BenchmarkCounterWithLabelValues-4 99.0 107 +8.08% BenchmarkCounterWithLabelValuesConcurrent-4 79.6 91.0 +14.32% BenchmarkCounterWithMappedLabels-4 518 542 +4.63% BenchmarkCounterWithPreparedMappedLabels-4 127 137 +7.87% BenchmarkCounterNoLabels-4 19.5 19.1 -2.05% BenchmarkGaugeWithLabelValues-4 97.4 110 +12.94% BenchmarkGaugeNoLabels-4 12.4 10.3 -16.94% BenchmarkSummaryWithLabelValues-4 1204 915 -24.00% BenchmarkSummaryNoLabels-4 936 847 -9.51% BenchmarkHistogramWithLabelValues-4 147 147 +0.00% BenchmarkHistogramNoLabels-4 50.6 49.3 -2.57% BenchmarkHistogramObserve1-4 37.9 37.5 -1.06% BenchmarkHistogramObserve2-4 122 137 +12.30% BenchmarkHistogramObserve4-4 310 352 +13.55% BenchmarkHistogramObserve8-4 691 729 +5.50% BenchmarkHistogramWrite1-4 3374 3097 -8.21% BenchmarkHistogramWrite2-4 5310 5051 -4.88% BenchmarkHistogramWrite4-4 12094 10690 -11.61% BenchmarkHistogramWrite8-4 19416 17755 -8.55% BenchmarkHandler-4 11934304 13765894 +15.35% BenchmarkSummaryObserve1-4 1119 1105 -1.25% BenchmarkSummaryObserve2-4 3679 3430 -6.77% BenchmarkSummaryObserve4-4 10678 7982 -25.25% BenchmarkSummaryObserve8-4 22974 16689 -27.36% BenchmarkSummaryWrite1-4 25962 14680 -43.46% BenchmarkSummaryWrite2-4 38019 35073 -7.75% BenchmarkSummaryWrite4-4 78027 56816 -27.18% BenchmarkSummaryWrite8-4 117220 132248 +12.82% BenchmarkMetricVecWithLabelValuesBasic-4 138 133 -3.62% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 150 144 -4.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 263 256 -2.66% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 145 155 +6.90% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 606 634 +4.62% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 746 641 -14.08% benchmark old allocs new allocs delta BenchmarkCounterWithLabelValues-4 0 0 +0.00% BenchmarkCounterWithLabelValuesConcurrent-4 0 0 +0.00% BenchmarkCounterWithMappedLabels-4 2 2 +0.00% BenchmarkCounterWithPreparedMappedLabels-4 0 0 +0.00% BenchmarkCounterNoLabels-4 0 0 +0.00% BenchmarkGaugeWithLabelValues-4 0 0 +0.00% BenchmarkGaugeNoLabels-4 0 0 +0.00% BenchmarkSummaryWithLabelValues-4 0 0 +0.00% BenchmarkSummaryNoLabels-4 0 0 +0.00% BenchmarkHistogramWithLabelValues-4 0 0 +0.00% BenchmarkHistogramNoLabels-4 0 0 +0.00% BenchmarkMetricVecWithLabelValuesBasic-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 0 0 +0.00% benchmark old bytes new bytes delta BenchmarkCounterWithLabelValues-4 0 0 +0.00% BenchmarkCounterWithLabelValuesConcurrent-4 0 0 +0.00% BenchmarkCounterWithMappedLabels-4 336 336 +0.00% BenchmarkCounterWithPreparedMappedLabels-4 0 0 +0.00% BenchmarkCounterNoLabels-4 0 0 +0.00% BenchmarkGaugeWithLabelValues-4 0 0 +0.00% BenchmarkGaugeNoLabels-4 0 0 +0.00% BenchmarkSummaryWithLabelValues-4 0 0 +0.00% BenchmarkSummaryNoLabels-4 0 0 +0.00% BenchmarkHistogramWithLabelValues-4 0 0 +0.00% BenchmarkHistogramNoLabels-4 0 0 +0.00% BenchmarkMetricVecWithLabelValuesBasic-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 0 0 +0.00% ``` Signed-off-by: Stephen J Day <[email protected]>
1 parent c4004ef commit 3cf50db

File tree

2 files changed

+114
-20
lines changed

2 files changed

+114
-20
lines changed

prometheus/vec.go

Lines changed: 105 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,29 @@ import (
2525
// provided in this package.
2626
type MetricVec struct {
2727
mtx sync.RWMutex // Protects the children.
28-
children map[uint64]Metric
28+
children map[uint64][]metricLabelValue
2929
desc *Desc
3030

3131
newMetric func(labelValues ...string) Metric
32+
hashAdd func(h uint64, s string) uint64 // replace hash function for testing collision handling
3233
}
3334

3435
// newMetricVec returns an initialized MetricVec. The concrete value is
3536
// returned for embedding into another struct.
3637
func newMetricVec(desc *Desc, newMetric func(lvs ...string) Metric) MetricVec {
3738
return MetricVec{
38-
children: map[uint64]Metric{},
39+
children: map[uint64][]metricLabelValue{},
3940
desc: desc,
4041
newMetric: newMetric,
42+
hashAdd: hashAdd,
4143
}
4244
}
4345

46+
type metricLabelValue struct {
47+
values []string
48+
metric Metric
49+
}
50+
4451
// Describe implements Collector. The length of the returned slice
4552
// is always one.
4653
func (m *MetricVec) Describe(ch chan<- *Desc) {
@@ -52,8 +59,10 @@ func (m *MetricVec) Collect(ch chan<- Metric) {
5259
m.mtx.RLock()
5360
defer m.mtx.RUnlock()
5461

55-
for _, metric := range m.children {
56-
ch <- metric
62+
for _, metrics := range m.children {
63+
for _, metric := range metrics {
64+
ch <- metric.metric
65+
}
5766
}
5867
}
5968

@@ -88,7 +97,7 @@ func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
8897
}
8998

9099
m.mtx.RLock()
91-
metric, ok := m.children[h]
100+
metric, ok := m.getMetric(h, lvs...)
92101
m.mtx.RUnlock()
93102
if ok {
94103
return metric, nil
@@ -118,11 +127,12 @@ func (m *MetricVec) GetMetricWith(labels Labels) (Metric, error) {
118127
}
119128

120129
m.mtx.RLock()
121-
metric, ok := m.children[h]
122-
m.mtx.RUnlock()
123-
if ok {
124-
return metric, nil
130+
metrics, ok := m.children[h]
131+
if ok && len(metrics) == 1 {
132+
m.mtx.RUnlock()
133+
return metrics[0].metric, nil
125134
}
135+
m.mtx.RUnlock()
126136

127137
lvs := make([]string, len(labels))
128138
for i, label := range m.desc.variableLabels {
@@ -178,11 +188,7 @@ func (m *MetricVec) DeleteLabelValues(lvs ...string) bool {
178188
if err != nil {
179189
return false
180190
}
181-
if _, ok := m.children[h]; !ok {
182-
return false
183-
}
184-
delete(m.children, h)
185-
return true
191+
return m.deleteByHash(h, lvs...)
186192
}
187193

188194
// Delete deletes the metric where the variable labels are the same as those
@@ -203,10 +209,38 @@ func (m *MetricVec) Delete(labels Labels) bool {
203209
if err != nil {
204210
return false
205211
}
206-
if _, ok := m.children[h]; !ok {
212+
213+
var lvs []string
214+
for _, k := range m.desc.variableLabels {
215+
lvs = append(lvs, labels[k])
216+
}
217+
218+
return m.deleteByHash(h, lvs...)
219+
}
220+
221+
// deleteByHash removes the metric from the hash bucket h. If there are
222+
// multiple matches in the bucket, use lvs to select a metric and remove only
223+
// that metric.
224+
func (m *MetricVec) deleteByHash(h uint64, lvs ...string) bool {
225+
metrics, ok := m.children[h]
226+
if !ok {
207227
return false
208228
}
209-
delete(m.children, h)
229+
230+
if len(metrics) < 2 {
231+
delete(m.children, h)
232+
}
233+
234+
i := findMetric(lvs, metrics)
235+
if i >= len(metrics) {
236+
return false
237+
}
238+
239+
if len(metrics) > 1 {
240+
m.children[h] = append(metrics[:i], metrics[i+1:]...)
241+
} else {
242+
delete(m.children, h)
243+
}
210244
return true
211245
}
212246

@@ -226,7 +260,7 @@ func (m *MetricVec) hashLabelValues(vals []string) (uint64, error) {
226260
}
227261
h := hashNew()
228262
for _, val := range vals {
229-
h = hashAdd(h, val)
263+
h = m.hashAdd(h, val)
230264
}
231265
return h, nil
232266
}
@@ -241,19 +275,70 @@ func (m *MetricVec) hashLabels(labels Labels) (uint64, error) {
241275
if !ok {
242276
return 0, fmt.Errorf("label name %q missing in label map", label)
243277
}
244-
h = hashAdd(h, val)
278+
h = m.hashAdd(h, val)
245279
}
246280
return h, nil
247281
}
248282

249283
func (m *MetricVec) getOrCreateMetric(hash uint64, labelValues ...string) Metric {
250-
metric, ok := m.children[hash]
284+
metric, ok := m.getMetric(hash, labelValues...)
251285
if !ok {
252286
// Copy labelValues. Otherwise, they would be allocated even if we don't go
253287
// down this code path.
254288
copiedLabelValues := append(make([]string, 0, len(labelValues)), labelValues...)
255289
metric = m.newMetric(copiedLabelValues...)
256-
m.children[hash] = metric
290+
m.children[hash] = append(m.children[hash], metricLabelValue{values: copiedLabelValues, metric: metric})
257291
}
258292
return metric
259293
}
294+
295+
// getMetric while handling possible collisions in the hash space. Must be
296+
// called while holding read mutex.
297+
func (m *MetricVec) getMetric(h uint64, lvs ...string) (Metric, bool) {
298+
metrics, ok := m.children[h]
299+
if ok {
300+
return m.selectMetric(lvs, metrics)
301+
}
302+
303+
return nil, false
304+
}
305+
306+
func (m *MetricVec) selectMetric(lvs []string, metrics []metricLabelValue) (Metric, bool) {
307+
switch len(metrics) {
308+
case 0:
309+
return nil, false
310+
case 1:
311+
// collisions are rare, this should be the fast path.
312+
return metrics[0].metric, true
313+
}
314+
315+
i := findMetric(lvs, metrics)
316+
317+
if i < len(metrics) {
318+
return metrics[i].metric, true
319+
}
320+
321+
return nil, false
322+
}
323+
324+
// findMetric returns the index of the matching metric or len(metrics) if not
325+
// found.
326+
func findMetric(lvs []string, metrics []metricLabelValue) int {
327+
next:
328+
for i, metric := range metrics {
329+
if len(metric.values) != len(lvs) {
330+
continue
331+
}
332+
333+
for j, v := range metric.values {
334+
if v != lvs[j] {
335+
continue next
336+
}
337+
}
338+
// falling out of the loop here means we have a match!
339+
340+
return i
341+
}
342+
343+
return len(metrics)
344+
}

prometheus/vec_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,16 @@ func TestDelete(t *testing.T) {
5252

5353
func TestDeleteLabelValues(t *testing.T) {
5454
vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
55+
testDeleteLabelValues(t, vec)
56+
}
57+
58+
func TestDeleteLabelValuesWithCollisions(t *testing.T) {
59+
vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
60+
vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
61+
testDeleteLabelValues(t, vec)
62+
}
5563

64+
func testDeleteLabelValues(t *testing.T, vec *MetricVec) {
5665
if got, want := vec.DeleteLabelValues("v1", "v2"), false; got != want {
5766
t.Errorf("got %v, want %v", got, want)
5867
}

0 commit comments

Comments
 (0)