|
9 | 9 | package aggmetric
|
10 | 10 |
|
11 | 11 | import (
|
| 12 | + "context" |
12 | 13 | "hash/fnv"
|
13 | 14 | "strings"
|
14 | 15 | "time"
|
15 | 16 |
|
16 | 17 | "github.com/cockroachdb/cockroach/pkg/util/cache"
|
| 18 | + "github.com/cockroachdb/cockroach/pkg/util/log" |
17 | 19 | "github.com/cockroachdb/cockroach/pkg/util/metric"
|
18 | 20 | "github.com/cockroachdb/cockroach/pkg/util/syncutil"
|
| 21 | + "github.com/cockroachdb/cockroach/pkg/util/timeutil" |
19 | 22 | "github.com/cockroachdb/errors"
|
| 23 | + "github.com/cockroachdb/redact" |
20 | 24 | "github.com/google/btree"
|
21 | 25 | io_prometheus_client "github.com/prometheus/client_model/go"
|
22 | 26 | )
|
23 | 27 |
|
24 | 28 | var delimiter = []byte{'_'}
|
25 | 29 |
|
26 | 30 | const (
|
27 |
| - dbLabel = "database" |
28 |
| - appLabel = "application_name" |
29 |
| - cacheSize = 5000 |
30 |
| - childMetricTTL = 20 * time.Second |
| 31 | + dbLabel = "database" |
| 32 | + appLabel = "application_name" |
| 33 | + cacheSize = 5000 |
| 34 | + retentionTimeTillEviction = 20 * time.Second |
31 | 35 | )
|
32 | 36 |
|
// noOpCtx is the context handed to log calls in this file, which have no
// caller-supplied context to pass along. It is context.TODO(), so it carries
// no values, deadline, or cancellation signal.
var noOpCtx = context.TODO()
| 39 | + |
33 | 40 | // Builder is used to ease constructing metrics with the same labels.
|
34 | 41 | type Builder struct {
|
35 | 42 | labels []string
|
@@ -99,6 +106,41 @@ func (cs *childSet) initWithBTreeStorageType(labels []string) {
|
99 | 106 | }
|
100 | 107 | }
|
101 | 108 |
|
| 109 | +func (cs *childSet) initWithCacheStorageType(labels []string, metricName string) { |
| 110 | + cs.labels = labels |
| 111 | + |
| 112 | + cs.mu.children = &UnorderedCacheWrapper{ |
| 113 | + cache: cache.NewUnorderedCache(cache.Config{ |
| 114 | + Policy: cache.CacheLRU, |
| 115 | + ShouldEvict: func(size int, key, value any) bool { |
| 116 | + if childMetric, ok := value.(ChildMetric); ok { |
| 117 | + // Check if the child metric has exceeded 20 seconds and cache size is greater than 5000 |
| 118 | + if labelSliceCachedChildMetric, ok := childMetric.(LabelSliceCachedChildMetric); ok { |
| 119 | + currentTime := timeutil.Now() |
| 120 | + age := currentTime.Sub(labelSliceCachedChildMetric.CreatedAt()) |
| 121 | + return size > cacheSize && age > retentionTimeTillEviction |
| 122 | + } |
| 123 | + } |
| 124 | + return size > cacheSize |
| 125 | + }, |
| 126 | + OnEvictedEntry: func(entry *cache.Entry) { |
| 127 | + if childMetric, ok := entry.Value.(ChildMetric); ok { |
| 128 | + labelValues := childMetric.labelValues() |
| 129 | + |
| 130 | + // log metric name and label values of evicted entry |
| 131 | + log.Dev.Infof(noOpCtx, "evicted child of metric %s with label values: %s\n", |
| 132 | + redact.SafeString(metricName), redact.SafeString(strings.Join(labelValues, ","))) |
| 133 | + |
| 134 | + // Invoke DecrementAndDeleteIfZero from ChildMetric which relies on LabelSliceCache |
| 135 | + if boundedChild, ok := childMetric.(LabelSliceCachedChildMetric); ok { |
| 136 | + boundedChild.DecrementLabelSliceCacheReference() |
| 137 | + } |
| 138 | + } |
| 139 | + }, |
| 140 | + }), |
| 141 | + } |
| 142 | +} |
| 143 | + |
102 | 144 | func getCacheStorage() *cache.UnorderedCache {
|
103 | 145 | cacheStorage := cache.NewUnorderedCache(cache.Config{
|
104 | 146 | Policy: cache.CacheLRU,
|
@@ -165,6 +207,75 @@ func (cs *childSet) get(labelVals ...string) (ChildMetric, bool) {
|
165 | 207 | return cs.mu.children.Get(labelVals...)
|
166 | 208 | }
|
167 | 209 |
|
| 210 | +func (cs *childSet) getOrAddWithLabelSliceCache( |
| 211 | + metricName string, |
| 212 | + createFn func(key uint64, cache *metric.LabelSliceCache) LabelSliceCachedChildMetric, |
| 213 | + labelSliceCache *metric.LabelSliceCache, |
| 214 | + labelVals ...string, |
| 215 | +) ChildMetric { |
| 216 | + // Validate label values count |
| 217 | + if len(labelVals) != len(cs.labels) { |
| 218 | + if log.V(2) { |
| 219 | + log.Dev.Errorf(noOpCtx, |
| 220 | + "cannot add child with %d label values %v to metric %s with %d labels %s", |
| 221 | + len(labelVals), redact.SafeString(metricName), redact.SafeString(strings.Join(labelVals, ",")), |
| 222 | + len(cs.labels), redact.SafeString(strings.Join(cs.labels, ","))) |
| 223 | + } |
| 224 | + return nil |
| 225 | + } |
| 226 | + |
| 227 | + cs.mu.Lock() |
| 228 | + defer cs.mu.Unlock() |
| 229 | + |
| 230 | + // Create a LabelSliceCacheKey from the label. |
| 231 | + key := metricKey(labelVals...) |
| 232 | + |
| 233 | + // Check if the child already exists |
| 234 | + if child, ok := cs.mu.children.GetValue(key); ok { |
| 235 | + return child |
| 236 | + } |
| 237 | + |
| 238 | + // Create and add the new child |
| 239 | + child := createFn(key, labelSliceCache) |
| 240 | + err := cs.mu.children.AddKey(key, child) |
| 241 | + if err != nil { |
| 242 | + if log.V(2) { |
| 243 | + log.Dev.Errorf(context.TODO(), "child metric creation failed for metric %s with error %v", redact.SafeString(metricName), err) |
| 244 | + } |
| 245 | + return nil |
| 246 | + } |
| 247 | + return child |
| 248 | +} |
| 249 | + |
| 250 | +// EachWithLabels is a generic implementation for iterating over child metrics and building prometheus metrics. |
| 251 | +// This can be used by any aggregate metric type that embeds childSet. |
| 252 | +func (cs *childSet) EachWithLabels( |
| 253 | + labels []*io_prometheus_client.LabelPair, |
| 254 | + f func(metric *io_prometheus_client.Metric), |
| 255 | + labelCache *metric.LabelSliceCache, |
| 256 | +) { |
| 257 | + cs.mu.Lock() |
| 258 | + defer cs.mu.Unlock() |
| 259 | + |
| 260 | + cs.mu.children.ForEach(func(cm ChildMetric) { |
| 261 | + m := cm.ToPrometheusMetric() |
| 262 | + childLabels := make([]*io_prometheus_client.LabelPair, 0, len(labels)+len(cs.labels)) |
| 263 | + childLabels = append(childLabels, labels...) |
| 264 | + lvs := cm.labelValues() |
| 265 | + key := metricKey(lvs...) |
| 266 | + labelValueCacheValues, _ := labelCache.Get(metric.LabelSliceCacheKey(key)) |
| 267 | + for i := range cs.labels { |
| 268 | + childLabels = append(childLabels, &io_prometheus_client.LabelPair{ |
| 269 | + Name: &cs.labels[i], |
| 270 | + Value: &labelValueCacheValues.LabelValues[i], |
| 271 | + }) |
| 272 | + } |
| 273 | + |
| 274 | + m.Label = childLabels |
| 275 | + f(m) |
| 276 | + }) |
| 277 | +} |
| 278 | + |
168 | 279 | // clear method removes all children from the childSet. It does not reset parent metric values.
|
169 | 280 | // Method should cautiously be used when childSet is reinitialised/updated. Today, it is
|
170 | 281 | // only used when cluster settings are updated to support app and db label values. For normal
|
@@ -321,6 +432,7 @@ type ChildMetric interface {
|
// LabelSliceCachedChildMetric is a ChildMetric that additionally exposes the
// two methods the cache-backed child storage relies on.
type LabelSliceCachedChildMetric interface {
	ChildMetric
	// CreatedAt returns the time the cache eviction policy measures the
	// child's age from.
	CreatedAt() time.Time
	// DecrementLabelSliceCacheReference is called when the child is evicted
	// from the cache storage.
	// NOTE(review): presumably releases the child's reference in the
	// metric.LabelSliceCache; confirm against the implementations.
	DecrementLabelSliceCacheReference()
}
|
325 | 437 |
|
326 | 438 | type labelValuer interface {
|
@@ -368,7 +480,7 @@ func (ucw *UnorderedCacheWrapper) GetValue(key uint64) (ChildMetric, bool) {
|
368 | 480 |
|
369 | 481 | func (ucw *UnorderedCacheWrapper) AddKey(key uint64, metric ChildMetric) error {
|
370 | 482 | if _, ok := ucw.cache.Get(key); ok {
|
371 |
| - return errors.Newf("child %v already exists\n", metric.labelValues()) |
| 483 | + return errors.Newf("child %s already exists\n", redact.SafeString(strings.Join(metric.labelValues(), ","))) |
372 | 484 | }
|
373 | 485 | ucw.cache.Add(key, metric)
|
374 | 486 | return nil
|
|
0 commit comments