Skip to content

Commit b275f07

Browse files
authored
refactor: remove metrics that are not useful or not written to (#2941)
1 parent 70b40c6 commit b275f07

File tree

8 files changed

+13
-48
lines changed

8 files changed

+13
-48
lines changed

internal/datastore/crdb/pool/balancer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ import (
2323
var (
2424
connectionsPerCRDBNodeCountGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
2525
Name: "crdb_connections_per_node",
26-
Help: "the number of connections spicedb has to each crdb node",
26+
Help: "The number of active connections SpiceDB holds to each CockroachDB node, by pool (read/write). Imbalanced values across nodes suggest the connection balancer is unable to redistribute connections evenly.",
2727
}, []string{"pool", "node_id"})
2828

2929
pruningTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
3030
Name: "crdb_pruning_duration",
31-
Help: "milliseconds spent on one iteration of pruning excess connections",
31+
Help: "Duration in milliseconds of one iteration of the CockroachDB connection balancer pruning excess connections from over-represented nodes. Elevated values indicate the balancer is struggling to rebalance connections.",
3232
Buckets: []float64{.1, .2, .5, 1, 2, 5, 10, 20, 50, 100},
3333
}, []string{"pool"})
3434
)

internal/datastore/crdb/pool/pool.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ type pgxPool interface {
3030

3131
var resetHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
3232
Name: "crdb_client_resets",
33-
Help: "cockroachdb client-side tx reset distribution",
33+
Help: "Distribution of the number of client-side transaction restarts per transaction attempt. Restarts occur when CockroachDB returns a serialization failure (40001) and the driver retries the transaction from scratch. Sustained high values indicate transaction contention.",
3434
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
3535
})
3636

internal/datastore/crdb/watch.go

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"time"
1212

1313
"github.com/jackc/pgx/v5"
14-
"github.com/prometheus/client_golang/prometheus"
1514
"google.golang.org/protobuf/types/known/structpb"
1615
"google.golang.org/protobuf/types/known/timestamppb"
1716

@@ -37,18 +36,6 @@ const (
3736
queryChangefeedPreV22 = "EXPERIMENTAL CHANGEFEED FOR %s WITH updated, cursor = '%s', resolved = '%s';"
3837
)
3938

40-
var retryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
41-
Namespace: "spicedb",
42-
Subsystem: "datastore",
43-
Name: "crdb_watch_retries",
44-
Help: "watch retry distribution",
45-
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
46-
})
47-
48-
func init() {
49-
prometheus.MustRegister(retryHistogram)
50-
}
51-
5239
type changeDetails struct {
5340
Resolved string
5441
Updated string

internal/datastore/proxy/observable.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ var (
2727
Subsystem: "datastore",
2828
Name: "loaded_relationships_count",
2929
Buckets: []float64{0, 1, 3, 10, 32, 100, 316, 1000, 3162, 10000},
30-
Help: "total number of relationships loaded for a query",
30+
Help: "Histogram of the number of relationships loaded per individual datastore query. High p99 values (>1000) may indicate broad permission checks or missing filters.",
3131
})
3232

3333
queryLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{

internal/datastore/proxy/schemacaching/watchingcache.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,21 @@ var namespacesFallbackModeGauge = prometheus.NewGauge(prometheus.GaugeOpts{
2323
Namespace: "spicedb",
2424
Subsystem: "datastore",
2525
Name: "watching_schema_cache_namespaces_fallback_mode",
26-
Help: "value of 1 if the cache is in fallback mode and 0 otherwise",
26+
Help: "Whether the watching schema cache for namespace definitions is in fallback mode (1) or normal mode (0). Fallback is triggered when the CockroachDB changefeed used to track schema updates becomes unavailable; in this state every schema lookup hits the datastore directly.",
2727
})
2828

2929
var caveatsFallbackModeGauge = prometheus.NewGauge(prometheus.GaugeOpts{
3030
Namespace: "spicedb",
3131
Subsystem: "datastore",
3232
Name: "watching_schema_cache_caveats_fallback_mode",
33-
Help: "value of 1 if the cache is in fallback mode and 0 otherwise",
33+
Help: "Whether the watching schema cache for caveat definitions is in fallback mode (1) or normal mode (0). Fallback is triggered when the CockroachDB changefeed used to track schema updates becomes unavailable; in this state every schema lookup hits the datastore directly.",
3434
})
3535

3636
var schemaCacheRevisionGauge = prometheus.NewGauge(prometheus.GaugeOpts{
3737
Namespace: "spicedb",
3838
Subsystem: "datastore",
3939
Name: "watching_schema_cache_tracked_revision",
40-
Help: "the currently tracked max revision for the schema cache",
40+
Help: "The current maximum revision tracked by the CockroachDB changefeed-backed schema cache. A value that is not advancing over time indicates the changefeed has stalled.",
4141
})
4242

4343
var definitionsReadCachedCounter = prometheus.NewCounterVec(prometheus.CounterOpts{

internal/datastore/spanner/watch.go

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"cloud.google.com/go/spanner"
1313
sppb "cloud.google.com/go/spanner/apiv1/spannerpb"
1414
"github.com/cloudspannerecosystem/spanner-change-streams-tail/changestreams"
15-
"github.com/prometheus/client_golang/prometheus"
1615
"github.com/puzpuzpuz/xsync/v4"
1716
"google.golang.org/api/option"
1817

@@ -28,18 +27,6 @@ const (
2827
CombinedChangeStreamName = "combined_change_stream"
2928
)
3029

31-
var retryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
32-
Namespace: "spicedb",
33-
Subsystem: "datastore",
34-
Name: "spanner_watch_retries",
35-
Help: "watch retry distribution",
36-
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
37-
})
38-
39-
func init() {
40-
prometheus.MustRegister(retryHistogram)
41-
}
42-
4330
// Copied from the spanner library: https://github.com/googleapis/google-cloud-go/blob/f03779538f949fb4ad93d5247d3c6b3e5b21091a/spanner/client.go#L67
4431
// License: Apache License, Version 2.0, Copyright 2017 Google LLC
4532
var validDBPattern = regexp.MustCompile("^projects/(?P<project>[^/]+)/instances/(?P<instance>[^/]+)/databases/(?P<database>[^/]+)$")

internal/dispatch/caching/caching.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,33 +62,39 @@ func NewCachingDispatcher(cacheInst cache.Cache[keys.DispatchCacheKey, any], met
6262
Namespace: prometheusNamespace,
6363
Subsystem: prometheusSubsystem,
6464
Name: "check_total",
65+
Help: "Total number of CheckPermission dispatch requests processed.",
6566
})
6667
checkFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
6768
Namespace: prometheusNamespace,
6869
Subsystem: prometheusSubsystem,
6970
Name: "check_from_cache_total",
71+
Help: "Total number of CheckPermission dispatch requests served directly from the dispatch cache, avoiding re-computation.",
7072
})
7173

7274
lookupResourcesTotalCounter := prometheus.NewCounter(prometheus.CounterOpts{
7375
Namespace: prometheusNamespace,
7476
Subsystem: prometheusSubsystem,
7577
Name: "lookup_resources_total",
78+
Help: "Total number of LookupResources dispatch requests processed.",
7679
})
7780
lookupResourcesFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
7881
Namespace: prometheusNamespace,
7982
Subsystem: prometheusSubsystem,
8083
Name: "lookup_resources_from_cache_total",
84+
Help: "Total number of LookupResources dispatch requests served directly from the dispatch cache.",
8185
})
8286

8387
lookupSubjectsTotalCounter := prometheus.NewCounter(prometheus.CounterOpts{
8488
Namespace: prometheusNamespace,
8589
Subsystem: prometheusSubsystem,
8690
Name: "lookup_subjects_total",
91+
Help: "Total number of LookupSubjects dispatch requests processed.",
8792
})
8893
lookupSubjectsFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
8994
Namespace: prometheusNamespace,
9095
Subsystem: prometheusSubsystem,
9196
Name: "lookup_subjects_from_cache_total",
97+
Help: "Total number of LookupSubjects dispatch requests served directly from the dispatch cache.",
9298
})
9399

94100
if metricsEnabled && prometheusSubsystem != "" {

internal/graph/check.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,9 @@ var dispatchChunkCountHistogram = prometheus.NewHistogram(prometheus.HistogramOp
3939
Buckets: []float64{1, 2, 3, 5, 10, 25, 100, 250},
4040
})
4141

42-
var directDispatchQueryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
43-
Name: "spicedb_check_direct_dispatch_query_count",
44-
Help: "number of queries made per direct dispatch",
45-
Buckets: []float64{1, 2},
46-
})
47-
4842
const noOriginalRelation = ""
4943

5044
func init() {
51-
prometheus.MustRegister(directDispatchQueryHistogram)
5245
prometheus.MustRegister(dispatchChunkCountHistogram)
5346
}
5447

@@ -385,10 +378,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest
385378

386379
// If the direct subject or a wildcard form can be found, issue a query for just that
387380
// subject.
388-
var queryCount float64
389-
defer func() {
390-
directDispatchQueryHistogram.Observe(queryCount)
391-
}()
392381

393382
hasDirectSubject := totalDirectSubjects > 0
394383
hasWildcardSubject := totalWildcardSubjects > 0
@@ -429,8 +418,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest
429418
if err != nil {
430419
return checkResultError(NewCheckFailureErr(err), emptyMetadata)
431420
}
432-
queryCount += 1.0
433-
434421
// Find the matching subject(s).
435422
for rel, err := range it {
436423
if err != nil {
@@ -482,8 +469,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest
482469
if err != nil {
483470
return checkResultError(NewCheckFailureErr(err), emptyMetadata)
484471
}
485-
queryCount += 1.0
486-
487472
// Build the set of subjects over which to dispatch, along with metadata for
488473
// mapping over caveats (if any).
489474
checksToDispatch := newCheckDispatchSet()

0 commit comments

Comments
 (0)