Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions internal/datastore/crdb/pool/balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ import (
var (
connectionsPerCRDBNodeCountGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "crdb_connections_per_node",
Help: "the number of connections spicedb has to each crdb node",
Help: "The number of active connections SpiceDB holds to each CockroachDB node, by pool (read/write). Imbalanced values across nodes suggest the connection balancer is unable to redistribute connections evenly.",
}, []string{"pool", "node_id"})

pruningTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "crdb_pruning_duration",
Help: "milliseconds spent on one iteration of pruning excess connections",
Help: "Duration in milliseconds of one iteration of the CockroachDB connection balancer pruning excess connections from over-represented nodes. Elevated values indicate the balancer is struggling to rebalance connections.",
Buckets: []float64{.1, .2, .5, 1, 2, 5, 10, 20, 50, 100},
}, []string{"pool"})
)
Expand Down
2 changes: 1 addition & 1 deletion internal/datastore/crdb/pool/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type pgxPool interface {

var resetHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "crdb_client_resets",
Help: "cockroachdb client-side tx reset distribution",
Help: "Distribution of the number of client-side transaction restarts per transaction attempt. Restarts occur when CockroachDB returns a serialization failure (40001) and the driver retries the transaction from scratch. Sustained high values indicate transaction contention.",
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
})

Expand Down
13 changes: 0 additions & 13 deletions internal/datastore/crdb/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"time"

"github.com/jackc/pgx/v5"
"github.com/prometheus/client_golang/prometheus"
"google.golang.org/protobuf/types/known/structpb"
"google.golang.org/protobuf/types/known/timestamppb"

Expand All @@ -37,18 +36,6 @@ const (
queryChangefeedPreV22 = "EXPERIMENTAL CHANGEFEED FOR %s WITH updated, cursor = '%s', resolved = '%s';"
)

var retryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: this metric is never written to.

Namespace: "spicedb",
Subsystem: "datastore",
Name: "crdb_watch_retries",
Help: "watch retry distribution",
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
})

func init() {
prometheus.MustRegister(retryHistogram)
}

type changeDetails struct {
Resolved string
Updated string
Expand Down
2 changes: 1 addition & 1 deletion internal/datastore/proxy/observable.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ var (
Subsystem: "datastore",
Name: "loaded_relationships_count",
Buckets: []float64{0, 1, 3, 10, 32, 100, 316, 1000, 3162, 10000},
Help: "total number of relationships loaded for a query",
Help: "Histogram of the number of relationships loaded per individual datastore query. High p99 values (>1000) may indicate broad permission checks or missing filters.",
})

queryLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Expand Down
6 changes: 3 additions & 3 deletions internal/datastore/proxy/schemacaching/watchingcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ var namespacesFallbackModeGauge = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "spicedb",
Subsystem: "datastore",
Name: "watching_schema_cache_namespaces_fallback_mode",
Help: "value of 1 if the cache is in fallback mode and 0 otherwise",
Help: "Whether the watching schema cache for namespace definitions is in fallback mode (1) or normal mode (0). Fallback is triggered when the CockroachDB changefeed used to track schema updates becomes unavailable; in this state every schema lookup hits the datastore directly.",
})

var caveatsFallbackModeGauge = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "spicedb",
Subsystem: "datastore",
Name: "watching_schema_cache_caveats_fallback_mode",
Help: "value of 1 if the cache is in fallback mode and 0 otherwise",
Help: "Whether the watching schema cache for caveat definitions is in fallback mode (1) or normal mode (0). Fallback is triggered when the CockroachDB changefeed used to track schema updates becomes unavailable; in this state every schema lookup hits the datastore directly.",
})

var schemaCacheRevisionGauge = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "spicedb",
Subsystem: "datastore",
Name: "watching_schema_cache_tracked_revision",
Help: "the currently tracked max revision for the schema cache",
Help: "The current maximum revision tracked by the CockroachDB changefeed-backed schema cache. A value that is not advancing over time indicates the changefeed has stalled.",
})

var definitionsReadCachedCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Expand Down
13 changes: 0 additions & 13 deletions internal/datastore/spanner/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"cloud.google.com/go/spanner"
sppb "cloud.google.com/go/spanner/apiv1/spannerpb"
"github.com/cloudspannerecosystem/spanner-change-streams-tail/changestreams"
"github.com/prometheus/client_golang/prometheus"
"github.com/puzpuzpuz/xsync/v4"
"google.golang.org/api/option"

Expand All @@ -28,18 +27,6 @@ const (
CombinedChangeStreamName = "combined_change_stream"
)

var retryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: this metric is never written to.

Namespace: "spicedb",
Subsystem: "datastore",
Name: "spanner_watch_retries",
Help: "watch retry distribution",
Buckets: []float64{0, 1, 2, 5, 10, 20, 50},
})

func init() {
prometheus.MustRegister(retryHistogram)
}

// Copied from the spanner library: https://github.com/googleapis/google-cloud-go/blob/f03779538f949fb4ad93d5247d3c6b3e5b21091a/spanner/client.go#L67
// License: Apache License, Version 2.0, Copyright 2017 Google LLC
var validDBPattern = regexp.MustCompile("^projects/(?P<project>[^/]+)/instances/(?P<instance>[^/]+)/databases/(?P<database>[^/]+)$")
Expand Down
6 changes: 6 additions & 0 deletions internal/dispatch/caching/caching.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,33 +62,39 @@ func NewCachingDispatcher(cacheInst cache.Cache[keys.DispatchCacheKey, any], met
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "check_total",
Help: "Total number of CheckPermission dispatch requests processed.",
})
checkFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "check_from_cache_total",
Help: "Total number of CheckPermission dispatch requests served directly from the dispatch cache, avoiding re-computation.",
})

lookupResourcesTotalCounter := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "lookup_resources_total",
Help: "Total number of LookupResources dispatch requests processed.",
})
lookupResourcesFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "lookup_resources_from_cache_total",
Help: "Total number of LookupResources dispatch requests served directly from the dispatch cache.",
})

lookupSubjectsTotalCounter := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "lookup_subjects_total",
Help: "Total number of LookupSubjects dispatch requests processed.",
})
lookupSubjectsFromCacheCounter := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Subsystem: prometheusSubsystem,
Name: "lookup_subjects_from_cache_total",
Help: "Total number of LookupSubjects dispatch requests served directly from the dispatch cache.",
})

if metricsEnabled && prometheusSubsystem != "" {
Expand Down
15 changes: 0 additions & 15 deletions internal/graph/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,9 @@ var dispatchChunkCountHistogram = prometheus.NewHistogram(prometheus.HistogramOp
Buckets: []float64{1, 2, 3, 5, 10, 25, 100, 250},
})

var directDispatchQueryHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: not useful — this metric can only take two values.

Name: "spicedb_check_direct_dispatch_query_count",
Help: "number of queries made per direct dispatch",
Buckets: []float64{1, 2},
})

const noOriginalRelation = ""

func init() {
prometheus.MustRegister(directDispatchQueryHistogram)
prometheus.MustRegister(dispatchChunkCountHistogram)
}

Expand Down Expand Up @@ -385,10 +378,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest

// If the direct subject or a wildcard form can be found, issue a query for just that
// subject.
var queryCount float64
defer func() {
directDispatchQueryHistogram.Observe(queryCount)
}()

hasDirectSubject := totalDirectSubjects > 0
hasWildcardSubject := totalWildcardSubjects > 0
Expand Down Expand Up @@ -429,8 +418,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest
if err != nil {
return checkResultError(NewCheckFailureErr(err), emptyMetadata)
}
queryCount += 1.0

// Find the matching subject(s).
for rel, err := range it {
if err != nil {
Expand Down Expand Up @@ -482,8 +469,6 @@ func (cc *ConcurrentChecker) checkDirect(ctx context.Context, crc currentRequest
if err != nil {
return checkResultError(NewCheckFailureErr(err), emptyMetadata)
}
queryCount += 1.0

// Build the set of subjects over which to dispatch, along with metadata for
// mapping over caveats (if any).
checksToDispatch := newCheckDispatchSet()
Expand Down
Loading