Skip to content

Commit 70d22ed

Browse files
authored
GT-526 Use agency cache lock in metrics exporter (#1470)
1 parent 7819c2b commit 70d22ed

File tree

7 files changed

+83
-41
lines changed

7 files changed

+83
-41
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
- (Documentation) Update ArangoDeploymentReplication and ArangoLocalStorage CR auto-generated docs
1515
- (Feature) Add ArangoMember Message and extend ArangoMember CRD
1616
- (Documentation) Use OpenAPI-compatible type names in docs
17+
- (Improvement) Use agency cache lock in metrics exporter
1718

1819
## [1.2.34](https://github.com/arangodb/kube-arangodb/tree/1.2.34) (2023-10-16)
1920
- (Bugfix) Fix make manifests-crd-file command

pkg/deployment/agency/cache.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,11 @@ type Health interface {
144144

145145
type Cache interface {
146146
Reload(ctx context.Context, size int, clients Connections) (uint64, error)
147+
// Deprecated: Use Apply instead.
148+
// It can cause Read/Write error when state is reloaded.
147149
Data() (state.State, bool)
150+
// Apply applies a function to the current state. Returns true if function was applied
151+
Apply(f func(state.State)) bool
148152
DataDB() (state.DB, bool)
149153
CommitIndex() uint64
150154
// Health returns true when healthy object is available.
@@ -202,6 +206,10 @@ func (c cacheSingle) Reload(_ context.Context, _ int, _ Connections) (uint64, er
202206
return 0, nil
203207
}
204208

209+
func (c cacheSingle) Apply(f func(state.State)) bool {
210+
return false
211+
}
212+
205213
func (c cacheSingle) Data() (state.State, bool) {
206214
return state.State{}, true
207215
}
@@ -232,6 +240,19 @@ func (c *cache) CommitIndex() uint64 {
232240
return index
233241
}
234242

243+
func (c *cache) Apply(f func(state.State)) bool {
244+
c.lock.RLock()
245+
defer c.lock.RUnlock()
246+
247+
data, _, ok := c.loader.State()
248+
if !ok {
249+
return false
250+
}
251+
252+
f(data.Arango)
253+
return true
254+
}
255+
235256
func (c *cache) Data() (state.State, bool) {
236257
c.lock.RLock()
237258
defer c.lock.RUnlock()

pkg/deployment/deployment.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,11 @@ func (d *Deployment) GetMembersState() memberState.StateInspector {
159159
return d.memberState
160160
}
161161

162+
func (d *Deployment) WithAgencyCache(action func(state.State)) bool {
163+
return d.agencyCache.Apply(action)
164+
}
165+
166+
// Deprecated: Use WithAgencyCache instead
162167
func (d *Deployment) GetAgencyCache() (state.State, bool) {
163168
return d.agencyCache.Data()
164169
}

pkg/deployment/old_metrics.go

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// DISCLAIMER
33
//
4-
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
4+
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
55
//
66
// Licensed under the Apache License, Version 2.0 (the "License");
77
// you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ import (
2424
"sync"
2525

2626
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
27+
"github.com/arangodb/kube-arangodb/pkg/deployment/agency/state"
2728
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
2829
"github.com/arangodb/kube-arangodb/pkg/generated/metric_descriptions"
2930
"github.com/arangodb/kube-arangodb/pkg/metrics/collector"
@@ -76,8 +77,8 @@ func (i *inventory) CollectMetrics(in metrics.PushMetric) {
7677

7778
deployment.CollectMetrics(in)
7879

79-
if state := deployment.acs.CurrentClusterCache(); state != nil {
80-
t := state.GetThrottles()
80+
if cacheInspector := deployment.acs.CurrentClusterCache(); cacheInspector != nil {
81+
t := cacheInspector.GetThrottles()
8182

8283
for _, c := range definitions.AllComponents() {
8384
in.Push(i.operatorStateRefreshMetric.Gauge(float64(t.Get(c).Count()), deployment.GetNamespace(), deployment.GetName(), string(c)))
@@ -92,58 +93,59 @@ func (i *inventory) CollectMetrics(in metrics.PushMetric) {
9293
}
9394

9495
if spec.Mode.Get().HasAgents() {
95-
agency, agencyOk := deployment.GetAgencyCache()
96-
if !agencyOk {
97-
in.Push(i.deploymentAgencyStateMetric.Gauge(0, deployment.GetNamespace(), deployment.GetName()))
98-
continue
99-
}
100-
101-
in.Push(i.deploymentAgencyStateMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName()))
102-
10396
if spec.Mode.Get() == api.DeploymentModeCluster {
104-
for db, collections := range agency.Current.Collections {
105-
dbName := db
106-
if features.SensitiveInformationProtection().Enabled() {
107-
dbName = "UNKNOWN"
108-
109-
if v, ok := agency.Plan.Databases[db]; ok && v.ID != "" {
110-
dbName = v.ID
97+
applied := deployment.WithAgencyCache(func(st state.State) {
98+
for db, collections := range st.Current.Collections {
99+
dbName := db
100+
if features.SensitiveInformationProtection().Enabled() {
101+
dbName = "UNKNOWN"
102+
103+
if v, ok := st.Plan.Databases[db]; ok && v.ID != "" {
104+
dbName = v.ID
105+
}
111106
}
112-
}
113107

114-
for collection, shards := range collections {
115-
for shard, details := range shards {
116-
for id, server := range details.Servers {
117-
collectionName := "UNKNOWN"
118-
if features.SensitiveInformationProtection().Enabled() {
119-
collectionName = collection
120-
} else {
121-
if _, ok := agency.Plan.Collections[db]; ok {
122-
if _, ok := agency.Plan.Collections[db][collection]; ok {
123-
collectionName = agency.Plan.Collections[db][collection].GetName(collectionName)
108+
for collection, shards := range collections {
109+
for shard, details := range shards {
110+
for id, server := range details.Servers {
111+
collectionName := "UNKNOWN"
112+
if features.SensitiveInformationProtection().Enabled() {
113+
collectionName = collection
114+
} else {
115+
if _, ok := st.Plan.Collections[db]; ok {
116+
if _, ok := st.Plan.Collections[db][collection]; ok {
117+
collectionName = st.Plan.Collections[db][collection].GetName(collectionName)
118+
}
124119
}
125120
}
126-
}
127121

128-
m := []string{
129-
deployment.GetNamespace(),
130-
deployment.GetName(),
131-
dbName,
132-
collectionName,
133-
shard,
134-
string(server),
135-
}
122+
m := []string{
123+
deployment.GetNamespace(),
124+
deployment.GetName(),
125+
dbName,
126+
collectionName,
127+
shard,
128+
string(server),
129+
}
136130

137-
if id == 0 {
138-
in.Push(i.deploymentShardLeadersMetric.Gauge(1, m...))
131+
if id == 0 {
132+
in.Push(i.deploymentShardLeadersMetric.Gauge(1, m...))
133+
}
134+
in.Push(i.deploymentShardsMetric.Gauge(1, m...))
139135
}
140-
in.Push(i.deploymentShardsMetric.Gauge(1, m...))
141136
}
142137
}
143138
}
139+
})
140+
141+
if !applied {
142+
in.Push(i.deploymentAgencyStateMetric.Gauge(0, deployment.GetNamespace(), deployment.GetName()))
143+
continue
144144
}
145+
145146
}
146147
}
148+
in.Push(i.deploymentAgencyStateMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName()))
147149
}
148150
}
149151
}

pkg/deployment/reconcile/action_context.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ func (ac *actionContext) ShardsInSyncMap() (state.ShardsSyncStatus, bool) {
260260
return ac.context.ShardsInSyncMap()
261261
}
262262

263+
func (ac *actionContext) WithAgencyCache(action func(state.State)) bool {
264+
return ac.context.WithAgencyCache(action)
265+
}
266+
263267
func (ac *actionContext) GetAgencyCache() (state.State, bool) {
264268
return ac.context.GetAgencyCache()
265269
}

pkg/deployment/reconcile/plan_builder_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ func (c *testContext) GenerateMemberEndpoint(group api.ServerGroup, member api.M
176176
return pod2.GenerateMemberEndpoint(c.Inspector, c.ArangoDeployment, c.ArangoDeployment.Spec, group, member)
177177
}
178178

179+
func (c *testContext) WithAgencyCache(action func(state.State)) bool {
180+
return false
181+
}
182+
179183
func (c *testContext) GetAgencyCache() (state.State, bool) {
180184
return state.State{}, true
181185
}

pkg/deployment/reconciler/context.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ type DeploymentImageManager interface {
9696
}
9797

9898
type ArangoAgencyGet interface {
99+
// WithAgencyCache executes the given action with the agency cache using a Read lock. Returns true if action was applied
100+
WithAgencyCache(action func(state.State)) bool
101+
// GetAgencyCache returns the agency cache.
102+
// It can cause to Read/Write error when state is reloaded.
103+
// It is recommended to use WithAgencyCache instead.
99104
GetAgencyCache() (state.State, bool)
100105
GetAgencyArangoDBCache() (state.DB, bool)
101106
GetAgencyHealth() (agencyCache.Health, bool)

0 commit comments

Comments
 (0)