Skip to content

Commit b21192f

Browse files
committed
update dashboards and claude instructions
1 parent 72d26f0 commit b21192f

File tree

6 files changed

+428
-803
lines changed

6 files changed

+428
-803
lines changed

claude/grafana-dashboard-sync-instructions.md

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ python3 claude/tools/sync_dashboard.py
4747

4848
This script will:
4949
- Use upstream dashboards as the base structure
50-
- Add K8s variables (env, pod, service) to all dashboards
51-
- Transform all PromQL queries to use K8s label selectors
50+
- Add K8s variables (env, service) to all dashboards - NO pod variable
51+
- Transform all PromQL queries to use service-only label selectors
5252
- Preserve Scroll UIDs
5353
- Save updated dashboards to `etc/grafana/scroll/`
5454

@@ -95,7 +95,7 @@ Before committing:
9595
```
9696

9797
2. **Test each dashboard:**
98-
- [ ] Variables populate correctly (env, pod, service)
98+
- [ ] Variables populate correctly (env, service) - only 2 variables
9999
- [ ] All panels display data
100100
- [ ] No query errors
101101
- [ ] New panels work as expected
@@ -135,52 +135,63 @@ git push
135135

136136
### Standard K8s Variables
137137

138-
All Scroll dashboards must include these variables:
138+
All Scroll dashboards must include these variables (2 only - NO pod variable):
139139

140140
```json
141141
{
142142
"name": "env",
143143
"type": "query",
144-
"query": "label_values(env)",
145-
"label": "Environment"
146-
}
147-
148-
{
149-
"name": "pod",
150-
"type": "query",
151-
"query": "label_values(pod)",
152-
"label": "Pod",
153-
"multi": true,
154-
"includeAll": true
144+
"definition": "label_values(env)",
145+
"query": {
146+
"qryType": 1,
147+
"query": "label_values(env)",
148+
"refId": "PrometheusVariableQueryEditor-VariableQuery"
149+
},
150+
"regex": "(sepolia|mainnet)-eks.*"
155151
}
156152

157153
{
158154
"name": "service",
159155
"type": "query",
160-
"query": "label_values(reth_info{namespace=\"$env\"},service)",
161-
"label": "Service"
156+
"definition": "label_values(reth_info{namespace=\"$env\"},service)",
157+
"query": {
158+
"qryType": 1,
159+
"query": "label_values(reth_info{namespace=\"$env\"},service)",
160+
"refId": "PrometheusVariableQueryEditor-VariableQuery"
161+
},
162+
"regex": "(l[1|2]reth.*)"
162163
}
163164
```
164165

166+
**Important:** No `pod` variable - queries aggregate by service only, enabling data continuity when pods are replaced.
167+
165168
### Query Transformation Rules
166169

167170
The sync script applies these transformations:
168171

169172
| Upstream Pattern | Scroll Pattern (K8s) |
170173
|------------------|----------------------|
171-
| `$instance_label="$instance"` | `service=~"$service", pod="$pod"` |
172-
| `instance="$instance"` | `service="$service", pod="$pod"` |
173-
| `instance=~"$instance"` | `service=~"$service", pod="$pod"` |
174+
| `$instance_label="$instance"` | `service=~"$service"` |
175+
| `instance="$instance"` | `service=~"$service"` |
176+
| `instance=~"$instance"` | `service=~"$service"` |
174177

175178
**Example:**
176179
```promql
177180
# Upstream:
178181
reth_database_operation_duration{$instance_label="$instance", quantile="0.99"}
179182
180183
# Scroll (after transformation):
181-
reth_database_operation_duration{service=~"$service", pod="$pod", quantile="0.99"}
184+
reth_database_operation_duration{service=~"$service", quantile="0.99"}
182185
```
183186

187+
### Data Continuity Feature
188+
189+
By using **service-only** filtering (no pod label), dashboards maintain historical data when pods are replaced:
190+
- Old pod dies → new pod starts with different name
191+
- Both pods share the same `service` label
192+
- Queries aggregate across all pods for that service
193+
- Historical data remains visible seamlessly
194+
184195
## Handling Special Cases
185196

186197
### New Upstream Dashboards (Case B)
@@ -387,4 +398,5 @@ For questions about this process:
387398
---
388399

389400
**Last updated:** 2025-12-01
390-
**Last sync:** 2025-12-01 (Initial convergence with upstream)
401+
**Last sync:** 2025-12-01 (Converged with upstream, service-only pattern for data continuity)
402+
**Pattern:** 2 variables (env, service) - NO pod variable - enables seamless pod replacement

claude/tools/sync_dashboard.py

Lines changed: 22 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,14 @@
1212
from copy import deepcopy
1313

1414
def add_k8s_variables(dashboard: Dict, preserve_uid: str = None) -> Dict:
15-
"""Add standard K8s variables to dashboard templating"""
15+
"""Add standard K8s variables to dashboard templating (env and service only)"""
1616
k8s_vars = [
1717
{
1818
"current": {
19-
"selected": False,
20-
"text": "default",
21-
"value": "default"
19+
"text": "mainnet",
20+
"value": "mainnet"
2221
},
23-
"hide": 0,
24-
"includeAll": False,
25-
"label": "Environment",
26-
"multi": False,
22+
"definition": "label_values(env)",
2723
"name": "env",
2824
"options": [],
2925
"query": {
@@ -32,44 +28,15 @@ def add_k8s_variables(dashboard: Dict, preserve_uid: str = None) -> Dict:
3228
"refId": "PrometheusVariableQueryEditor-VariableQuery"
3329
},
3430
"refresh": 1,
35-
"regex": "",
36-
"skipUrlSync": False,
37-
"sort": 0,
31+
"regex": "(sepolia|mainnet)-eks.*",
3832
"type": "query"
3933
},
4034
{
4135
"current": {
42-
"selected": False,
43-
"text": "All",
44-
"value": "$__all"
36+
"text": "l1reth-el-0",
37+
"value": "l1reth-el-0"
4538
},
46-
"hide": 0,
47-
"includeAll": True,
48-
"label": "Pod",
49-
"multi": True,
50-
"name": "pod",
51-
"options": [],
52-
"query": {
53-
"qryType": 1,
54-
"query": "label_values(pod)",
55-
"refId": "PrometheusVariableQueryEditor-VariableQuery"
56-
},
57-
"refresh": 1,
58-
"regex": "",
59-
"skipUrlSync": False,
60-
"sort": 0,
61-
"type": "query"
62-
},
63-
{
64-
"current": {
65-
"selected": False,
66-
"text": "",
67-
"value": ""
68-
},
69-
"hide": 0,
70-
"includeAll": False,
71-
"label": "Service",
72-
"multi": False,
39+
"definition": "label_values(reth_info{namespace=\"$env\"},service)",
7340
"name": "service",
7441
"options": [],
7542
"query": {
@@ -78,22 +45,17 @@ def add_k8s_variables(dashboard: Dict, preserve_uid: str = None) -> Dict:
7845
"refId": "PrometheusVariableQueryEditor-VariableQuery"
7946
},
8047
"refresh": 1,
81-
"regex": "",
82-
"skipUrlSync": False,
83-
"sort": 0,
48+
"regex": "(l[1|2]reth.*)",
8449
"type": "query"
8550
}
8651
]
8752

8853
if 'templating' not in dashboard:
8954
dashboard['templating'] = {'list': []}
9055

91-
# Remove any existing env, pod, service variables to avoid duplicates
92-
existing_vars = [v for v in dashboard['templating']['list']
93-
if v.get('name') not in ['env', 'pod', 'service']]
94-
95-
# Add K8s variables at the beginning
96-
dashboard['templating']['list'] = k8s_vars + existing_vars
56+
# Replace ALL variables with ONLY K8s variables (env, service) - no pod variable
57+
# This ensures we only have the 2 required K8s variables
58+
dashboard['templating']['list'] = k8s_vars
9759

9860
# Preserve scroll UID if provided
9961
if preserve_uid:
@@ -103,67 +65,65 @@ def add_k8s_variables(dashboard: Dict, preserve_uid: str = None) -> Dict:
10365

10466
def transform_query(query: str) -> str:
10567
"""
106-
Transform PromQL query to use K8s labels
107-
Handles various patterns of instance label usage
68+
Transform PromQL query to use K8s labels (service only, no pod)
69+
This enables data continuity when pods are replaced
10870
"""
10971
if not query or not isinstance(query, str):
11072
return query
11173

112-
original = query
113-
11474
# Pattern 1: $instance_label="$instance" or $instance_label=~"$instance"
11575
query = re.sub(
11676
r'\$instance_label\s*=~?\s*["\']?\$instance["\']?',
117-
'service=~"$service", pod="$pod"',
77+
'service=~"$service"',
11878
query
11979
)
12080

12181
# Pattern 2: instance="$instance" or instance=~"$instance" (direct usage)
12282
query = re.sub(
12383
r'instance\s*=~?\s*["\']?\$instance["\']?',
124-
'service="$service", pod="$pod"',
84+
'service=~"$service"',
12585
query
12686
)
12787

12888
# Pattern 3: {$instance_label="$instance"} at start of label set
12989
query = re.sub(
13090
r'\{\s*\$instance_label\s*=~?\s*["\']?\$instance["\']?\s*,',
131-
'{service=~"$service", pod="$pod",',
91+
'{service=~"$service",',
13292
query
13393
)
13494

13595
# Pattern 4: {instance="$instance"} at start of label set
13696
query = re.sub(
13797
r'\{\s*instance\s*=~?\s*["\']?\$instance["\']?\s*,',
138-
'{service="$service", pod="$pod",',
98+
'{service=~"$service",',
13999
query
140100
)
141101

142102
# Pattern 5: , $instance_label="$instance"} at end of label set
143103
query = re.sub(
144104
r',\s*\$instance_label\s*=~?\s*["\']?\$instance["\']?\s*\}',
145-
', service=~"$service", pod="$pod"}',
105+
', service=~"$service"}',
146106
query
147107
)
148108

149109
# Pattern 6: , instance="$instance"} at end of label set
150110
query = re.sub(
151111
r',\s*instance\s*=~?\s*["\']?\$instance["\']?\s*\}',
152-
', service="$service", pod="$pod"}',
112+
', service=~"$service"}',
153113
query
154114
)
155115

156116
# Pattern 7: {$instance_label="$instance"} as only label
157117
query = re.sub(
158118
r'\{\s*\$instance_label\s*=~?\s*["\']?\$instance["\']?\s*\}',
159-
'{service="$service", pod="$pod"}',
119+
'{service=~"$service"}',
160120
query
161121
)
162122

163123
# Pattern 8: {instance="$instance"} as only label
164124
query = re.sub(
165125
r'\{\s*instance\s*=~?\s*["\']?\$instance["\']?\s*\}',
166-
'{service="$service", pod="$pod"}',
126+
'{service=~"$service"}',
167127
query
168128
)
169129

0 commit comments

Comments
 (0)