Skip to content

Commit 00c164f

Browse files
committed
Add feature to display datasource breakdown
This can specifically be used to find unused data sources.
1 parent 54828b6 commit 00c164f

File tree

8 files changed

+239
-7
lines changed

8 files changed

+239
-7
lines changed

CHANGES.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,14 @@ grafana-wtf changelog
55

66
in progress
77
===========
8-
- Upgrade to ``colored==1.4.3``
8+
9+
2021-12-10 0.11.0
10+
=================
11+
- Upgrade to ``colored==1.4.3``. Thanks, @dslackw!
912
- Tests: Use ``.env`` file for propagating environment variables to Docker Compose
1013
- CI/GHA test matrix: Use Grafana 7.5.11 and 8.3.1 and add Python 3.10
14+
- Add feature to display datasource breakdown, specifically for finding unused
15+
data sources. Thanks, @chenlujjj!
1116

1217
2021-10-01 0.10.0
1318
=================

grafana_wtf/commands.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from grafana_wtf.core import GrafanaSearch
1515
from grafana_wtf.report import WtfReport
1616
from grafana_wtf.tabular_report import TabularReport
17-
from grafana_wtf.util import normalize_options, setup_logging, configure_http_logging, read_list
17+
from grafana_wtf.util import normalize_options, setup_logging, configure_http_logging, read_list, yaml_dump
1818

1919
log = logging.getLogger(__name__)
2020

@@ -25,6 +25,7 @@ def run():
2525
grafana-wtf [options] find [<search-expression>]
2626
grafana-wtf [options] replace <search-expression> <replacement>
2727
grafana-wtf [options] log [<dashboard_uid>] [--number=<count>]
28+
grafana-wtf [options] datasource-breakdown
2829
grafana-wtf --version
2930
grafana-wtf (-h | --help)
3031
@@ -92,6 +93,13 @@ def run():
9293
# Output full history table in Grid format
9394
grafana-wtf log --format=tabular:grid
9495
96+
Breakdown examples:
97+
98+
# Display all data sources and the dashboards using them, as well as unused data sources.
99+
grafana-wtf datasource-breakdown --format=yaml
100+
101+
# Display names of unused datasources as a flat list.
102+
grafana-wtf datasource-breakdown --format=json | jq -r '.unused[].datasource.name'
95103
96104
"""
97105

@@ -186,6 +194,24 @@ def run():
186194

187195
print(output)
188196

197+
if options.datasource_breakdown:
198+
results = engine.datasource_breakdown()
199+
200+
unused_count = len(results["unused"])
201+
if unused_count:
202+
log.warning(f"Found {unused_count} unused data source(s)")
203+
204+
if output_format == "json":
205+
output = json.dumps(results, indent=4)
206+
207+
elif output_format == "yaml":
208+
output = yaml_dump(results)
209+
210+
else:
211+
raise ValueError(f"Unknown output format \"{output_format}\"")
212+
213+
print(output)
214+
189215

190216
def get_table_format(output_format):
191217
tablefmt = None

grafana_wtf/core.py

Lines changed: 108 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# (c) 2019 Andreas Motl <[email protected]>
33
# License: GNU Affero General Public License, Version 3
44
import json
5+
from pprint import pprint
6+
57
import colored
68
import logging
79
import asyncio
@@ -13,6 +15,7 @@
1315
from urllib.parse import urlparse, urljoin
1416
from concurrent.futures.thread import ThreadPoolExecutor
1517

18+
from grafana_wtf.model import DatasourceBreakdownItem
1619
from grafana_wtf.monkey import monkeypatch_grafana_api
1720
# Apply monkeypatch to grafana-api
1821
# https://github.com/m0nhawk/grafana_api/pull/85/files
@@ -61,23 +64,30 @@ def clear_cache(self):
6164
def enable_concurrency(self, concurrency):
6265
self.concurrency = concurrency
6366

64-
def setup(self):
65-
url = urlparse(self.grafana_url)
67+
@staticmethod
def grafana_client_factory(grafana_url, grafana_token=None):
    """
    Create a ``GrafanaFace`` API client for the given Grafana URL.

    When ``grafana_token`` is given, authenticate with the API key.
    Otherwise, fall back to HTTP basic authentication, using credentials
    embedded into the URL, or the ``admin``/``admin`` defaults.
    """
    url = urlparse(grafana_url)

    if grafana_token:
        # Grafana API key authentication.
        auth = grafana_token
    else:
        # HTTP basic authentication.
        username = url.username or 'admin'
        password = url.password or 'admin'
        auth = (username, password)

    return GrafanaFace(
        auth, protocol=url.scheme,
        host=url.hostname, port=url.port, url_path_prefix=url.path.lstrip('/'))
86+
87+
def setup(self):
88+
89+
self.grafana = self.grafana_client_factory(self.grafana_url, grafana_token=self.grafana_token)
90+
8191
# Configure a larger HTTP request pool.
8292
# Todo: Review the pool settings and eventually adjust according to concurrency level or other parameters.
8393
# https://urllib3.readthedocs.io/en/latest/advanced-usage.html#customizing-pool-behavior
@@ -175,6 +185,7 @@ def scan_datasources(self):
175185
try:
176186
self.data.datasources = munchify(self.grafana.datasource.list_datasources())
177187
log.info('Found {} data sources'.format(len(self.data.datasources)))
188+
return self.data.datasources
178189
except GrafanaClientError as ex:
179190
message = '{name}: {ex}'.format(name=ex.__class__.__name__, ex=ex)
180191
log.error(self.get_red_message(message))
@@ -218,6 +229,8 @@ def scan_dashboards(self, dashboard_uids=None):
218229
if self.progressbar:
219230
self.taqadum.close()
220231

232+
return self.data.dashboards
233+
221234
def handle_grafana_error(self, ex):
222235
message = '{name}: {ex}'.format(name=ex.__class__.__name__, ex=ex)
223236
message = colored.stylize(message, colored.fg("red") + colored.attr("bold"))
@@ -272,3 +285,93 @@ def get_dashboard_versions(self, dashboard_id):
272285
get_dashboard_versions_path = '/dashboards/id/%s/versions' % dashboard_id
273286
r = self.grafana.dashboard.api.GET(get_dashboard_versions_path)
274287
return r
288+
289+
def datasource_breakdown(self):
    """
    Associate data sources with the dashboards using them.

    Returns an ordered mapping with two entries: ``used`` holds compact
    breakdown items for data sources referenced by at least one dashboard,
    ``unused`` holds items for data sources referenced by none.
    """

    # Build lookup tables, mapping dashboards by uid, datasources by name,
    # as well as the cross-references between dashboards and datasources.
    indexer = Indexer(engine=self)

    # Associate each datasource with the dashboards that use it.
    used = []
    unused = []
    for datasource_name in sorted(indexer.datasource_by_name):
        datasource = indexer.datasource_by_name[datasource_name]
        dashboard_uids = indexer.datasource_dashboard_index.get(datasource_name, [])
        dashboards = [indexer.dashboard_by_uid.get(uid) for uid in dashboard_uids]
        breakdown_item = DatasourceBreakdownItem(
            datasource=datasource, used_in=dashboards, grafana_url=self.grafana_url)

        # Condense the item into a compact form, using only a subset of all the attributes.
        entry = breakdown_item.format_compact()

        # File the entry under "used" vs. "unused", depending on whether
        # any dashboard references the datasource.
        target = used if dashboard_uids else unused
        target.append(entry)

    return OrderedDict(used=used, unused=unused)
318+
319+
320+
class Indexer:
    """
    Build lookup tables over all Grafana dashboards and data sources.

    Indexes dashboards by uid and data sources by name, and computes the
    bidirectional mapping between dashboards and the data sources they
    reference (panels, annotations, and templating variables).
    """

    def __init__(self, engine: "GrafanaSearch"):
        self.engine = engine

        # Prepare index data structures.
        self.dashboard_by_uid = {}
        self.datasource_by_name = {}
        self.dashboard_datasource_index = {}
        self.datasource_dashboard_index = {}

        # Gather all data from the Grafana instance.
        self.dashboards = self.engine.scan_dashboards()
        self.datasources = self.engine.scan_datasources()

        # Invoke indexer.
        self.index()

    def index(self):
        """Populate all index data structures."""
        self.index_dashboards()
        self.index_datasources()

    @staticmethod
    def collect_datasource_names(root):
        """Return the distinct, truthy ``datasource`` attributes of the given items."""
        # NOTE(review): assumes each item's `datasource` attribute is a plain
        # (hashable) name; confirm against the Grafana payloads in use.
        return list({item.datasource for item in root if item.datasource})

    def index_dashboards(self):
        """
        Index dashboards by uid, and map each dashboard uid to the sorted
        list of data source names it references.
        """

        self.dashboard_by_uid = {}
        self.dashboard_datasource_index = {}

        for dashboard in self.dashboards:

            # Folders appear in scan results, but carry no panels; skip them.
            if dashboard.meta.isFolder:
                continue

            # Index by uid.
            uid = dashboard.dashboard.uid
            self.dashboard_by_uid[uid] = dashboard

            # Collect data source names from panels, annotations and templating.
            ds_panels = self.collect_datasource_names(dashboard.dashboard.panels)
            ds_annotations = self.collect_datasource_names(dashboard.dashboard.annotations.list)
            ds_templating = self.collect_datasource_names(dashboard.dashboard.templating.list)
            self.dashboard_datasource_index[uid] = sorted(set(ds_panels + ds_annotations + ds_templating))

    def index_datasources(self):
        """
        Index data sources by name, and invert the dashboard→datasource
        mapping into a datasource→dashboard-uids mapping.
        """

        self.datasource_by_name = {}
        self.datasource_dashboard_index = {}

        for datasource in self.datasources:
            self.datasource_by_name[datasource.name] = datasource

        for dashboard_uid, datasource_names in self.dashboard_datasource_index.items():
            for datasource_name in datasource_names:
                self.datasource_dashboard_index.setdefault(datasource_name, []).append(dashboard_uid)

grafana_wtf/model.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import dataclasses
2+
from typing import List
3+
4+
from munch import Munch
5+
from collections import OrderedDict
6+
from urllib.parse import urljoin
7+
8+
9+
@dataclasses.dataclass
class DatasourceBreakdownItem:
    """
    Associate a single data source with the dashboards that use it.

    ``format_compact`` condenses the item into a plain mapping with a
    subset of all the attributes, suitable for JSON/YAML serialization.
    """

    # Full data source record, as obtained from the Grafana API.
    datasource: Munch
    # Dashboard records referencing this data source.
    used_in: List[Munch]
    # Base URL of the Grafana instance, used to derive absolute dashboard URLs.
    grafana_url: str

    def format_compact(self):
        """Render the item in a more compact form, using only a subset of all the attributes."""
        compact = OrderedDict(
            datasource=OrderedDict(
                name=self.datasource.name,
                type=self.datasource.type,
                url=self.datasource.url,
            )
        )
        # The "dashboards" key is only present when the data source is used at all.
        for dashboard in self.used_in:
            entry = OrderedDict(
                title=dashboard.dashboard.title,
                uid=dashboard.dashboard.uid,
                path=dashboard.meta.url,
                url=urljoin(self.grafana_url, dashboard.meta.url),
            )
            compact.setdefault("dashboards", []).append(entry)
        return compact

grafana_wtf/util.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import sys
55
import json
66
import logging
7+
from collections import OrderedDict
8+
9+
import yaml
710
from munch import munchify
811
from jsonpath_rw import parse
912
from pygments import highlight
@@ -106,3 +109,22 @@ def find(self, needle, haystack):
106109
def prettify_json(data):
107110
json_str = json.dumps(data, indent=4)
108111
return highlight(json_str, JsonLexer(), TerminalFormatter())
112+
113+
114+
def yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds):
115+
"""
116+
https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
117+
"""
118+
119+
kwds["default_flow_style"] = False
120+
121+
class OrderedDumper(Dumper):
122+
pass
123+
124+
def _dict_representer(dumper, data):
125+
return dumper.represent_mapping(
126+
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
127+
data.items())
128+
129+
OrderedDumper.add_representer(OrderedDict, _dict_representer)
130+
return yaml.dump(data, stream, OrderedDumper, **kwds)

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
# Core
1111
'six',
12+
'dataclasses; python_version<"3.7"',
1213
'docopt>=0.6.2,<0.7',
1314
'munch>=2.5.0,<3',
1415
'tqdm>=4.37.0,<5',
@@ -25,6 +26,7 @@
2526
'tabulate>=0.8.5,<0.9',
2627
'colored>=1.4.3,<2',
2728
'Pygments>=2.7.4,<3',
29+
'PyYAML>=5,<6',
2830

2931
]
3032

tests/conftest.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
from pathlib import Path
44

55
import pytest
6+
from grafana_api.grafana_api import GrafanaClientError
7+
8+
from grafana_wtf.core import GrafanaSearch
69

710

811
def clean_environment():
@@ -37,4 +40,19 @@ def docker_grafana(docker_services):
3740
return url
3841

3942

43+
@pytest.fixture
def create_datasource(docker_grafana):
    """
    Factory fixture for creating a data source on the test Grafana instance.

    https://docs.pytest.org/en/4.6.x/fixture.html#factories-as-fixtures
    """
    def _create_datasource(name: str, type: str, access: str):
        client = GrafanaSearch.grafana_client_factory(docker_grafana)
        # TODO: Add fixture which completely resets everything in Grafana before running the test harness.
        #       Move to a different port than 3000 then!
        payload = dict(name=name, type=type, access=access)
        try:
            client.datasource.create_datasource(payload)
        except GrafanaClientError as ex:
            # A data source of the same name surviving from an earlier run is acceptable.
            if "Client Error 409: data source with the same name already exists" not in str(ex):
                raise
    return _create_datasource
56+
57+
4058
clean_environment()

tests/test_commands.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import docopt
88
import pytest
9+
import yaml
910

1011
import grafana_wtf.commands
1112

@@ -173,3 +174,27 @@ def test_log_tabular_success(docker_grafana, capsys, caplog):
173174
first_item_raw = str.splitlines(captured.out)[-1]
174175
first_item_normalized = re.sub("(.*)Date: .+|(.*)", r"\1Date: xxxx-xx-xxTxx:xx:xxZ |\2", first_item_raw, 1)
175176
assert first_item_normalized == reference
177+
178+
179+
def test_datasource_breakdown(docker_grafana, create_datasource, capsys, caplog):
    """
    Proof that the ``datasource-breakdown`` subcommand reports unused data sources.
    """

    # Create two data sources which are not used by any dashboard.
    create_datasource(name="foo", type="foo", access="foo")
    create_datasource(name="bar", type="bar", access="bar")

    # Compute breakdown.
    set_command("datasource-breakdown", "--format=yaml")

    # Proof the output is correct.
    with caplog.at_level(logging.DEBUG):
        grafana_wtf.commands.run()
        assert "Found 2 unused data source(s)" in caplog.messages

    captured = capsys.readouterr()
    # Use `safe_load`: calling `yaml.load` without an explicit `Loader` is
    # deprecated since PyYAML 5.1 and raises a TypeError on PyYAML >= 6.
    data = yaml.safe_load(captured.out)

    assert len(data["used"]) >= 1
    assert len(data["unused"]) >= 2

    # Unused data sources are reported sorted alphabetically by name.
    assert data["unused"][0]["datasource"]["name"] == "bar"
    assert data["unused"][1]["datasource"]["name"] == "foo"

0 commit comments

Comments
 (0)