
Commit ba81881

Prometheus integration (#198)
* Initial prometheus integration
* datasource_head_updated
* Callback time
* more duration metrics
* sync_duration (why?)
* Fix immediate stopping after oneshot execution
* metrics.index_realtime_queue_size
* total, synched, realtime
* Some renamings
* docs
* Cleanup
* Cleanup
* More metrics
* Fixes
* Fix index counter
* min/max/avg for durations
* averaged durations everywhere
* Refactoring
* Count rollbacks
* http_errors
* index_hit_ratio, refactoring
* Total matches
* Useful options
* Opt-in prometheus (dirty)
* Same for bigmap indexes
* Require host field
* typo
* Build images, cooler pyproject injectr
* Changelog
* Cleanup
* indexes_total
* Review, bump
* Do not MA metrics
1 parent f3cd293 commit ba81881

12 files changed: +351 −115 lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
@@ -46,4 +46,4 @@ jobs:
       cache-from: type=gha
       cache-to: type=gha,mode=max
       tags: ${{ steps.meta.outputs.tags }}
-      labels: ${{ steps.meta.outputs.labels }}
+      labels: ${{ steps.meta.outputs.labels }}

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ Please use [this](https://docs.gitlab.com/ee/development/changelog.html) documen
 ### Added

 * metadata: Added `metadata_interface` feature flag to expose metadata in TzKT format.
+* prometheus: Added ability to expose Prometheus metrics.
 * tzkt: Added missing fields to the `HeadBlockData` model.
 * tzkt: Added `iter_...` methods to iterate over item batches.

Dockerfile

Lines changed: 5 additions & 3 deletions
@@ -19,9 +19,11 @@ WORKDIR /home/dipdup/source
 RUN poetry config virtualenvs.create false
 RUN make install DEV=0 PLUGINS="${PLUGINS}"

-COPY --chown=dipdup inject_pyproject.sh /home/dipdup/inject_pyproject
-RUN chmod +x /home/dipdup/inject_pyproject
-RUN echo 'dipdup ALL = NOPASSWD: /home/dipdup/inject_pyproject' >> /etc/sudoers
+COPY --chown=dipdup inject_pyproject.sh /usr/bin/inject_pyproject.sh
+RUN echo 'sudo /usr/bin/inject_pyproject.sh' >> /usr/bin/inject_pyproject
+RUN echo 'dipdup ALL = NOPASSWD: /usr/bin/inject_pyproject.sh' >> /etc/sudoers
+RUN chmod +x /usr/bin/inject_pyproject.sh
+RUN chmod +x /usr/bin/inject_pyproject

 COPY --chown=dipdup src /home/dipdup/source/src

poetry.lock

Lines changed: 68 additions & 60 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ sqlparse = "^0.4.1"
 tabulate = "^0.8.9"
 tortoise-orm = "0.17.8"
 typing-inspect = "^0.6.0"
+prometheus-client = "^0.12.0"

 [tool.poetry.dev-dependencies]
 black = "^22.1.0"

src/dipdup/config.py

Lines changed: 8 additions & 0 deletions
@@ -906,6 +906,13 @@ class SentryConfig:
     debug: bool = False


+@dataclass
+class PrometheusConfig:
+    host: str
+    port: int = 8000
+    update_interval: float = 1.0
+
+
 @dataclass
 class HookConfig(CallbackMixin, kind='hook'):
     args: Dict[str, str] = field(default_factory=dict)
@@ -989,6 +996,7 @@ class DipDupConfig:
     hooks: Dict[str, HookConfig] = field(default_factory=dict)
     hasura: Optional[HasuraConfig] = None
     sentry: Optional[SentryConfig] = None
+    prometheus: Optional[PrometheusConfig] = None
     advanced: AdvancedConfig = AdvancedConfig()

     def __post_init_post_parse__(self):
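For illustration (not part of the diff): the dataclass makes `host` mandatory while `port` and `update_interval` fall back to defaults, so a minimal sketch of constructing the new section needs only a host:

    from dipdup.config import PrometheusConfig

    # Only `host` is required; the rest are defaults from the dataclass above.
    config = PrometheusConfig(host='0.0.0.0')
    assert config.port == 8000            # default metrics port
    assert config.update_interval == 1.0  # seconds between metric refreshes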

src/dipdup/context.py

Lines changed: 6 additions & 6 deletions
@@ -1,9 +1,9 @@
 import logging
 import os
 import sys
-import time
 from collections import deque
 from contextlib import AsyncExitStack
+from contextlib import ExitStack
 from contextlib import contextmanager
 from contextlib import suppress
 from os.path import exists
@@ -59,6 +59,7 @@
 from dipdup.models import ReindexingReason
 from dipdup.models import Schema
 from dipdup.models import TokenMetadata
+from dipdup.prometheus import Metrics
 from dipdup.utils import FormattedLogger
 from dipdup.utils import slowdown
 from dipdup.utils.database import execute_sql_scripts
@@ -455,11 +456,10 @@ async def execute_sql(self, ctx: 'DipDupContext', name: str) -> None:
     @contextmanager
     def _callback_wrapper(self, kind: str, name: str) -> Iterator[None]:
         try:
-            start = time.perf_counter()
-            yield
-            diff = time.perf_counter() - start
-            level = self._logger.warning if diff > 1 else self._logger.debug
-            level('`%s` %s callback executed in %s seconds', name, kind, diff)
+            with ExitStack() as stack:
+                if Metrics.enabled:
+                    stack.enter_context(Metrics.measure_callback_duration(name))
+                yield
         except Exception as e:
             if isinstance(e, ReindexingRequiredError):
                 raise
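`dipdup/prometheus.py` itself is not among the rendered files, so the exact metric definitions are unknown. A minimal sketch of a `measure_callback_duration` compatible with the `ExitStack` usage above, assuming a `prometheus_client` Histogram labelled by callback name (metric and label names are assumptions):

    from prometheus_client import Histogram

    # Assumed metric name and labels; the real ones live in dipdup/prometheus.py.
    callback_duration = Histogram(
        'dipdup_callback_duration_seconds',
        'Duration of callback execution',
        ['callback'],
    )


    class Metrics:
        enabled: bool = False

        @classmethod
        def measure_callback_duration(cls, name: str):
            # Histogram.time() returns a context manager that observes
            # the elapsed seconds when the block exits.
            return callback_duration.labels(callback=name).time()

Returning a context manager is what lets the wrapper register it conditionally via `stack.enter_context(...)` and fall through to a plain `yield` when metrics are disabled.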

src/dipdup/dipdup.py

Lines changed: 34 additions & 17 deletions
@@ -19,6 +19,7 @@
 from typing import cast

 from apscheduler.events import EVENT_JOB_ERROR  # type: ignore
+from prometheus_client import start_http_server  # type: ignore
 from tortoise.exceptions import OperationalError
 from tortoise.transactions import get_connection
@@ -46,9 +47,7 @@
 from dipdup.index import HeadIndex
 from dipdup.index import Index
 from dipdup.index import OperationIndex
-from dipdup.index import block_cache
 from dipdup.index import extract_operation_subgroups
-from dipdup.index import head_cache
 from dipdup.models import BigMapData
 from dipdup.models import Contract
 from dipdup.models import Head
@@ -57,6 +56,7 @@
 from dipdup.models import IndexStatus
 from dipdup.models import OperationData
 from dipdup.models import Schema
+from dipdup.prometheus import Metrics
 from dipdup.scheduler import add_job
 from dipdup.scheduler import create_scheduler
 from dipdup.utils import slowdown
@@ -74,18 +74,18 @@ def __init__(self, ctx: DipDupContext) -> None:
         self._logger = logging.getLogger('dipdup')
         self._indexes: Dict[str, Index] = {}
         self._contracts: Set[ContractConfig] = set()
-        self._stopped: bool = False
         self._tasks: Deque[asyncio.Task] = deque()

         self._entrypoint_filter: Set[Optional[str]] = set()
         self._address_filter: Set[str] = set()

-    async def run(
-        self,
-        spawn_datasources_event: Event,
-        start_scheduler_event: Event,
-        early_realtime: bool = False,
-    ) -> None:
+    async def run(self, spawn_datasources_event: Event, start_scheduler_event: Event, early_realtime: bool = False) -> None:
+        tasks = [self._run(spawn_datasources_event, start_scheduler_event, early_realtime)]
+        if self._ctx.config.prometheus:
+            tasks.append(self._update_metrics(self._ctx.config.prometheus.update_interval))
+        await gather(*tasks)
+
+    async def _run(self, spawn_datasources_event: Event, start_scheduler_event: Event, early_realtime: bool = False) -> None:
         self._logger.info('Starting index dispatcher')
         await self._subscribe_to_datasource_events()
         await self._load_index_states()
@@ -96,7 +96,7 @@
             if isinstance(index, OperationIndex):
                 self._apply_filters(index._config)

-        while not self._stopped:
+        while True:
             if not spawn_datasources_event.is_set():
                 if self._every_index_is(IndexStatus.REALTIME) or early_realtime:
                     spawn_datasources_event.set()
@@ -123,7 +123,7 @@
                     self._apply_filters(index._config)

             if not indexes_spawned and self._every_index_is(IndexStatus.ONESHOT):
-                self.stop()
+                break

             if self._every_index_is(IndexStatus.REALTIME) and not indexes_spawned:
                 if not on_synchronized_fired:
@@ -136,8 +136,19 @@
             else:
                 on_synchronized_fired = False

-    def stop(self) -> None:
-        self._stopped = True
+    async def _update_metrics(self, update_interval: float) -> None:
+        while True:
+            await asyncio.sleep(update_interval)
+
+            active, synced, realtime = 0, 0, 0
+            for index in tuple(self._indexes.values()) + tuple(pending_indexes):
+                active += 1
+                if index.synchronized:
+                    synced += 1
+                if index.realtime:
+                    realtime += 1
+
+            Metrics.set_indexes_count(active, synced, realtime)

     def _apply_filters(self, index_config: OperationIndexConfig) -> None:
         self._address_filter.update(index_config.address_filter)
@@ -215,10 +226,6 @@ async def _process(index_state: IndexState) -> None:
         tasks = (create_task(_process(index_state)) for index_state in await IndexState.all())
         await gather(*tasks)

-        # NOTE: Cached blocks used only on index state init
-        block_cache.clear()
-        head_cache.clear()
-
     async def _on_head(self, datasource: TzktDatasource, head: HeadBlockData) -> None:
         # NOTE: Do not await query results - blocked database connection may cause Websocket timeout.
         self._tasks.append(
@@ -233,6 +240,8 @@ async def _on_head(self, datasource: TzktDatasource, head: HeadBlockData) -> None:
                 ),
             )
         )
+        if Metrics.enabled:
+            Metrics.set_datasource_head_updated(datasource.name)
         for index in self._indexes.values():
             if isinstance(index, HeadIndex) and index.datasource == datasource:
                 index.push_head(head)
@@ -261,6 +270,8 @@ async def _on_big_maps(self, datasource: TzktDatasource, big_maps: Tuple[BigMapD
     async def _on_rollback(self, datasource: TzktDatasource, from_level: int, to_level: int) -> None:
         """Perform a single level rollback when possible, otherwise call `on_rollback` hook"""
         self._logger.warning('Datasource `%s` rolled back: %s -> %s', datasource.name, from_level, to_level)
+        if Metrics.enabled:
+            Metrics.set_datasource_rollback(datasource.name)

         # NOTE: Zero difference between levels means we received no operations/big_maps on this level and thus channel level hasn't changed
         zero_level_rollback = from_level - to_level == 0
@@ -341,6 +352,7 @@ async def run(self) -> None:
         await self._set_up_database(stack)
         await self._set_up_datasources(stack)
         await self._set_up_hooks(tasks)
+        await self._set_up_prometheus()

         await self._initialize_schema()
         await self._initialize_datasources()
@@ -446,6 +458,11 @@ async def _set_up_hooks(self, tasks: Optional[Set[Task]] = None) -> None:
         if tasks:
            tasks.add(create_task(self._ctx.callbacks.run()))

+    async def _set_up_prometheus(self) -> None:
+        if self._config.prometheus:
+            Metrics.enabled = True
+            start_http_server(self._config.prometheus.port, self._config.prometheus.host)
+
     async def _set_up_hasura(self, stack: AsyncExitStack) -> None:
         if not self._config.hasura:
             return
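`_update_metrics` polls index states every `update_interval` seconds and hands the counts to `Metrics.set_indexes_count`. A sketch of how that could map onto a labelled `prometheus_client` Gauge, and of what `_set_up_prometheus` boils down to; metric and label names are assumptions, since `dipdup/prometheus.py` is not shown here:

    from prometheus_client import Gauge, start_http_server

    # Assumed metric/label names; actual definitions live in dipdup/prometheus.py.
    indexes_total = Gauge(
        'dipdup_indexes_total',
        'Number of indexes by status',
        ['status'],
    )


    class Metrics:
        enabled: bool = False

        @classmethod
        def set_indexes_count(cls, active: int, synced: int, realtime: int) -> None:
            indexes_total.labels(status='active').set(active)
            indexes_total.labels(status='synced').set(synced)
            indexes_total.labels(status='realtime').set(realtime)


    # start_http_server(port, addr) spawns a daemon thread serving /metrics,
    # which is why the dispatcher's event loop is never blocked by scrapes.
    start_http_server(8000, '127.0.0.1')

Running the updater as a sibling task of `_run` under `gather(*tasks)` also explains the removal of `stop()`: the oneshot case now exits via `break` instead of a shared `_stopped` flag.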

src/dipdup/http.py

Lines changed: 17 additions & 8 deletions
@@ -19,7 +19,8 @@
 from fcache.cache import FileCache  # type: ignore

 from dipdup import __version__
-from dipdup.config import HTTPConfig
+from dipdup.config import HTTPConfig  # type: ignore
+from dipdup.prometheus import Metrics

 safe_exceptions = (
     aiohttp.ClientConnectionError,
@@ -113,6 +114,7 @@ def _session(self) -> aiohttp.ClientSession:
             raise RuntimeError('aiohttp session is closed')
         return self.__session

+    # TODO: Move to separate method to cover SignalR negotiations too
     async def _retry_request(self, method: str, url: str, weight: int = 1, **kwargs):
         """Retry a request in case of failure sleeping according to config"""
         attempt = 1
@@ -134,13 +136,20 @@ async def _retry_request(self, method: str, url: str, weight: int = 1, **kwargs)
                 raise e

             ratelimit_sleep: Optional[float] = None
-            if isinstance(e, aiohttp.ClientResponseError) and e.status == HTTPStatus.TOO_MANY_REQUESTS:
-                # NOTE: Sleep at least 5 seconds on ratelimit
-                ratelimit_sleep = 5
-                # TODO: Parse Retry-After in UTC date format
-                with suppress(KeyError, ValueError):
-                    e.headers = cast(Mapping, e.headers)
-                    ratelimit_sleep = int(e.headers['Retry-After'])
+            if isinstance(e, aiohttp.ClientResponseError):
+                if Metrics.enabled:
+                    Metrics.set_http_error(self._url, e.status)
+
+                if e.status == HTTPStatus.TOO_MANY_REQUESTS:
+                    # NOTE: Sleep at least 5 seconds on ratelimit
+                    ratelimit_sleep = 5
+                    # TODO: Parse Retry-After in UTC date format
+                    with suppress(KeyError, ValueError):
+                        e.headers = cast(Mapping, e.headers)
+                        ratelimit_sleep = int(e.headers['Retry-After'])
+            else:
+                if Metrics.enabled:
+                    Metrics.set_http_error(self._url, 0)

             self._logger.warning('HTTP request attempt %s/%s failed: %s', attempt, retry_count_str, e)
             self._logger.info('Waiting %s seconds before retry', ratelimit_sleep or retry_sleep)
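The retry loop now reports every failed attempt: HTTP errors with their status code, everything else with status 0. A minimal sketch of a `set_http_error` backed by a `prometheus_client` Counter, with assumed metric and label names:

    from prometheus_client import Counter

    # Assumed metric name and labels; status 0 marks non-HTTP failures
    # (connection errors, timeouts) per the `else` branch above.
    http_errors = Counter(
        'dipdup_http_errors_total',
        'Number of failed HTTP requests',
        ['url', 'status'],
    )


    class Metrics:
        enabled: bool = False

        @classmethod
        def set_http_error(cls, url: str, status: int) -> None:
            http_errors.labels(url=url, status=str(status)).inc()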
