Skip to content

Commit 8b3f0b3

Browse files
committed
feat(backend): add database connection throttling
Also add metrics related to in-use connections. The first attempt was to use a metric callback so that we could grab the actual usage, but that was hard since it requires access to the request context that references the DB driver object. A simpler version is to keep track of the last usage value seen and update the metric accordingly. Note that the max_concurrent_queries setting should match the size of the query thread pool on the server side (defaults to 400), and that total should be distributed among the server workers. Signed-off-by: Fatih Acar <[email protected]>
1 parent 942e048 commit 8b3f0b3

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

backend/infrahub/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,12 @@ class DatabaseSettings(BaseSettings):
249249
retry_limit: int = Field(
250250
default=3, description="Maximum number of times a transient issue in a transaction should be retried."
251251
)
252+
max_concurrent_queries: int = Field(
253+
default=0, ge=0, description="Maximum number of concurrent queries that can run (0 means unlimited)."
254+
)
255+
max_concurrent_queries_delay: float = Field(
256+
default=0.01, ge=0, description="Delay to add when max_concurrent_queries is reached."
257+
)
252258

253259
@property
254260
def database_name(self) -> str:

backend/infrahub/database/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
from .constants import DatabaseType, Neo4jRuntime
3636
from .memgraph import DatabaseManagerMemgraph
37-
from .metrics import QUERY_EXECUTION_METRICS, TRANSACTION_RETRIES
37+
from .metrics import CONNECTION_POOL_USAGE, QUERY_EXECUTION_METRICS, TRANSACTION_RETRIES
3838
from .neo4j import DatabaseManagerNeo4j
3939

4040
if TYPE_CHECKING:
@@ -335,6 +335,14 @@ async def execute_query_with_metadata(
335335
context: dict[str, str] | None = None,
336336
type: QueryType | None = None, # pylint: disable=redefined-builtin
337337
) -> tuple[list[Record], dict[str, Any]]:
338+
connpool_usage = self._driver._pool.in_use_connection_count(self._driver._pool.address)
339+
CONNECTION_POOL_USAGE.labels(self._driver._pool.address).set(float(connpool_usage))
340+
341+
if config.SETTINGS.database.max_concurrent_queries:
342+
while connpool_usage > config.SETTINGS.database.max_concurrent_queries: # noqa: ASYNC110
343+
await asyncio.sleep(config.SETTINGS.database.max_concurrent_queries_delay)
344+
connpool_usage = self._driver._pool.in_use_connection_count(self._driver._pool.address)
345+
338346
with trace.get_tracer(__name__).start_as_current_span("execute_db_query_with_metadata") as span:
339347
span.set_attribute("query", query)
340348
if name:

backend/infrahub/database/metrics.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from prometheus_client import Counter, Histogram
3+
from prometheus_client import Counter, Gauge, Histogram
44

55
METRIC_PREFIX = "infrahub_db"
66

@@ -16,3 +16,9 @@
1616
"Number of transaction that have been retried due to transcient error",
1717
labelnames=["name"],
1818
)
19+
20+
CONNECTION_POOL_USAGE = Gauge(
21+
f"{METRIC_PREFIX}_last_connection_pool_usage",
22+
"Number of last known active connections in the pool",
23+
labelnames=["address"],
24+
)

0 commit comments

Comments
 (0)