Skip to content

Commit 53c678a

Browse files
authored
tests: Add test for prometheus metrics sanity check (#5843)
Signed-off-by: Abhijat Malviya <[email protected]>
1 parent c45a22d commit 53c678a

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

tests/dragonfly/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ pytest-timeout==2.2.0
2727
asyncio==3.4.3
2828
fakeredis[json]==2.26.2
2929
hiredis==2.4.0
30+
PyYAML>=6.0
31+
testcontainers>=3.7.1

tests/dragonfly/server_family_test.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1+
import os.path
2+
from tempfile import NamedTemporaryFile
3+
from typing import Callable
4+
5+
import yaml
16
from prometheus_client.samples import Sample
27
from pymemcache import Client
8+
from testcontainers.core.container import DockerContainer
9+
from testcontainers.core.waiting_utils import wait_for_logs
310

411
from . import dfly_args
512
from .instance import DflyInstance
@@ -250,3 +257,66 @@ async def test_latency_stats_disabled(async_client: aioredis.Redis):
250257
for _ in range(100):
251258
await async_client.set("foo", "bar")
252259
assert await async_client.info("LATENCYSTATS") == {}
260+
261+
262+
async def test_metrics_sanity_check(df_server: DflyInstance):
    """Scrape ``df_server`` with a Prometheus container and fail on WARN/ERROR logs.

    A scrape config targeting ``host.docker.internal:<df_server.port>`` is written
    to a temporary file, mounted into a ``prom/prometheus`` container at
    ``/etc/prometheus/prometheus.yml``, and the container output is polled for a
    fixed window. Any output row containing ``level=ERROR`` or ``level=WARN``
    fails the test.

    Raises:
        AssertionError: if no writable directory is found for the config, or if
            a WARN/ERROR row is seen in the container output.
        Exception: any other failure (for example a timeout while waiting for
            the readiness message) is re-raised after the container logs have
            been written to the test log.
    """

    def on_container_output(container: DockerContainer, fn: Callable):
        # The container logs are decoded and handed to fn one text row at a time.
        for entry in container.get_logs():
            for row in entry.decode("utf-8").split("\n"):
                fn(row)

    def extract_msg(s: str) -> str:
        # Fall back to the whole row when it carries no quoted msg="..." field,
        # so building the assertion message can never raise on its own.
        match = re.search(r'msg="([^"]*)', s)
        return match.group(1) if match else s

    def assert_no_error(entry: str):
        assert "level=ERROR" not in entry and "level=WARN" not in entry, extract_msg(entry)

    # Piggyback on the first known mounted path if running in CI, the container running the test will start another
    # container with prometheus. The prometheus container needs the file present on the host to be able to mount it.
    # Fall back to /tmp so the test can be run on the local machine without using root.
    parent = next((p for p in ("/var/crash", "/mnt", "/tmp") if os.access(p, os.W_OK)), None)

    # TODO use python-docker api to find a valid mounted volume instead of hardcoded list
    assert parent is not None, "Could not find a path to write prometheus config"
    with NamedTemporaryFile("w", dir=parent) as f:
        prometheus_config = {
            "scrape_configs": [
                {
                    "job_name": "dfly",
                    "scrape_interval": "1s",
                    "static_configs": [{"targets": [f"host.docker.internal:{df_server.port}"]}],
                }
            ]
        }
        prometheus_config_path = "/etc/prometheus/prometheus.yml"

        logging.info(f"Starting prometheus with file {f.name}:\n{yaml.dump(prometheus_config)}")

        yaml.dump(prometheus_config, f)
        # Make sure the config is fully written out before the container mounts the file.
        f.flush()
        path = os.path.abspath(f.name)
        # World-readable, so the process inside the container can read the mounted file.
        os.chmod(path, 0o644)

        with (
            DockerContainer(image="prom/prometheus")
            .with_volume_mapping(path, prometheus_config_path)
            .with_kwargs(extra_hosts={"host.docker.internal": "host-gateway"})
        ) as prometheus:
            try:
                wait_for_logs(prometheus, "Server is ready to receive web requests.", timeout=5)

                # Wait for a few seconds for any potential warnings or errors to appear, it can take several seconds.
                wait_for_errors_sec, sleep_time_sec = 10, 0.5
                start = time.monotonic()
                while time.monotonic() < start + wait_for_errors_sec:
                    on_container_output(prometheus, assert_no_error)
                    await asyncio.sleep(sleep_time_sec)
            except AssertionError:
                # For assertion errors which we raise, skip printing full prometheus logs
                raise
            except Exception as e:
                # For any other error such as timeout when starting the container, print all container logs
                logging.error(f"failed to start prometheus: {e}")
                on_container_output(
                    prometheus, lambda entry: logging.info(f"prometheus log: {entry}")
                )
                # Re-raise: otherwise the test would pass without having checked anything.
                raise

0 commit comments

Comments
 (0)