|
| 1 | +import os.path |
| 2 | +from tempfile import NamedTemporaryFile |
| 3 | +from typing import Callable |
| 4 | + |
| 5 | +import yaml |
1 | 6 | from prometheus_client.samples import Sample
|
2 | 7 | from pymemcache import Client
|
| 8 | +from testcontainers.core.container import DockerContainer |
| 9 | +from testcontainers.core.waiting_utils import wait_for_logs |
3 | 10 |
|
4 | 11 | from . import dfly_args
|
5 | 12 | from .instance import DflyInstance
|
@@ -250,3 +257,66 @@ async def test_latency_stats_disabled(async_client: aioredis.Redis):
|
250 | 257 | for _ in range(100):
|
251 | 258 | await async_client.set("foo", "bar")
|
252 | 259 | assert await async_client.info("LATENCYSTATS") == {}
|
| 260 | + |
| 261 | + |
async def test_metrics_sanity_check(df_server: DflyInstance):
    """Scrape ``df_server`` with a Prometheus container and fail on WARN/ERROR logs.

    A ``prom/prometheus`` container is started with a generated config whose only
    job scrapes ``host.docker.internal:<df_server.port>`` every second. Once the
    container reports it is ready, its logs are polled for a fixed window and the
    test fails if any row contains ``level=ERROR`` or ``level=WARN``.

    Raises:
        AssertionError: if no writable directory for the config file is found, or
            if the container logs contain a warning or an error.
        Exception: any other failure (for example a timeout while waiting for the
            container to become ready) is re-raised after the container logs
            have been written to the test log.
    """

    def on_container_output(container: DockerContainer, fn: Callable):
        # Each log stream is decoded as UTF-8 and handed to fn one row at a time.
        for entry in container.get_logs():
            for row in entry.decode("utf-8").split("\n"):
                fn(row)

    def extract_msg(s: str) -> str:
        # Fall back to the whole row when there is no msg="..." field, so a
        # failing assertion still gets a message instead of an AttributeError.
        match = re.search(r'msg="([^"]*)', s)
        return match.group(1) if match else s

    def assert_no_error(entry: str):
        assert "level=ERROR" not in entry and "level=WARN" not in entry, extract_msg(entry)

    # Piggyback on the first known mounted path if running in CI, the container running the test will start another
    # container with prometheus. The prometheus container needs the file present on the host to be able to mount it.
    # Fall back to /tmp so the test can be run on the local machine without using root.
    parent = next((p for p in ("/var/crash", "/mnt", "/tmp") if os.access(p, os.W_OK)), None)

    # TODO use python-docker api to find a valid mounted volume instead of hardcoded list
    assert parent is not None, "Could not find a path to write prometheus config"
    with NamedTemporaryFile("w", dir=parent) as f:
        prometheus_config = {
            "scrape_configs": [
                {
                    "job_name": "dfly",
                    "scrape_interval": "1s",
                    "static_configs": [{"targets": [f"host.docker.internal:{df_server.port}"]}],
                }
            ]
        }
        prometheus_config_path = "/etc/prometheus/prometheus.yml"

        logging.info(f"Starting prometheus with file {f.name}:\n{yaml.dump(prometheus_config)}")

        yaml.dump(prometheus_config, f)
        # Make sure the config is fully written out before the container mounts the file.
        f.flush()
        path = os.path.abspath(f.name)
        # The file is mounted into the container, so make it readable by other users.
        os.chmod(path, 0o644)

        with (
            DockerContainer(image="prom/prometheus")
            .with_volume_mapping(path, prometheus_config_path)
            .with_kwargs(extra_hosts={"host.docker.internal": "host-gateway"})
        ) as prometheus:
            try:
                wait_for_logs(prometheus, "Server is ready to receive web requests.", timeout=5)

                # Wait for a few seconds for any potential warnings or errors to appear, it can take several seconds.
                wait_for_errors_sec, sleep_time_sec = 10, 0.5
                start = time.monotonic()
                while time.monotonic() < start + wait_for_errors_sec:
                    on_container_output(prometheus, assert_no_error)
                    await asyncio.sleep(sleep_time_sec)
            except AssertionError:
                # For assertion errors which we raise, skip printing full prometheus logs
                raise
            except Exception as e:
                # For any other error such as timeout when starting the container, print all container logs
                logging.error(f"failed to start prometheus: {e}")
                on_container_output(
                    prometheus, lambda entry: logging.info(f"prometheus log: {entry}")
                )
                # Re-raise so a container that never became ready fails the test
                # instead of letting it pass without having checked anything.
                raise
0 commit comments