WIP: Finally set up integration tests properly

phoevos · phoevos · commit 88c1d5735976 · 2025-01-21T17:23:12.000Z
Signed-off-by: Phoevos Kalemkeris <phoevos.kalemkeris@ucl.ac.uk>
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ select = ["E", "F", "I", "UP"]
 [tool.pytest.ini_options]
 addopts = "-ra"
 pythonpath = ["."]
+testpaths = ["tests"]
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,37 @@
+import logging
+
+import pytest
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    parser.addoption(
+        "--skip-cleanup-cms",
+        action="store_true",
+        default=False,
+        help="Skip cleanup for the CMS resources after completing the tests.",
+    )
+
+
+@pytest.fixture(scope="module")
+def cleanup_cms(request: pytest.FixtureRequest) -> bool:
+    return not request.config.getoption("--skip-cleanup-cms")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def setup_logging() -> None:
+    # Suppress logging from testcontainers
+    for logger_name in logging.root.manager.loggerDict:
+        if logger_name.startswith("testcontainers"):
+            logging.getLogger(logger_name).setLevel(logging.WARNING)
+
+    parent_logger = logging.getLogger("cmg.tests")
+    parent_logger.setLevel(logging.DEBUG)
+
+    handler = logging.StreamHandler()
+    handler.setLevel(logging.DEBUG)
+    handler.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
+    parent_logger.addHandler(handler)
+
+    # Configure child loggers
+    logging.getLogger("cmg.tests.integration").setLevel(logging.INFO)
+    logging.getLogger("cmg.tests.unit").setLevel(logging.INFO)
diff --git a/tests/integration/assets/cms.env b/tests/integration/assets/cms.env
@@ -0,0 +1,12 @@
+COMPOSE_PROJECT_NAME=cmg-test
+
+MLFLOW_DB_USERNAME=admin
+MLFLOW_DB_PASSWORD=admin
+AWS_ACCESS_KEY_ID=admin
+AWS_SECRET_ACCESS_KEY=admin123
+
+GRAFANA_ADMIN_USER=admin
+GRAFANA_ADMIN_PASSWORD=admin
+
+GRAYLOG_PASSWORD_SECRET=admin
+GRAYLOG_ROOT_PASSWORD_SHA2=admin
diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py
@@ -1,48 +1,38 @@
+import json
+
 import pytest
+import requests
 from fastapi.testclient import TestClient
-from testcontainers.minio import MinioContainer
-from testcontainers.postgres import PostgresContainer
-from testcontainers.rabbitmq import RabbitMqContainer
 
+from cogstack_model_gateway.common.config import Config, load_config
+from cogstack_model_gateway.common.object_store import ObjectStoreManager
+from cogstack_model_gateway.common.queue import QueueManager
+from cogstack_model_gateway.common.tasks import Status, TaskManager
 from cogstack_model_gateway.gateway.main import app
 from tests.integration.utils import (
-    clone_cogstack_model_serve,
+    TEST_MODEL_SERVICE,
     configure_environment,
-    remove_cogstack_model_serve,
-    remove_testcontainers,
-    start_cogstack_model_serve,
-    start_scheduler,
-    start_testcontainers,
-    stop_cogstack_model_serve,
-    stop_scheduler,
+    setup_cms,
+    setup_scheduler,
+    setup_testcontainers,
 )
 
-POSTGRES_IMAGE = "postgres:17.2"
-RABBITMQ_IMAGE = "rabbitmq:4.0.4-management-alpine"
-MINIO_IMAGE = "minio/minio:RELEASE.2024-11-07T00-52-20Z"
-
 
 @pytest.fixture(scope="module", autouse=True)
-def setup(request):
-    postgres = PostgresContainer(POSTGRES_IMAGE)
-    rabbitmq = RabbitMqContainer(RABBITMQ_IMAGE)
-    minio = MinioContainer(MINIO_IMAGE)
-
-    containers = [postgres, rabbitmq, minio]
-    request.addfinalizer(lambda: remove_testcontainers(containers))
+def setup(request: pytest.FixtureRequest, cleanup_cms: bool):
+    """Prepare the environment shared by every test in this module.
+
+    Calls ``setup_testcontainers`` (yielding the postgres, rabbitmq and minio
+    handles), ``setup_cms`` (yielding a mapping from service name to a dict
+    with ``address`` and ``port`` entries), ``configure_environment`` and
+    finally ``setup_scheduler``. The ``request`` object is handed to the
+    helpers; presumably they register their own finalizers on it — confirm in
+    ``tests/integration/utils.py``.
+    """
+    postgres, rabbitmq, minio = setup_testcontainers(request)
 
-    start_testcontainers(containers)
+    svc_addr_map = setup_cms(request, cleanup_cms)
+    # Stash the model service address in the pytest cache so that the
+    # ``test_model_service_ip`` fixture can read it back
+    request.config.cache.set("TEST_MODEL_SERVICE_IP", svc_addr_map[TEST_MODEL_SERVICE]["address"])
 
-    configure_environment(postgres, rabbitmq, minio)
+    mlflow_addr = svc_addr_map["mlflow-ui"]["address"]
+    mlflow_port = svc_addr_map["mlflow-ui"]["port"]
+    # Extra settings passed to configure_environment on top of the container details
+    env = {
+        "MLFLOW_TRACKING_URI": f"http://{mlflow_addr}:{mlflow_port}",
+    }
+    configure_environment(postgres, rabbitmq, minio, extras=env)
 
-    scheduler_process = start_scheduler()
-    request.addfinalizer(lambda: stop_scheduler(scheduler_process))
-
-    clone_cogstack_model_serve()
-    request.addfinalizer(remove_cogstack_model_serve)
-
-    cms_compose_envs = start_cogstack_model_serve()
-    request.addfinalizer(lambda: stop_cogstack_model_serve(cms_compose_envs))
+    setup_scheduler(request)
 
 
 @pytest.fixture(scope="module")
@@ -51,7 +41,165 @@ def client():
         yield client
 
 
+@pytest.fixture(scope="module")
+def config(client: TestClient) -> Config:
+    return load_config()
+
+
+@pytest.fixture(scope="module")
+def test_model_service_ip(request: pytest.FixtureRequest) -> str:
+    return request.config.cache.get("TEST_MODEL_SERVICE_IP", None)
+
+
+def test_config_loaded(config: Config):
+    assert config
+    assert all(
+        key in config
+        for key in [
+            "database_manager",
+            "task_object_store_manager",
+            "results_object_store_manager",
+            "queue_manager",
+            "task_manager",
+        ]
+    )
+
+
 def test_root(client: TestClient):
     """Check that the root endpoint answers 200 with the welcome message."""
     response = client.get("/")
     assert response.status_code == 200
     assert response.json() == {"message": "Enter the cult... I mean, the API."}
+
+
+def test_get_tasks(client: TestClient):
+    response = client.get("/tasks/")
+    assert response.status_code == 403
+    assert response.json() == {"detail": "Only admins can list tasks"}
+
+
+def test_get_task_by_uuid(client: TestClient, config: Config):
+    task_uuid = "nonexistent-uuid"
+    response = client.get(f"/tasks/{task_uuid}")
+    assert response.status_code == 404
+    assert response.json() == {"detail": f"Task '{task_uuid}' not found"}
+
+    tm: TaskManager = config.task_manager
+    task_uuid = tm.create_task(status="pending")
+    response = client.get(f"/tasks/{task_uuid}")
+    assert response.status_code == 200
+    assert response.json() == {"uuid": task_uuid, "status": "pending"}
+
+    tm.update_task(task_uuid, status="succeeded", result="result.txt", error_message=None)
+    response = client.get(f"/tasks/{task_uuid}", params={"detail": True})
+    assert response.status_code == 200
+    assert response.json() == {
+        "uuid": task_uuid,
+        "status": "succeeded",
+        "result": "result.txt",
+        "error_message": None,
+        "tracking_id": None,
+    }
+
+
+def test_get_models(client: TestClient):
+    response = client.get("/models/")
+    assert response.status_code == 200
+
+    response_json = response.json()
+    assert isinstance(response_json, list)
+    assert len(response_json) == 1
+    assert all(key in response_json[0] for key in ["name", "uri"])
+    assert response_json[0]["name"] == TEST_MODEL_SERVICE
+
+
+def test_get_model_info(client: TestClient, test_model_service_ip: str):
+    response = client.get(f"/models/{test_model_service_ip}/info")
+    assert response.status_code == 200
+    assert all(
+        key in response.json()
+        for key in ["api_version", "model_type", "model_description", "model_card"]
+    )
+
+
+def test_unsupported_task(client: TestClient, test_model_service_ip: str):
+    response = client.post(
+        f"/models/{test_model_service_ip}/unsupported-task",
+        headers={"Content-Type": "dummy"},
+    )
+    assert response.status_code == 404
+    assert "Task 'unsupported-task' not found. Supported tasks are:" in response.json()["detail"]
+
+
+def test_process(client: TestClient, config: Config, test_model_service_ip: str):
+    response = client.post(
+        f"/models/{test_model_service_ip}/process",
+        data="Spinal stenosis",
+        headers={"Content-Type": "text/plain"},
+    )
+    assert response.status_code == 200
+    response_json = response.json()
+    assert all(key in response_json for key in ["uuid", "status"])
+
+    task_uuid = response_json["uuid"]
+    tm: TaskManager = config.task_manager
+    assert tm.get_task(task_uuid), "Failed to submit task: not found in the database"
+
+    # Wait for the task to complete
+    while (task := tm.get_task(task_uuid)).status != Status.SUCCEEDED:
+        pass
+
+    # Verify that the task payload was stored in the object store
+    task_payload_key = f"{task_uuid}_payload.txt"
+    tom: ObjectStoreManager = config.task_object_store_manager
+    payload = tom.get_object(task_payload_key)
+    assert payload == b"Spinal stenosis"
+
+    # Verify that the queue is empty after the task is processed
+    qm: QueueManager = config.queue_manager
+    assert qm.is_queue_empty()
+
+    # Verify task results
+    assert task.error_message is None, f"Task failed unexpectedly: {task.error_message}"
+    assert task.result is not None, "Task results are missing"
+
+    rom: ObjectStoreManager = config.results_object_store_manager
+    result = rom.get_object(task.result)
+
+    try:
+        result_json = json.loads(result.decode("utf-8"))
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Failed to parse the result as JSON: {result}, {e}")
+
+    assert result_json["text"] == "Spinal stenosis"
+    assert len(result_json["annotations"]) == 1
+
+    annotation = result_json["annotations"][0]
+    assert all(
+        key in annotation
+        for key in [
+            "start",
+            "end",
+            "label_name",
+            "label_id",
+            "categories",
+            "accuracy",
+            "meta_anns",
+            "athena_ids",
+        ]
+    )
+    assert annotation["label_name"] == "Spinal Stenosis"
+
+    # Verify that the above match the information exposed through the user-facing API
+    get_response = client.get(f"/tasks/{task_uuid}", params={"detail": True, "download_url": True})
+    assert get_response.status_code == 200
+
+    get_response_json = get_response.json()
+    assert get_response_json["uuid"] == task.uuid
+    assert get_response_json["status"] == task.status
+    assert get_response_json["error_message"] is None
+    assert get_response_json["tracking_id"] is None
+
+    # Download results and verify they match the ones read from the object store
+    download_results = requests.get(get_response_json["result"])
+    assert download_results.status_code == 200
+    assert download_results.content == result
diff --git a/tests/integration/utils.py b/tests/integration/utils.py