3 changes: 1 addition & 2 deletions .github/workflows/backend-ci.yml
@@ -106,7 +106,6 @@ jobs:
run: |
cd backend
uv run pytest tests/integration -v -rs \
--ignore=tests/integration/k8s \
--cov=app \
--cov-report=xml --cov-report=term

@@ -190,7 +189,7 @@ jobs:
K8S_NAMESPACE: integr8scode
run: |
cd backend
uv run pytest tests/integration/k8s -v -rs \
uv run pytest tests/e2e -v -rs \
--cov=app \
--cov-report=xml --cov-report=term

79 changes: 75 additions & 4 deletions .github/workflows/frontend-ci.yml
@@ -23,7 +23,7 @@ jobs:
- uses: actions/checkout@v6

- name: Setup Node.js
uses: actions/setup-node@v4
uses: actions/setup-node@v6
with:
node-version: '22'
cache: 'npm'
@@ -51,11 +51,30 @@ jobs:
name: E2E Tests
needs: unit
runs-on: ubuntu-latest

# Local registry so buildx can reference the base image (the docker-container driver is isolated from the host Docker daemon)
services:
registry:
image: registry:2
ports:
- 5000:5000

env:
MONGO_IMAGE: mongo:8.0
REDIS_IMAGE: redis:7-alpine
KAFKA_IMAGE: apache/kafka:3.9.0
SCHEMA_REGISTRY_IMAGE: confluentinc/cp-schema-registry:7.5.0

steps:
- uses: actions/checkout@v6

- name: Cache and load Docker images
uses: ./.github/actions/docker-cache
with:
images: ${{ env.MONGO_IMAGE }} ${{ env.REDIS_IMAGE }} ${{ env.KAFKA_IMAGE }} ${{ env.SCHEMA_REGISTRY_IMAGE }}

- name: Setup Node.js
uses: actions/setup-node@v4
uses: actions/setup-node@v6
with:
node-version: '22'
cache: 'npm'
@@ -71,6 +90,8 @@

- name: Setup Docker Buildx
uses: docker/setup-buildx-action@v3
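# network=host lets the isolated buildx build container reach the localhost:5000 registry service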
with:
driver-opts: network=host

- name: Setup Kubernetes (k3s)
run: |
@@ -88,9 +109,59 @@
/home/runner/.kube/config > backend/kubeconfig.yaml
chmod 644 backend/kubeconfig.yaml

- name: Build and start full stack
# Build images with GitHub Actions cache for faster subsequent builds
# Base image pushed to local registry so buildx can reference it
- name: Build and push base image
uses: docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.base
push: true
tags: localhost:5000/integr8scode-base:latest
cache-from: type=gha,scope=backend-base
cache-to: type=gha,mode=max,scope=backend-base

# Pull base to Docker daemon (needed for docker-compose)
- name: Load base image to Docker daemon
run: |
docker pull localhost:5000/integr8scode-base:latest
docker tag localhost:5000/integr8scode-base:latest integr8scode-base:latest

- name: Build backend image
uses: docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
load: true
tags: integr8scode-backend:latest
build-contexts: |
base=docker-image://localhost:5000/integr8scode-base:latest
cache-from: type=gha,scope=backend
cache-to: type=gha,mode=max,scope=backend

- name: Build cert-generator image
uses: docker/build-push-action@v6
with:
context: ./cert-generator
file: ./cert-generator/Dockerfile
load: true
tags: integr8scode-cert-generator:latest
cache-from: type=gha,scope=cert-generator
cache-to: type=gha,mode=max,scope=cert-generator

- name: Build frontend image
uses: docker/build-push-action@v6
with:
context: ./frontend
file: ./frontend/Dockerfile
load: true
tags: integr8scode-frontend:latest
cache-from: type=gha,scope=frontend
cache-to: type=gha,mode=max,scope=frontend

- name: Start full stack
run: |
docker compose -f docker-compose.ci.yaml --profile full up -d --build --wait --wait-timeout 300
docker compose -f docker-compose.ci.yaml --profile full up -d --wait --wait-timeout 300
docker compose -f docker-compose.ci.yaml ps

- name: Seed test users
11 changes: 6 additions & 5 deletions backend/pyproject.toml
@@ -16,7 +16,7 @@ dependencies = [
"attrs==25.3.0",
"avro-python3==1.10.2",
"backoff==2.2.1",
"blinker==1.8.2",
"blinker==1.9.0",
"Brotli==1.2.0",
"cachetools==6.2.0",
"certifi==2024.8.30",
@@ -30,9 +30,9 @@ dependencies = [
"dishka==1.6.0",
"dnspython==2.7.0",
"durationpy==0.9",
"email_validator==2.2.0",
"email-validator==2.3.0",
"exceptiongroup==1.2.2",
"fastapi==0.124.0",
"fastapi==0.128.0",
"fastavro==1.12.1",
"fonttools==4.61.1",
"frozenlist==1.7.0",
@@ -46,7 +46,7 @@ dependencies = [
"httpx==0.28.1",
"idna==3.10",
"importlib-metadata==6.11.0",
"importlib_resources==6.4.5",
"importlib-resources==6.5.2",
"itsdangerous==2.2.0",
"Jinja2==3.1.6",
"kiwisolver==1.4.9",
@@ -88,7 +88,7 @@ dependencies = [
"pyasn1==0.6.1",
"pyasn1_modules==0.4.2",
"pydantic==2.9.2",
"pydantic-avro==0.7.1",
"pydantic-avro==0.9.1",
"pydantic-settings==2.5.2",
"pydantic_core==2.23.4",
"Pygments==2.19.2",
@@ -194,6 +194,7 @@ python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
"integration: marks tests as integration tests",
"e2e: marks tests as end-to-end tests requiring full system",
"unit: marks tests as unit tests",
"slow: marks tests as slow running",
"kafka: marks tests as requiring Kafka",
31 changes: 31 additions & 0 deletions backend/tests/e2e/conftest.py
@@ -0,0 +1,31 @@
import pytest_asyncio
import redis.asyncio as redis
from beanie import init_beanie

from app.core.database_context import Database
from app.db.docs import ALL_DOCUMENTS


@pytest_asyncio.fixture(autouse=True)
async def _cleanup(db: Database, redis_client: redis.Redis):
"""Clean DB and Redis before each E2E test.

Only pre-test cleanup - post-test cleanup causes event loop issues
when SSE/streaming tests hold connections across loop boundaries.

NOTE: With pytest-xdist, each worker uses a separate Redis database
(gw0→db0, gw1→db1, etc.), so flushdb() is safe and only affects
that worker's database. See tests/conftest.py for REDIS_DB setup.
"""
collections = await db.list_collection_names()
for name in collections:
if not name.startswith("system."):
await db.drop_collection(name)

await redis_client.flushdb()

# Initialize Beanie with document models
await init_beanie(database=db, document_models=ALL_DOCUMENTS)

yield
# No post-test cleanup to avoid "Event loop is closed" errors
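The per-worker Redis database scheme the docstring references (gw0→db0, gw1→db1, …) could be implemented along these lines — a minimal sketch only, since tests/conftest.py is not part of this diff. pytest-xdist does expose the worker id via the PYTEST_XDIST_WORKER environment variable; the fixture name and connection parameters here are assumptions:

```python
import os

import pytest_asyncio
import redis.asyncio as redis


def _redis_db_for_worker() -> int:
    # pytest-xdist sets PYTEST_XDIST_WORKER to "gw0", "gw1", ... per worker;
    # mapping each worker to its own Redis database keeps flushdb() isolated.
    worker = os.environ.get("PYTEST_XDIST_WORKER", "gw0")
    suffix = worker[2:]
    return int(suffix) if worker.startswith("gw") and suffix.isdigit() else 0


@pytest_asyncio.fixture
async def redis_client():
    # Hypothetical connection parameters for the CI Redis service.
    client = redis.Redis(host="localhost", port=6379, db=_redis_db_for_worker())
    try:
        yield client
    finally:
        await client.aclose()
```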
@@ -13,8 +13,9 @@
ResourceUsage
)

pytestmark = [pytest.mark.e2e, pytest.mark.k8s]


@pytest.mark.k8s
class TestExecution:
"""Test execution endpoints against real backend."""

@@ -104,13 +105,13 @@ async def test_get_execution_result(self, client: AsyncClient, test_user: Dict[s
# Immediately fetch result - no waiting
result_response = await client.get(f"/api/v1/result/{execution_id}")
assert result_response.status_code == 200

result_data = result_response.json()
execution_result = ExecutionResult(**result_data)
assert execution_result.execution_id == execution_id
assert execution_result.status in [e.value for e in ExecutionStatusEnum]
assert execution_result.lang == "python"

# Execution might be in any state - that's fine
# If completed, validate output; if not, that's valid too
if execution_result.status == ExecutionStatusEnum.COMPLETED:
@@ -140,7 +141,7 @@ async def test_execute_with_error(self, client: AsyncClient, test_user: Dict[str
assert exec_response.status_code == 200

execution_id = exec_response.json()["execution_id"]

# No waiting - execution was accepted, error will be processed asynchronously

@pytest.mark.asyncio
@@ -172,7 +173,7 @@ async def test_execute_with_resource_tracking(self, client: AsyncClient, test_us
assert exec_response.status_code == 200

execution_id = exec_response.json()["execution_id"]

# No waiting - execution was accepted, result will be processed asynchronously

# Fetch result and validate resource usage if present
@@ -245,7 +246,7 @@ async def test_execute_with_large_output(self, client: AsyncClient, test_user: D
assert exec_response.status_code == 200

execution_id = exec_response.json()["execution_id"]

# No waiting - execution was accepted, output will be processed asynchronously
# Validate output from result endpoint (best-effort)
result_response = await client.get(f"/api/v1/result/{execution_id}")
@@ -299,7 +300,7 @@ async def test_cancel_running_execution(self, client: AsyncClient, test_user: Di
pytest.skip("Cancellation not wired; backend returned 5xx")
# Should succeed or fail if already completed
assert cancel_response.status_code in [200, 400, 404]

# Cancel response of 200 means cancellation was accepted

@pytest.mark.asyncio
@@ -335,7 +336,7 @@ async def test_execution_with_timeout(self, client: AsyncClient, test_user: Dict
assert exec_response.status_code == 200

execution_id = exec_response.json()["execution_id"]

# Just verify the execution was created - it will run forever until timeout
# No need to wait or observe states

@@ -13,7 +13,7 @@
from app.services.k8s_worker.worker import KubernetesWorker
from kubernetes.client.rest import ApiException

pytestmark = [pytest.mark.integration, pytest.mark.k8s]
pytestmark = [pytest.mark.e2e, pytest.mark.k8s]

_test_logger = logging.getLogger("test.k8s.worker_create_pod")

@@ -7,7 +7,7 @@
from app.services.result_processor.resource_cleaner import ResourceCleaner


pytestmark = [pytest.mark.integration, pytest.mark.k8s]
pytestmark = [pytest.mark.e2e, pytest.mark.k8s]

_test_logger = logging.getLogger("test.k8s.resource_cleaner_k8s")

@@ -36,11 +36,11 @@ async def test_cleanup_orphaned_resources_dry_run() -> None:
async def test_cleanup_nonexistent_pod() -> None:
rc = ResourceCleaner(logger=_test_logger)
await rc.initialize()

# Attempt to delete a pod that doesn't exist - should complete without errors
namespace = os.environ.get("K8S_NAMESPACE", "default")
nonexistent_pod = "integr8s-test-nonexistent-pod"

# Should complete within timeout and not raise any exceptions
start_time = asyncio.get_event_loop().time()
await rc.cleanup_pod_resources(
@@ -50,15 +50,14 @@ async def test_cleanup_nonexistent_pod() -> None:
timeout=5,
)
elapsed = asyncio.get_event_loop().time() - start_time

# Verify it completed quickly (not waiting full timeout for non-existent resources)
assert elapsed < 5, f"Cleanup took {elapsed}s, should be quick for non-existent resources"

# Verify no resources exist with this name (should be empty/zero)
usage = await rc.get_resource_usage(namespace=namespace)

# usage returns counts (int), not lists
# Just check that we got a valid usage report
assert isinstance(usage.get("pods", 0), int)
assert isinstance(usage.get("configmaps", 0), int)

@@ -8,9 +8,9 @@
from app.services.result_processor.resource_cleaner import ResourceCleaner
from tests.helpers.eventually import eventually

pytestmark = [pytest.mark.integration, pytest.mark.k8s]
pytestmark = [pytest.mark.e2e, pytest.mark.k8s]

_test_logger = logging.getLogger("test.k8s.resource_cleaner_integration")
_test_logger = logging.getLogger("test.k8s.resource_cleaner_orphan")


def _ensure_kubeconfig():
9 changes: 7 additions & 2 deletions backend/tests/integration/dlq/test_dlq_discard_policy.py
@@ -2,6 +2,7 @@
import json
import logging
import os
import uuid
from datetime import datetime, timezone

import pytest
@@ -15,7 +16,10 @@
from tests.helpers import make_execution_requested_event
from tests.helpers.eventually import eventually

pytestmark = [pytest.mark.integration, pytest.mark.kafka, pytest.mark.mongodb]
# xdist_group: DLQ tests share a Kafka consumer group. When running in parallel,
# different workers' managers consume each other's messages and apply wrong policies.
# Serial execution ensures each test's manager processes only its own messages.
pytestmark = [pytest.mark.integration, pytest.mark.kafka, pytest.mark.mongodb, pytest.mark.xdist_group("dlq")]

_test_logger = logging.getLogger("test.dlq.discard_policy")

@@ -28,7 +32,8 @@ async def test_dlq_manager_discards_with_manual_policy(db) -> None: # type: ign
topic = f"{prefix}{str(KafkaTopic.EXECUTION_EVENTS)}"
manager.set_retry_policy(topic, RetryPolicy(topic=topic, strategy=RetryStrategy.MANUAL))

ev = make_execution_requested_event(execution_id="exec-dlq-discard")
# Use unique execution_id to avoid conflicts with parallel test workers
ev = make_execution_requested_event(execution_id=f"exec-dlq-discard-{uuid.uuid4().hex[:8]}")

payload = {
"event": ev.to_dict(),
5 changes: 4 additions & 1 deletion backend/tests/integration/dlq/test_dlq_manager.py
@@ -14,7 +14,10 @@
from tests.helpers import make_execution_requested_event
from tests.helpers.eventually import eventually

pytestmark = [pytest.mark.integration, pytest.mark.kafka, pytest.mark.mongodb]
# xdist_group: DLQ tests share a Kafka consumer group. When running in parallel,
# different workers' managers consume each other's messages and apply wrong policies.
# Serial execution ensures each test's manager processes only its own messages.
pytestmark = [pytest.mark.integration, pytest.mark.kafka, pytest.mark.mongodb, pytest.mark.xdist_group("dlq")]

_test_logger = logging.getLogger("test.dlq.manager")

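One detail worth noting for the xdist_group markers above: pytest-xdist only honors the marker when tests are distributed in load-group mode (--dist loadgroup); under the default distribution mode it is ignored and the DLQ tests could still interleave across workers. A minimal illustration (the invocation flags are an assumption about how CI runs the suite):

```python
import pytest

# All tests in a module sharing this mark are routed to a single xdist worker,
# so they execute serially relative to each other.
pytestmark = [pytest.mark.xdist_group("dlq")]

# Effective only when the run distributes by group, e.g.:
#   uv run pytest tests/integration -n auto --dist loadgroup
```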