Debug CI: point backend and e2e tests at the minikube Docker daemon, add failure diagnostics

philippestepniewski · philippestepniewski · commit ca0bcaadefe5 · 2026-02-06T11:10:30.000+01:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -166,11 +166,41 @@ jobs:
           make create-backend-secret POSTGRES_PWD=your_postgres_password JWT_SECRET="ask for the JWT secret" ADMIN_EMAIL=alice@example.com ADMIN_PWD=pass!
           make k8s-modelplatform
 
+      - name: Configure backend for minikube Docker environment
+        run: |
+          echo "Configuring backend to use minikube Docker daemon"
+          # Get minikube docker environment variables
+          eval $(minikube docker-env)
+          # Update the backend deployment with Docker environment variables
+          if [ -n "$DOCKER_HOST" ]; then
+            kubectl set env deployment/backend -n model-platform DOCKER_HOST="$DOCKER_HOST"
+          fi
+          if [ -n "$DOCKER_CERT_PATH" ]; then
+            kubectl set env deployment/backend -n model-platform DOCKER_CERT_PATH="$DOCKER_CERT_PATH"
+          fi
+          if [ -n "$DOCKER_TLS_VERIFY" ]; then
+            kubectl set env deployment/backend -n model-platform DOCKER_TLS_VERIFY="$DOCKER_TLS_VERIFY"
+          fi
+          echo "Backend configuration updated for minikube Docker"
+          # Wait for backend to restart
+          kubectl rollout status deployment/backend -n model-platform --timeout=120s
+
       - name: Wait for infrastructure to settle (3m)
         run: |
-          echo "Waitin 3 minutes for infrastructure to settle"
+          echo "Waiting 3 minutes for infrastructure to settle"
           sleep 60
 
+      - name: Configure Docker environment for minikube
+        run: |
+          echo "Configuring Docker environment to use minikube"
+          eval $(minikube docker-env)
+          echo "DOCKER_HOST=$DOCKER_HOST" >> $GITHUB_ENV
+          echo "DOCKER_CERT_PATH=$DOCKER_CERT_PATH" >> $GITHUB_ENV
+          echo "DOCKER_TLS_VERIFY=$DOCKER_TLS_VERIFY" >> $GITHUB_ENV
+          echo "MINIKUBE_ACTIVE_DOCKERD=$MINIKUBE_ACTIVE_DOCKERD" >> $GITHUB_ENV
+          echo "Current Docker host: $DOCKER_HOST"
+          docker info | head -10
+
       - name: Run end-to-end tests
         run: |
           echo "Launching end-to-end tests"
@@ -180,7 +210,11 @@ jobs:
         if: failure()
         run: |
           echo "=== All Pods Status ==="
-          kubectl get pods --all-namespaces
+          kubectl get pods --all-namespaces -o wide
+          echo "=== All Services ==="
+          kubectl get services --all-namespaces
+          echo "=== All Ingresses ==="
+          kubectl get ingresses --all-namespaces
           echo "=== Backend Pod logs ==="
           kubectl logs -n model-platform -l app=backend --tail=100 || true
           echo "=== Backend Pod describe ==="
@@ -189,6 +223,12 @@ jobs:
           kubectl logs -l app=nginx-reverse-proxy --tail=100 || true
           echo "=== Nginx Pod describe ==="
           kubectl describe pod -l app=nginx-reverse-proxy || true
+          echo "=== Model deployment pods (all namespaces) ==="
+          kubectl get pods --all-namespaces | grep -E "(deployment|model)" || true
+          echo "=== Events from all namespaces ==="
+          kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp | tail -50 || true
+          echo "=== Docker images in minikube ==="
+          minikube image ls || true
           echo "=== Pod logs ==="
 
       - name: Stop Minikube
diff --git a/infrastructure/k8s/backend-configmap.yaml b/infrastructure/k8s/backend-configmap.yaml
@@ -16,3 +16,5 @@ data:
   POSTGRES_NAMESPACE: "pgsql"
   POSTGRES_USER: "postgres"
   MLFLOW_S3_ENDPOINT_URL: "http://minio.minio.svc.cluster.local:9000"
+  # Docker configuration for minikube - will be overridden by environment variables if necessary
+  DOCKER_HOST: "unix:///var/run/docker.sock"
diff --git a/tests/tests_end_to_end/test_from_project_creation_to_model_predict.py b/tests/tests_end_to_end/test_from_project_creation_to_model_predict.py
@@ -34,31 +34,6 @@
 MODEL_VERSION = "1"
 
 
-@pytest.fixture(scope="module", autouse=True)
-def setup_and_teardown():
-    """Clean up project before and after tests."""
-    # Setup: Login and configure docker env for minikube
-    print("[DEBUG] Setting up e2e test environment")
-
-    # Check minikube status first
-    try:
-        result = subprocess.run(["minikube", "status"], capture_output=True, text=True, timeout=30)
-        print(f"[DEBUG] minikube status exit code: {result.returncode}")
-        print(f"[DEBUG] minikube status output:\n{result.stdout}")
-        if result.stderr:
-            print(f"[DEBUG] minikube status stderr:\n{result.stderr}")
-    except Exception as exc:
-        print(f"[DEBUG] Error checking minikube status: {exc}")
-
-    _setup_minikube_docker_env()
-    assert login() == 0, "Login failed"
-
-    yield
-
-    # Teardown: cleanup
-    cleanup_project(PROJECT_NAME)
-
-
 def test_health_endpoint_responds():
     """Test that the platform health endpoint responds."""
     result = subprocess.run(
@@ -186,11 +161,29 @@ def _dump_deployment_debug_info(deployment_name):
         "kubectl get events",
         ["kubectl", "get", "events", "-n", PROJECT_NAME, "--sort-by=.metadata.creationTimestamp"],
     )
-    pod_name = _first_pod_name(PROJECT_NAME)
-    if pod_name:
-        _run_debug_cmd("kubectl describe pod", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME])
-        _run_debug_cmd("kubectl logs current", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME])
-        _run_debug_cmd("kubectl logs previous", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--previous"])
+
+    # Get all pods for this deployment to check their logs
+    result = subprocess.run(
+        ["kubectl", "get", "pods", "-n", PROJECT_NAME, "-l", f"app={deployment_name}", "--no-headers"],
+        capture_output=True,
+        text=True,
+        timeout=20,
+    )
+    if result.returncode == 0 and result.stdout.strip():
+        pod_lines = result.stdout.strip().splitlines()
+        for line in pod_lines:
+            pod_name = line.split()[0]
+            print(f"[DEBUG] Checking logs for pod: {pod_name}")
+            _run_debug_cmd(
+                f"kubectl describe pod {pod_name}", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME]
+            )
+            _run_debug_cmd(
+                f"kubectl logs current {pod_name}", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--tail=100"]
+            )
+            _run_debug_cmd(
+                f"kubectl logs previous {pod_name}",
+                ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--previous", "--tail=100"],
+            )
 
     # Also check available images in minikube
     _run_debug_cmd("minikube image ls", ["minikube", "image", "ls"])
@@ -231,6 +224,9 @@ def test_deploy_model():
     """Test model deployment."""
     _skip_if_mlflow_not_ready()
 
+    # Configure minikube docker environment before deployment
+    _setup_minikube_docker_env()
+
     # Verify Docker environment is still configured for minikube
     docker_host = os.environ.get("DOCKER_HOST", "not set")
     print(f"[DEBUG] Deploy test - Current DOCKER_HOST: {docker_host}")
@@ -255,6 +251,48 @@ def test_deployed_model_health_check():
     _skip_if_mlflow_not_ready()
     time.sleep(180)
     deployment_name = sanitize_ressource_name(f"{PROJECT_NAME}-{MODEL_NAME}-{MODEL_VERSION}-deployment")
+
+    # Check if the expected image exists in minikube
+    expected_image_name = (
+        f"{PROJECT_NAME.lower().replace('_', '-')}-{MODEL_NAME.lower().replace('_', '-')}-{MODEL_VERSION}-ctr:latest"
+    )
+    print(f"[DEBUG] Checking for image: {expected_image_name}")
+    result = subprocess.run(
+        ["minikube", "image", "ls", "--format", "table"],
+        capture_output=True,
+        text=True,
+        timeout=30,
+    )
+    if result.returncode == 0:
+        if expected_image_name in result.stdout:
+            print(f"[DEBUG] Image {expected_image_name} found in minikube")
+        else:
+            print(f"[DEBUG] Image {expected_image_name} NOT found in minikube")
+            print(f"[DEBUG] Available images:\n{result.stdout}")
+
+    # First check if the pod is running
+    print(f"[DEBUG] Checking pod status for deployment {deployment_name}")
+    result = subprocess.run(
+        ["kubectl", "get", "pods", "-n", PROJECT_NAME, "-l", f"app={deployment_name}", "--no-headers"],
+        capture_output=True,
+        text=True,
+        timeout=20,
+    )
+    if result.returncode == 0 and result.stdout.strip():
+        pod_lines = result.stdout.strip().splitlines()
+        for line in pod_lines:
+            parts = line.split()
+            pod_name, ready, status = parts[0], parts[1], parts[2]
+            print(f"[DEBUG] Pod {pod_name}: ready={ready}, status={status}")
+            if status != "Running":
+                print(f"[DEBUG] Pod is not running, checking logs...")
+                _run_debug_cmd(
+                    f"kubectl logs {pod_name}", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--tail=50"]
+                )
+                _run_debug_cmd(
+                    f"kubectl describe pod {pod_name}", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME]
+                )
+
     health_url = f"http://{MP_HOSTNAME}/deploy/{PROJECT_NAME}/{deployment_name}/health"
     timeout = time.time() + 300  # Increase timeout to 5 minutes for CI environments
     start = time.time()
@@ -267,7 +305,9 @@ def test_deployed_model_health_check():
         )
         last_status = result.stdout.strip()
         if last_status == "200":
+            print(f"[DEBUG] Health check successful after {time.time() - start:.1f}s")
             return
+        print(f"[DEBUG] Health check attempt after {time.time() - start:.1f}s: status={last_status}")
         time.sleep(5)  # Retry every 5 seconds
 
     print(