Skip to content

Commit ca0bcaa

Browse files
debug CI
1 parent 8291b98 commit ca0bcaa

File tree

3 files changed

+114
-32
lines changed

3 files changed

+114
-32
lines changed

.github/workflows/test.yml

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,41 @@ jobs:
166166
make create-backend-secret POSTGRES_PWD=your_postgres_password JWT_SECRET="ask for the JWT secret" ADMIN_EMAIL=alice@example.com ADMIN_PWD=pass!
167167
make k8s-modelplatform
168168
169+
- name: Configure backend for minikube Docker environment
170+
run: |
171+
echo "Configuring backend to use minikube Docker daemon"
172+
# Get minikube docker environment variables
173+
eval $(minikube docker-env)
174+
# Update the backend deployment with Docker environment variables
175+
if [ -n "$DOCKER_HOST" ]; then
176+
kubectl set env deployment/backend -n model-platform DOCKER_HOST="$DOCKER_HOST"
177+
fi
178+
if [ -n "$DOCKER_CERT_PATH" ]; then
179+
kubectl set env deployment/backend -n model-platform DOCKER_CERT_PATH="$DOCKER_CERT_PATH"
180+
fi
181+
if [ -n "$DOCKER_TLS_VERIFY" ]; then
182+
kubectl set env deployment/backend -n model-platform DOCKER_TLS_VERIFY="$DOCKER_TLS_VERIFY"
183+
fi
184+
echo "Backend configuration updated for minikube Docker"
185+
# Wait for backend to restart
186+
kubectl rollout status deployment/backend -n model-platform --timeout=120s
187+
169188
- name: Wait for infrastructure to settle (3m)
170189
run: |
171-
echo "Waitin 3 minutes for infrastructure to settle"
190+
echo "Waiting 3 minutes for infrastructure to settle"
172191
sleep 60
173192
193+
- name: Configure Docker environment for minikube
194+
run: |
195+
echo "Configuring Docker environment to use minikube"
196+
eval $(minikube docker-env)
197+
echo "DOCKER_HOST=$DOCKER_HOST" >> $GITHUB_ENV
198+
echo "DOCKER_CERT_PATH=$DOCKER_CERT_PATH" >> $GITHUB_ENV
199+
echo "DOCKER_TLS_VERIFY=$DOCKER_TLS_VERIFY" >> $GITHUB_ENV
200+
echo "MINIKUBE_ACTIVE_DOCKERD=$MINIKUBE_ACTIVE_DOCKERD" >> $GITHUB_ENV
201+
echo "Current Docker host: $DOCKER_HOST"
202+
docker info | head -10
203+
174204
- name: Run end-to-end tests
175205
run: |
176206
echo "Launching end-to-end tests"
@@ -180,7 +210,11 @@ jobs:
180210
if: failure()
181211
run: |
182212
echo "=== All Pods Status ==="
183-
kubectl get pods --all-namespaces
213+
kubectl get pods --all-namespaces -o wide
214+
echo "=== All Services ==="
215+
kubectl get services --all-namespaces
216+
echo "=== All Ingresses ==="
217+
kubectl get ingresses --all-namespaces
184218
echo "=== Backend Pod logs ==="
185219
kubectl logs -n model-platform -l app=backend --tail=100 || true
186220
echo "=== Backend Pod describe ==="
@@ -189,6 +223,12 @@ jobs:
189223
kubectl logs -l app=nginx-reverse-proxy --tail=100 || true
190224
echo "=== Nginx Pod describe ==="
191225
kubectl describe pod -l app=nginx-reverse-proxy || true
226+
echo "=== Model deployment pods (all namespaces) ==="
227+
kubectl get pods --all-namespaces | grep -E "(deployment|model)" || true
228+
echo "=== Events from all namespaces ==="
229+
kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp | tail -50 || true
230+
echo "=== Docker images in minikube ==="
231+
minikube image ls || true
192232
echo "=== Pod logs ==="
193233
194234
- name: Stop Minikube

infrastructure/k8s/backend-configmap.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ data:
1616
POSTGRES_NAMESPACE: "pgsql"
1717
POSTGRES_USER: "postgres"
1818
MLFLOW_S3_ENDPOINT_URL: "http://minio.minio.svc.cluster.local:9000"
19+
# Docker configuration for minikube - will be overridden by environment variables if necessary
20+
DOCKER_HOST: "unix:///var/run/docker.sock"

tests/tests_end_to_end/test_from_project_creation_to_model_predict.py

Lines changed: 70 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,31 +34,6 @@
3434
MODEL_VERSION = "1"
3535

3636

37-
@pytest.fixture(scope="module", autouse=True)
38-
def setup_and_teardown():
39-
"""Clean up project before and after tests."""
40-
# Setup: Login and configure docker env for minikube
41-
print("[DEBUG] Setting up e2e test environment")
42-
43-
# Check minikube status first
44-
try:
45-
result = subprocess.run(["minikube", "status"], capture_output=True, text=True, timeout=30)
46-
print(f"[DEBUG] minikube status exit code: {result.returncode}")
47-
print(f"[DEBUG] minikube status output:\n{result.stdout}")
48-
if result.stderr:
49-
print(f"[DEBUG] minikube status stderr:\n{result.stderr}")
50-
except Exception as exc:
51-
print(f"[DEBUG] Error checking minikube status: {exc}")
52-
53-
_setup_minikube_docker_env()
54-
assert login() == 0, "Login failed"
55-
56-
yield
57-
58-
# Teardown: cleanup
59-
cleanup_project(PROJECT_NAME)
60-
61-
6237
def test_health_endpoint_responds():
6338
"""Test that the platform health endpoint responds."""
6439
result = subprocess.run(
@@ -186,11 +161,29 @@ def _dump_deployment_debug_info(deployment_name):
186161
"kubectl get events",
187162
["kubectl", "get", "events", "-n", PROJECT_NAME, "--sort-by=.metadata.creationTimestamp"],
188163
)
189-
pod_name = _first_pod_name(PROJECT_NAME)
190-
if pod_name:
191-
_run_debug_cmd("kubectl describe pod", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME])
192-
_run_debug_cmd("kubectl logs current", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME])
193-
_run_debug_cmd("kubectl logs previous", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--previous"])
164+
165+
# Get all pods for this deployment to check their logs
166+
result = subprocess.run(
167+
["kubectl", "get", "pods", "-n", PROJECT_NAME, "-l", f"app={deployment_name}", "--no-headers"],
168+
capture_output=True,
169+
text=True,
170+
timeout=20,
171+
)
172+
if result.returncode == 0 and result.stdout.strip():
173+
pod_lines = result.stdout.strip().splitlines()
174+
for line in pod_lines:
175+
pod_name = line.split()[0]
176+
print(f"[DEBUG] Checking logs for pod: {pod_name}")
177+
_run_debug_cmd(
178+
f"kubectl describe pod {pod_name}", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME]
179+
)
180+
_run_debug_cmd(
181+
f"kubectl logs current {pod_name}", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--tail=100"]
182+
)
183+
_run_debug_cmd(
184+
f"kubectl logs previous {pod_name}",
185+
["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--previous", "--tail=100"],
186+
)
194187

195188
# Also check available images in minikube
196189
_run_debug_cmd("minikube image ls", ["minikube", "image", "ls"])
@@ -231,6 +224,9 @@ def test_deploy_model():
231224
"""Test model deployment."""
232225
_skip_if_mlflow_not_ready()
233226

227+
# Configure minikube docker environment before deployment
228+
_setup_minikube_docker_env()
229+
234230
# Verify Docker environment is still configured for minikube
235231
docker_host = os.environ.get("DOCKER_HOST", "not set")
236232
print(f"[DEBUG] Deploy test - Current DOCKER_HOST: {docker_host}")
@@ -255,6 +251,48 @@ def test_deployed_model_health_check():
255251
_skip_if_mlflow_not_ready()
256252
time.sleep(180)
257253
deployment_name = sanitize_ressource_name(f"{PROJECT_NAME}-{MODEL_NAME}-{MODEL_VERSION}-deployment")
254+
255+
# Check if the expected image exists in minikube
256+
expected_image_name = (
257+
f"{PROJECT_NAME.lower().replace('_', '-')}-{MODEL_NAME.lower().replace('_', '-')}-{MODEL_VERSION}-ctr:latest"
258+
)
259+
print(f"[DEBUG] Checking for image: {expected_image_name}")
260+
result = subprocess.run(
261+
["minikube", "image", "ls", "--format", "table"],
262+
capture_output=True,
263+
text=True,
264+
timeout=30,
265+
)
266+
if result.returncode == 0:
267+
if expected_image_name in result.stdout:
268+
print(f"[DEBUG] Image {expected_image_name} found in minikube")
269+
else:
270+
print(f"[DEBUG] Image {expected_image_name} NOT found in minikube")
271+
print(f"[DEBUG] Available images:\n{result.stdout}")
272+
273+
# First check if the pod is running
274+
print(f"[DEBUG] Checking pod status for deployment {deployment_name}")
275+
result = subprocess.run(
276+
["kubectl", "get", "pods", "-n", PROJECT_NAME, "-l", f"app={deployment_name}", "--no-headers"],
277+
capture_output=True,
278+
text=True,
279+
timeout=20,
280+
)
281+
if result.returncode == 0 and result.stdout.strip():
282+
pod_lines = result.stdout.strip().splitlines()
283+
for line in pod_lines:
284+
parts = line.split()
285+
pod_name, ready, status = parts[0], parts[1], parts[2]
286+
print(f"[DEBUG] Pod {pod_name}: ready={ready}, status={status}")
287+
if status != "Running":
288+
print(f"[DEBUG] Pod is not running, checking logs...")
289+
_run_debug_cmd(
290+
f"kubectl logs {pod_name}", ["kubectl", "logs", pod_name, "-n", PROJECT_NAME, "--tail=50"]
291+
)
292+
_run_debug_cmd(
293+
f"kubectl describe pod {pod_name}", ["kubectl", "describe", "pod", pod_name, "-n", PROJECT_NAME]
294+
)
295+
258296
health_url = f"http://{MP_HOSTNAME}/deploy/{PROJECT_NAME}/{deployment_name}/health"
259297
timeout = time.time() + 300 # Increase timeout to 5 minutes for CI environments
260298
start = time.time()
@@ -267,7 +305,9 @@ def test_deployed_model_health_check():
267305
)
268306
last_status = result.stdout.strip()
269307
if last_status == "200":
308+
print(f"[DEBUG] Health check successful after {time.time() - start:.1f}s")
270309
return
310+
print(f"[DEBUG] Health check attempt after {time.time() - start:.1f}s: status={last_status}")
271311
time.sleep(5) # Retry every 5 seconds
272312

273313
print(

0 commit comments

Comments
 (0)