@@ -158,11 +158,23 @@ tasks:
158158 - helm repo add cnpg https://cloudnative-pg.github.io/charts
159159 - helm repo add minio https://operator.min.io/
160160 - helm repo update
161+
162+ # Dependency update: depends on add:helm:repos, then runs "helm dependency update" once per chart listed in CHARTS
163+ update:helm:deps :
164+ desc : Update Helm chart dependencies
165+ deps : [add:helm:repos]
166+ cmds :
167+ - echo "Updating Helm chart dependencies..."
168+ - for : { var: CHARTS }
169+ cmd : |
170+ echo "Updating dependencies for {{.ITEM}} chart..."
171+ helm dependency update {{.CHART_DIR}}/{{.ITEM}}
172+ - echo "Helm chart dependencies updated successfully."
161173
162174 # Chart linting
163175 lint :
164176 desc : Lint Helm charts
165- deps : [add:helm:repos]
177+ deps : [add:helm:repos, update:helm:deps ]
166178 cmds :
167179 - echo "Linting Helm charts..."
168180 - for : { var: CHARTS }
@@ -174,7 +186,7 @@ tasks:
174186 # Template rendering
175187 template :
176188 desc : Template Helm charts with Replicated SDK disabled and output to stdout
177- deps : [add:helm:repos]
189+ deps : [add:helm:repos, update:helm:deps ]
178190 cmds :
179191 - echo "Templating Helm charts with Replicated SDK disabled..."
180192 - for : { var: CHARTS }
@@ -200,7 +212,7 @@ tasks:
200212 # Packaging tasks
201213 package :
202214 desc : Package Helm charts for distribution
203- deps : [add:helm:repos, update:versions]
215+ deps : [add:helm:repos, update:helm:deps, update:versions]  # task names: no space after ':' or YAML reads the entry as a mapping
204216 cmds :
205217 - echo "Packaging Helm charts..."
206218 - for : { var: CHARTS }
@@ -444,8 +456,10 @@ tasks:
444456 echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds."
445457 echo "Showing all available services in the namespace:"
446458 kubectl get svc -n {{.NAMESPACE}}
447- echo "Showing all pods in the namespace:"
459+ echo "Showing pod status in the namespace:"
448460 kubectl get pods -n {{.NAMESPACE}}
461+ echo "Showing pod details:"
462+ kubectl describe pods -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow
449463 exit 1
450464 fi
451465
@@ -471,32 +485,88 @@ tasks:
471485
472486 echo "Setting up port forwarding to $SERVICE_NAME..."
473487 # Kill any existing port-forward on the same port
474- lsof -i :{{.PORT}} | grep LISTEN | awk '{print $2}' | xargs kill 2>/dev/null || true
488+ PORT_FORWARD_LOG="/tmp/port-forward-mlflow-$$.log"
489+
490+ # Platform-independent way to check and kill processes on the port
491+ PORT_IN_USE=""
492+ if command -v lsof >/dev/null 2>&1; then
493+ # Linux/Mac approach
494+ PORT_IN_USE=$(lsof -i :{{.PORT}} | grep LISTEN | awk '{print $2}')
495+ if [ -n "$PORT_IN_USE" ]; then
496+ echo "Killing process $PORT_IN_USE using port {{.PORT}}"
497+ kill $PORT_IN_USE 2>/dev/null || true
498+ sleep 2
499+ fi
500+ elif command -v netstat >/dev/null 2>&1; then
501+ # Windows/generic approach
502+ netstat -ano | grep ":{{.PORT}} " | grep "LISTENING" > /dev/null && {
503+ echo "Port {{.PORT}} is in use. Please free this port before continuing."
504+ exit 1
505+ }
506+ fi
475507
476- # Set up port forwarding in the background
477- kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 &
508+ # Set up port forwarding in the background with logs
509+ kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 > $PORT_FORWARD_LOG 2>&1 &
478510 PORT_FORWARD_PID=$!
479511 echo "Port forwarding set up with PID: $PORT_FORWARD_PID"
480512
481513 # Give port-forward more time to establish
482514 echo "Waiting for port-forward to establish..."
483515 sleep 5
484516
517+ # Check if port-forward process is still running
518+ if ! ps -p $PORT_FORWARD_PID > /dev/null; then
519+ echo "ERROR: Port forwarding process died. Check the logs:"
520+ cat $PORT_FORWARD_LOG
521+ exit 1
522+ fi
523+
485524 # Basic connectivity check
486525 echo "Checking connectivity to MLflow on localhost:{{.PORT}}..."
487- for i in {1..5}; do
488- if curl -s -o /dev/null -w "%{http_code}" http://localhost:{{.PORT}}/; then
526+ MAX_CONN_RETRIES=5
527+ CONN_RETRY_COUNT=0
528+ CONN_SUCCESS=false
529+
530+ while [ $CONN_RETRY_COUNT -lt $MAX_CONN_RETRIES ]; do
531+ CONN_RETRY_COUNT=$((CONN_RETRY_COUNT+1))
532+ echo "Connection attempt $CONN_RETRY_COUNT/$MAX_CONN_RETRIES..."
533+
534+ # Try curling the MLflow endpoint
535+ if curl -s -o /dev/null -w "%{http_code}" http://localhost:{{.PORT}}/ > /dev/null 2>&1; then
489536 echo "Successfully connected to MLflow service!"
537+ CONN_SUCCESS=true
490538 break
491539 else
492- echo "Connection attempt $i failed, retrying..."
493- sleep 5
494- if [ $i -eq 5 ]; then
495- echo "Warning: Could not connect to MLflow service after multiple attempts"
496- echo "Port forwarding may not be working correctly"
540+ echo "Connection attempt $CONN_RETRY_COUNT failed, retrying in 5 seconds..."
541+
542+ # Check if port-forward is still running
543+ if ! ps -p $PORT_FORWARD_PID > /dev/null; then
544+ echo "ERROR: Port forwarding process died during connection attempts."
545+ echo "Port forwarding log:"
546+ cat $PORT_FORWARD_LOG
547+
548+ # Restart port forwarding as a fallback
549+ echo "Attempting to restart port forwarding..."
550+ kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 > $PORT_FORWARD_LOG 2>&1 &
551+ PORT_FORWARD_PID=$!
552+ echo "Restarted port forwarding with PID: $PORT_FORWARD_PID"
497553 fi
554+
555+ sleep 5
498556 fi
499557 done
558+
559+ if [ "$CONN_SUCCESS" != "true" ]; then
560+ echo "WARNING: Could not connect to MLflow service after $MAX_CONN_RETRIES attempts."
561+ echo "This may indicate issues with the service or port forwarding."
562+ echo "Port forwarding log:"
563+ cat $PORT_FORWARD_LOG
564+ echo "Pod logs:"
565+ kubectl logs -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow --tail=20 || true
566+ echo "Continuing anyway, but tests may fail."
567+ fi
568+
569+ echo "Port forwarding setup completed."
500570
501571 test:local :
502572 desc : Run Helm installation test with local charts (no Replicated registry)
@@ -549,7 +619,7 @@ tasks:
549619 - echo "Running application tests against MLflow on localhost:{{.PORT}}..."
550620 - |
551621 echo "Installing Python dependencies for tests..."
552- pip3 install mlflow pandas scikit-learn requests urllib3
622+ pip3 install 'mlflow==2.11.0' 'pandas>=2.0.0' 'scikit-learn>=1.3.0' 'requests>=2.31.0' 'urllib3>=2.0.0'  # quoted: a bare '>' is a shell redirection
553623
554624 echo "Running MLflow application tests"
555625 python {{.TESTS_DIR}}/mlflow_test.py localhost:{{.PORT}} \
0 commit comments