Skip to content

Commit 115d4c0

Browse files
committed
fix taskfile
1 parent 956f0b2 commit 115d4c0

File tree

2 files changed

+88
-15
lines changed

2 files changed

+88
-15
lines changed

.github/workflows/mlflow-ci.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ jobs:
5656
run: |
5757
# Use Taskfile to add Helm repos, lint charts and generate templates
5858
task add:helm:repos
59+
task update:helm:deps
5960
task lint
6061
task template
6162
@@ -98,6 +99,8 @@ jobs:
9899
working-directory: applications/mlflow
99100
run: |
100101
# Update and package charts
102+
task add:helm:repos
103+
task update:helm:deps
101104
task update:versions
102105
task package
103106

applications/mlflow/Taskfile.yml

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,23 @@ tasks:
158158
- helm repo add cnpg https://cloudnative-pg.github.io/charts
159159
- helm repo add minio https://operator.min.io/
160160
- helm repo update
161+
162+
# Dependency update
# Runs `helm dependency update` once for every entry in CHARTS, resolving each
# chart under CHART_DIR. Depends on add:helm:repos, which runs first.
163+
update:helm:deps:
164+
desc: Update Helm chart dependencies
165+
deps: [add:helm:repos]
166+
cmds:
167+
- echo "Updating Helm chart dependencies..."
168+
- for: { var: CHARTS }
169+
cmd: |
170+
echo "Updating dependencies for {{.ITEM}} chart..."
171+
helm dependency update {{.CHART_DIR}}/{{.ITEM}}
172+
- echo "Helm chart dependencies updated successfully."
161173

162174
# Chart linting
163175
lint:
164176
desc: Lint Helm charts
165-
deps: [add:helm:repos]
177+
# update:helm:deps already depends on add:helm:repos. Task runs a task's deps
# in parallel, so listing both would run the repo task twice and let it overlap
# with `helm dependency update`.
deps: [update:helm:deps]
166178
cmds:
167179
- echo "Linting Helm charts..."
168180
- for: { var: CHARTS }
@@ -174,7 +186,7 @@ tasks:
174186
# Template rendering
175187
template:
176188
desc: Template Helm charts with Replicated SDK disabled and output to stdout
177-
deps: [add:helm:repos]
189+
# update:helm:deps already depends on add:helm:repos. Task runs a task's deps
# in parallel, so listing both would run the repo task twice and let it overlap
# with `helm dependency update`.
deps: [update:helm:deps]
178190
cmds:
179191
- echo "Templating Helm charts with Replicated SDK disabled..."
180192
- for: { var: CHARTS }
@@ -200,7 +212,7 @@ tasks:
200212
# Packaging tasks
201213
package:
202214
desc: Package Helm charts for distribution
203-
deps: [add:helm:repos, update:versions]
215+
# update:helm:deps already depends on add:helm:repos. Task runs a task's deps
# in parallel, so listing both would run the repo task twice and let it overlap
# with `helm dependency update`.
deps: [update:helm:deps, update:versions]
204216
cmds:
205217
- echo "Packaging Helm charts..."
206218
- for: { var: CHARTS }
@@ -444,8 +456,10 @@ tasks:
444456
echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds."
445457
echo "Showing all available services in the namespace:"
446458
kubectl get svc -n {{.NAMESPACE}}
447-
echo "Showing all pods in the namespace:"
459+
echo "Showing pod status in the namespace:"
448460
kubectl get pods -n {{.NAMESPACE}}
461+
echo "Showing pod details:"
462+
kubectl describe pods -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow
449463
exit 1
450464
fi
451465
@@ -471,32 +485,88 @@ tasks:
471485
472486
echo "Setting up port forwarding to $SERVICE_NAME..."
473487
# Kill any existing port-forward on the same port
474-
lsof -i :{{.PORT}} | grep LISTEN | awk '{print $2}' | xargs kill 2>/dev/null || true
488+
# Create the port-forward log with mktemp so its name is not predictable in the
# shared /tmp directory; abort if the file cannot be created.
PORT_FORWARD_LOG="$(mktemp /tmp/port-forward-mlflow.XXXXXX)" || exit 1
489+
490+
# Platform-independent way to check and kill processes on the port
491+
PORT_IN_USE=""
492+
if command -v lsof >/dev/null 2>&1; then
493+
# Linux/Mac approach
494+
PORT_IN_USE=$(lsof -i :{{.PORT}} | grep LISTEN | awk '{print $2}')
495+
if [ -n "$PORT_IN_USE" ]; then
496+
echo "Killing process $PORT_IN_USE using port {{.PORT}}"
497+
kill $PORT_IN_USE 2>/dev/null || true
498+
sleep 2
499+
fi
500+
elif command -v netstat >/dev/null 2>&1; then
501+
# Windows/generic approach
502+
netstat -ano | grep ":{{.PORT}} " | grep "LISTENING" > /dev/null && {
503+
echo "Port {{.PORT}} is in use. Please free this port before continuing."
504+
exit 1
505+
}
506+
fi
475507
476-
# Set up port forwarding in the background
477-
kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 &
508+
# Set up port forwarding in the background with logs
509+
kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 > $PORT_FORWARD_LOG 2>&1 &
478510
PORT_FORWARD_PID=$!
479511
echo "Port forwarding set up with PID: $PORT_FORWARD_PID"
480512
481513
# Give port-forward more time to establish
482514
echo "Waiting for port-forward to establish..."
483515
sleep 5
484516
517+
# Check if port-forward process is still running
518+
if ! ps -p $PORT_FORWARD_PID > /dev/null; then
519+
echo "ERROR: Port forwarding process died. Check the logs:"
520+
cat $PORT_FORWARD_LOG
521+
exit 1
522+
fi
523+
485524
# Basic connectivity check
486525
echo "Checking connectivity to MLflow on localhost:{{.PORT}}..."
487-
for i in {1..5}; do
488-
if curl -s -o /dev/null -w "%{http_code}" http://localhost:{{.PORT}}/; then
526+
MAX_CONN_RETRIES=5
527+
CONN_RETRY_COUNT=0
528+
CONN_SUCCESS=false
529+
530+
while [ $CONN_RETRY_COUNT -lt $MAX_CONN_RETRIES ]; do
531+
CONN_RETRY_COUNT=$((CONN_RETRY_COUNT+1))
532+
echo "Connection attempt $CONN_RETRY_COUNT/$MAX_CONN_RETRIES..."
533+
534+
# Try curling the MLflow endpoint
535+
if curl -s -o /dev/null -w "%{http_code}" http://localhost:{{.PORT}}/ > /dev/null 2>&1; then
489536
echo "Successfully connected to MLflow service!"
537+
CONN_SUCCESS=true
490538
break
491539
else
492-
echo "Connection attempt $i failed, retrying..."
493-
sleep 5
494-
if [ $i -eq 5 ]; then
495-
echo "Warning: Could not connect to MLflow service after multiple attempts"
496-
echo "Port forwarding may not be working correctly"
540+
echo "Connection attempt $CONN_RETRY_COUNT failed, retrying in 5 seconds..."
541+
542+
# Check if port-forward is still running
543+
if ! ps -p $PORT_FORWARD_PID > /dev/null; then
544+
echo "ERROR: Port forwarding process died during connection attempts."
545+
echo "Port forwarding log:"
546+
cat $PORT_FORWARD_LOG
547+
548+
# Restart port forwarding as a fallback
549+
echo "Attempting to restart port forwarding..."
550+
kubectl port-forward -n {{.NAMESPACE}} $SERVICE_NAME {{.PORT}}:5000 > $PORT_FORWARD_LOG 2>&1 &
551+
PORT_FORWARD_PID=$!
552+
echo "Restarted port forwarding with PID: $PORT_FORWARD_PID"
497553
fi
554+
555+
sleep 5
498556
fi
499557
done
558+
559+
if [ "$CONN_SUCCESS" != "true" ]; then
560+
echo "WARNING: Could not connect to MLflow service after $MAX_CONN_RETRIES attempts."
561+
echo "This may indicate issues with the service or port forwarding."
562+
echo "Port forwarding log:"
563+
cat $PORT_FORWARD_LOG
564+
echo "Pod logs:"
565+
kubectl logs -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow --tail=20 || true
566+
echo "Continuing anyway, but tests may fail."
567+
fi
568+
569+
echo "Port forwarding setup completed."
500570
501571
test:local:
502572
desc: Run Helm installation test with local charts (no Replicated registry)
@@ -549,7 +619,7 @@ tasks:
549619
- echo "Running application tests against MLflow on localhost:{{.PORT}}..."
550620
- |
551621
echo "Installing Python dependencies for tests..."
552-
pip3 install mlflow pandas scikit-learn requests urllib3
622+
# Quote every requirement specifier: an unquoted ">=" is parsed by the shell as
# an output redirection, which drops the version constraint and creates stray
# files such as "=2.0.0" instead of passing the constraint to pip.
pip3 install "mlflow==2.11.0" "pandas>=2.0.0" "scikit-learn>=1.3.0" "requests>=2.31.0" "urllib3>=2.0.0"
553623
554624
echo "Running MLflow application tests"
555625
python {{.TESTS_DIR}}/mlflow_test.py localhost:{{.PORT}} \

0 commit comments

Comments
 (0)