Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 78a1e2c

Browse files
authored
Add retry mechanism for starting the ollama container (#898)
Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent 1790e7f commit 78a1e2c

File tree

1 file changed: +72 additions, −27 deletions

.github/workflows/integration-tests.yml

Lines changed: 72 additions & 27 deletions
Original file line number · Diff line number · Diff line change
@@ -138,47 +138,92 @@ jobs:
138138

139139
- name: Run the Ollama container (ollama-only)
140140
if: ${{ matrix.test-provider == 'ollama' }} # This is only needed for Ollama
141+
timeout-minutes: 15
142+
env:
143+
MAX_RETRIES: 3
141144
run: |
142-
docker run -d -v ollama:/root/.ollama --network host --name ollama ollama/ollama
143-
docker ps -f name=ollama
144-
echo "Loop until the endpoint responds successfully"
145-
while ! curl --silent --fail --get "http://localhost:11434" >/dev/null; do
146-
echo "Ollama not available yet. Retrying in 2 seconds..."
147-
sleep 2
148-
done
149-
echo "Ollama is now available!"
150-
151-
# Run the model
152-
docker exec -d ollama ollama run qwen2.5-coder:0.5b
153-
154-
echo "Waiting for model to be ready..."
155-
while true; do
156-
# Try to make a test query to the model
145+
function check_model_ready() {
157146
response=$(curl -s http://localhost:11434/api/generate -d '{
158147
"model": "qwen2.5-coder:0.5b",
159148
"prompt": "Why is the sky blue?",
160149
"stream": false
161150
}' 2>&1)
162151
163-
# Check if the response contains an error
164-
if echo "$response" | grep -q "error"; then
165-
echo "Model not ready yet. Retrying in 5 seconds..."
152+
if ! echo "$response" | grep -q "error"; then
153+
return 0 # Success
154+
fi
155+
return 1 # Not ready/error
156+
}
157+
158+
function cleanup_container() {
159+
docker stop ollama >/dev/null 2>&1 || true
160+
docker rm ollama >/dev/null 2>&1 || true
161+
sleep 2
162+
}
163+
164+
retry_count=0
165+
while [ $retry_count -lt $MAX_RETRIES ]; do
166+
# Cleanup any existing container
167+
cleanup_container
168+
169+
echo "Starting Ollama container (Attempt $(($retry_count + 1))/$MAX_RETRIES)"
170+
docker run -d -v ollama:/root/.ollama --network host --name ollama ollama/ollama
171+
172+
# Wait for endpoint to be available
173+
endpoint_wait=0
174+
while [ $endpoint_wait -lt 30 ]; do
175+
if curl --silent --fail --get "http://localhost:11434" >/dev/null; then
176+
echo "Ollama endpoint is available"
177+
break
178+
fi
179+
sleep 2
180+
endpoint_wait=$((endpoint_wait + 1))
181+
done
182+
183+
if [ $endpoint_wait -eq 30 ]; then
184+
echo "Endpoint never became available, retrying..."
185+
retry_count=$((retry_count + 1))
186+
continue
187+
fi
188+
189+
echo "Starting model download/initialization..."
190+
docker exec -d ollama ollama run qwen2.5-coder:0.5b
191+
192+
# Monitor container and model status
193+
monitor_count=0
194+
while [ $monitor_count -lt 60 ]; do # 5 minute timeout per attempt
195+
# Check if container is still running
196+
if ! docker ps | grep -q ollama; then
197+
echo "Container crashed, logs:"
198+
docker logs ollama
199+
retry_count=$((retry_count + 1))
200+
break
201+
fi
202+
203+
# Check if model is ready
204+
if check_model_ready; then
205+
echo "Model is ready!"
206+
exit 0 # Success!
207+
fi
208+
209+
echo "Model not ready yet. Waiting... ($(($monitor_count + 1))/60)"
166210
sleep 5
167-
else
168-
echo "Model is ready!"
169-
break
211+
monitor_count=$((monitor_count + 1))
212+
done
213+
214+
if [ $monitor_count -eq 60 ]; then
215+
echo "Timeout waiting for model, container logs:"
216+
docker logs ollama
217+
retry_count=$((retry_count + 1))
170218
fi
171219
done
172220
173-
# Verify the Ollama API is working
174-
curl http://localhost:11434/api/generate -d '{
175-
"model": "qwen2.5-coder:0.5b",
176-
"prompt": "Why is the sky blue?",
177-
"stream": false
178-
}'
221+
echo "Failed after $MAX_RETRIES attempts"
222+
exit 1
179223
180224
- name: Build and run the vllm container (vllm-only)
181225
if: ${{ matrix.test-provider == 'vllm' }} # This is only needed for VLLM
226+
timeout-minutes: 10
182227
run: |
183228
# We clone the VLLM repo and build the container because the CPU-mode container is not published
184229
git clone https://github.com/vllm-project/vllm.git

0 commit comments

Comments (0)