Commit 871b06a

Provide a command line option to run the tool with vLLM model
1 parent ae93cd8 commit 871b06a

6 files changed: +165 −61 lines
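In practice the new mode is driven by three environment variables plus the new flag; a minimal invocation, reusing the placeholder values from the script's own help text, looks like:

    export HOSTED_VLLM_API_BASE="https://your-vllm-endpoint.com/v1"
    export HOSTED_VLLM_API_KEY="your-api-key-here"
    export HOSTED_VLLM_MODEL_NAME="your_vllm_model"
    ./quick-start-containers.sh --remote-vllm

Without --remote-vllm the script keeps its previous behaviour: it starts a local Ollama container and points the agent at ollama_chat/qwen3:4b.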

.env

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@ OLLAMA_BASE_URL=http://localhost:11434
 OLLAMA_API_KEY=ollama
 
 # Disable Google GenAI/Vertex AI for local model usage
-GOOGLE_GENAI_USE_VERTEXAI=FALSE
+GOOGLE_GENAI_USE_VERTEXAI=FALSE

(The removed and added lines are textually identical, so the underlying change is not visible here; it is most likely a whitespace or end-of-file newline difference.)

ci_analysis_agent/agent.py

Lines changed: 3 additions & 3 deletions
@@ -23,11 +23,11 @@
 from sub_agents.e2e_test_analyst import e2e_test_analyst_agent
 from sub_agents.mustgather_analyst import mustgather_analyst_agent
 
-MODEL = LiteLlm(model="ollama_chat/qwen3:4b")
-
+import os
+MODEL = os.environ.get("MODEL", "ollama_chat/qwen3:4b")
 ci_analysis_advisor = LlmAgent(
     name="ci_analysis_advisor",
-    model=MODEL,
+    model=LiteLlm(model=MODEL),
     description=(
         "Analyzes CI jobs and provides root cause analysis for failures."
     ),
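Since the root agent now reads the model string from the MODEL environment variable and passes it to LiteLlm, the backend is chosen entirely by whoever launches the process. The two values the quick-start script injects are, in effect, the following (hypothetical exports mirroring the -e MODEL=... flags added to start_agent below; any model string LiteLLM accepts should work here):

    # local Ollama path (default)
    export MODEL="ollama_chat/qwen3:4b"

    # remote vLLM path (--remote-vllm); the script forwards HOSTED_VLLM_MODEL_NAME unchanged
    export MODEL="$HOSTED_VLLM_MODEL_NAME"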

quick-start-containers.sh

Lines changed: 152 additions & 51 deletions
@@ -20,6 +20,7 @@ OLLAMA_MODEL="qwen3:4b"
 AGENT_PORT="8000"
 OLLAMA_PORT="11434"
 USE_GPU="auto"  # auto, nvidia, amd, none
+USE_REMOTE_VLLM=false  # Use remote vLLM instead of local Ollama
 
 # Function to print colored output
 print_status() {
@@ -53,6 +54,46 @@ check_podman() {
     print_success "Podman is available"
 }
 
+# Function to validate remote vLLM environment variables
+validate_vllm_env() {
+    if [ "$USE_REMOTE_VLLM" = true ]; then
+        print_status "Validating remote vLLM environment variables..."
+
+        local missing_vars=""
+
+        if [ -z "$HOSTED_VLLM_API_BASE" ]; then
+            missing_vars="$missing_vars HOSTED_VLLM_API_BASE"
+        fi
+
+        if [ -z "$HOSTED_VLLM_API_KEY" ]; then
+            missing_vars="$missing_vars HOSTED_VLLM_API_KEY"
+        fi
+
+        if [ -z "$HOSTED_VLLM_MODEL_NAME" ]; then
+            missing_vars="$missing_vars HOSTED_VLLM_MODEL_NAME"
+        fi
+        if [ -n "$missing_vars" ]; then
+            print_error "Missing required environment variables for remote vLLM:$missing_vars"
+            echo ""
+            echo "Please set the following environment variables before running this script:"
+            echo "  export HOSTED_VLLM_API_BASE=\"your_vllm_api_base_url\""
+            echo "  export HOSTED_VLLM_API_KEY=\"your_vllm_api_key\""
+            echo "  export HOSTED_VLLM_MODEL_NAME=\"your_vllm_model\""
+            echo ""
+            echo "Example:"
+            echo "  export HOSTED_VLLM_API_BASE=\"https://your-vllm-endpoint.com/v1\""
+            echo "  export HOSTED_VLLM_API_KEY=\"your-api-key-here\""
+            echo "  export HOSTED_VLLM_MODEL_NAME=\"claudette-sheep\""
+            exit 1
+        fi
+
+        print_success "Remote vLLM environment variables are set"
+        print_status "API Base: $HOSTED_VLLM_API_BASE"
+        print_status "API Key: ${HOSTED_VLLM_API_KEY:0:8}..."  # Show only first 8 characters for security
+        print_status "Model Name: $HOSTED_VLLM_MODEL_NAME"
+    fi
+}
+
 # Function to detect GPU capabilities
 detect_gpu() {
     local gpu_type="none"
@@ -214,11 +255,26 @@ start_agent() {
     podman build -t ci-analysis-agent:latest .
 
     print_status "Starting CI Analysis Agent container..."
+
+    # Prepare environment variables
+    local env_args="-e LOG_LEVEL=INFO"
+
+    if [ "$USE_REMOTE_VLLM" = true ]; then
+        env_args="$env_args -e HOSTED_VLLM_API_BASE=$HOSTED_VLLM_API_BASE"
+        env_args="$env_args -e HOSTED_VLLM_API_KEY=$HOSTED_VLLM_API_KEY"
+        env_args="$env_args -e MODEL=$HOSTED_VLLM_MODEL_NAME"
+        print_status "Using remote vLLM endpoint: $HOSTED_VLLM_API_BASE"
+    else
+        # Use default values for local Ollama setup
+        env_args="$env_args -e MODEL=ollama_chat/$OLLAMA_MODEL"
+        env_args="$env_args -e OLLAMA_API_BASE=http://localhost:$OLLAMA_PORT"
+        print_status "Using local Ollama setup with default vLLM fallback"
+    fi
+
     podman run -d \
         --name "$AGENT_CONTAINER" \
         --network host \
-        -e OLLAMA_API_BASE="http://localhost:$OLLAMA_PORT" \
-        -e LOG_LEVEL=INFO \
+        $env_args \
         ci-analysis-agent:latest
 
     print_success "CI Analysis Agent container started"
@@ -229,11 +285,15 @@ verify_deployment() {
     print_status "Verifying deployment..."
 
     # Check if containers are running
-    if podman ps | grep -q "$OLLAMA_CONTAINER"; then
-        print_success "Ollama container is running"
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        if podman ps | grep -q "$OLLAMA_CONTAINER"; then
+            print_success "Ollama container is running"
+        else
+            print_error "Ollama container is not running"
+            return 1
+        fi
     else
-        print_error "Ollama container is not running"
-        return 1
+        print_status "Skipping Ollama container check (using remote vLLM)"
     fi
 
     if podman ps | grep -q "$AGENT_CONTAINER"; then
@@ -419,42 +479,59 @@ show_status() {
     echo "================================================================="
     echo ""
     echo "🌐 Web Interface: http://localhost:$AGENT_PORT"
-    echo "🤖 Ollama API: http://localhost:$OLLAMA_PORT"
 
-    # Show GPU status
-    case "$gpu_type" in
-        "nvidia")
-            echo "🎮 GPU Mode: NVIDIA GPU acceleration enabled"
-            ;;
-        "amd")
-            echo "🎮 GPU Mode: AMD GPU acceleration enabled"
-            ;;
-        "none")
-            echo "🎮 GPU Mode: CPU-only mode"
-            ;;
-    esac
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        echo "🤖 Ollama API: http://localhost:$OLLAMA_PORT"
+
+        # Show GPU status
+        case "$gpu_type" in
+            "nvidia")
+                echo "🎮 GPU Mode: NVIDIA GPU acceleration enabled"
+                ;;
+            "amd")
+                echo "🎮 GPU Mode: AMD GPU acceleration enabled"
+                ;;
+            "none")
+                echo "🎮 GPU Mode: CPU-only mode"
+                ;;
+        esac
+    else
+        echo "🤖 Remote vLLM: $HOSTED_VLLM_API_BASE"
+        echo "🎮 GPU Mode: Remote vLLM endpoint"
+    fi
 
     echo ""
     echo "📊 Container Status:"
     podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
     echo ""
-    echo "💾 Volume Status:"
-    podman volume ls | grep "$OLLAMA_VOLUME" || echo "  No volumes found"
-    echo ""
-    echo "🎯 Quick Commands:"
-    echo "  • View logs: podman logs -f $AGENT_CONTAINER"
-    echo "  • Check Ollama models: podman exec $OLLAMA_CONTAINER ollama list"
-    echo "  • Stop containers: $0 --stop"
-    echo "  • Start containers: podman start $OLLAMA_CONTAINER $AGENT_CONTAINER"
-    echo "  • Clean up all: $0 --clean-all"
-    echo "  • Remove volumes: $0 --remove-volumes"
-    echo "  • Remove images: $0 --remove-images"
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        echo "💾 Volume Status:"
+        podman volume ls | grep "$OLLAMA_VOLUME" || echo "  No volumes found"
+        echo ""
+        echo "🎯 Quick Commands:"
+        echo "  • View logs: podman logs -f $AGENT_CONTAINER"
+        echo "  • Check Ollama models: podman exec $OLLAMA_CONTAINER ollama list"
+        echo "  • Stop containers: $0 --stop"
+        echo "  • Start containers: podman start $OLLAMA_CONTAINER $AGENT_CONTAINER"
+        echo "  • Clean up all: $0 --clean-all"
+        echo "  • Remove volumes: $0 --remove-volumes"
+        echo "  • Remove images: $0 --remove-images"
+    else
+        echo "🎯 Quick Commands:"
+        echo "  • View logs: podman logs -f $AGENT_CONTAINER"
+        echo "  • Stop container: $0 --stop"
+        echo "  • Start container: podman start $AGENT_CONTAINER"
+        echo "  • Clean up all: $0 --clean-all"
+        echo "  • Remove images: $0 --remove-images"
+    fi
 
-    # GPU-specific commands
-    if [ "$gpu_type" = "nvidia" ]; then
-        echo "  • Check GPU usage: podman exec $OLLAMA_CONTAINER nvidia-smi"
-    elif [ "$gpu_type" = "amd" ]; then
-        echo "  • Check GPU usage: podman exec $OLLAMA_CONTAINER rocm-smi"
+    # GPU-specific commands (only for local Ollama)
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        if [ "$gpu_type" = "nvidia" ]; then
+            echo "  • Check GPU usage: podman exec $OLLAMA_CONTAINER nvidia-smi"
+        elif [ "$gpu_type" = "amd" ]; then
+            echo "  • Check GPU usage: podman exec $OLLAMA_CONTAINER rocm-smi"
+        fi
     fi
 
     echo ""
@@ -477,6 +554,7 @@ show_help() {
     echo "  --no-model       Skip pulling the Ollama model"
     echo "  --gpu TYPE       GPU type to use: auto, nvidia, amd, none (default: $USE_GPU)"
    echo "  --cpu-only       Force CPU-only mode, disable GPU detection"
+    echo "  --remote-vllm    Use remote vLLM endpoint instead of local Ollama"
     echo ""
     echo "Cleanup Options:"
     echo "  --clean-all      Remove containers, volumes, images, and pods"
@@ -495,6 +573,13 @@ show_help() {
     echo "  $0 -p 3000           # Use port 3000 instead of 8000"
     echo "  $0 --gpu nvidia      # Force NVIDIA GPU usage"
     echo "  $0 --cpu-only        # Force CPU-only mode"
+    echo "  $0 --remote-vllm     # Use remote vLLM (requires env vars)"
+    echo ""
+    echo "Remote vLLM Usage:"
+    echo "  Set environment variables before using --remote-vllm:"
+    echo "  export HOSTED_VLLM_API_BASE=\"https://your-vllm-endpoint.com/v1\""
+    echo "  export HOSTED_VLLM_API_KEY=\"your-api-key-here\""
+    echo "  $0 --remote-vllm     # Start without local Ollama"
 }
 
 # Main function
@@ -562,6 +647,10 @@ main() {
             USE_GPU="none"
             shift
             ;;
+        --remote-vllm)
+            USE_REMOTE_VLLM=true
+            shift
+            ;;
         *)
             print_error "Unknown option: $1"
             show_help
@@ -587,19 +676,27 @@ main() {
     # Check prerequisites
     check_podman
 
-    # Determine GPU type
-    if [ "$USE_GPU" = "auto" ]; then
-        gpu_type=$(detect_gpu)
-    else
-        gpu_type="$USE_GPU"
-    fi
+    # Validate remote vLLM environment variables if needed
+    validate_vllm_env
 
-    # Validate GPU runtime if needed
-    if [ "$gpu_type" != "none" ]; then
-        if ! check_gpu_runtime "$gpu_type"; then
-            print_warning "GPU runtime check failed, falling back to CPU-only mode"
-            gpu_type="none"
+    # Determine GPU type (only needed for local Ollama)
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        if [ "$USE_GPU" = "auto" ]; then
+            gpu_type=$(detect_gpu)
+        else
+            gpu_type="$USE_GPU"
         fi
+
+        # Validate GPU runtime if needed
+        if [ "$gpu_type" != "none" ]; then
+            if ! check_gpu_runtime "$gpu_type"; then
+                print_warning "GPU runtime check failed, falling back to CPU-only mode"
+                gpu_type="none"
+            fi
+        fi
+    else
+        gpu_type="none"  # GPU not relevant for remote vLLM
+        print_status "Using remote vLLM - skipping GPU detection"
     fi
 
     # Cleanup if requested
@@ -608,11 +705,15 @@ main() {
     fi
 
     # Start deployment
-    create_volume
-    start_ollama "$gpu_type"
-
-    if [ "$skip_model" = false ]; then
-        pull_model
+    if [ "$USE_REMOTE_VLLM" = false ]; then
+        create_volume
+        start_ollama "$gpu_type"
+
+        if [ "$skip_model" = false ]; then
+            pull_model
+        fi
+    else
+        print_status "Skipping Ollama setup (using remote vLLM)"
     fi
 
     start_agent
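Once the agent container is up, a quick way to confirm which backend it was wired to is to inspect the environment that start_agent injected. A minimal sketch, assuming the image ships a standard env binary and using ci-analysis-agent as a stand-in for whatever $AGENT_CONTAINER is set to earlier in the script (the API key is deliberately left out of the grep pattern):

    podman exec ci-analysis-agent env | grep -E '^(MODEL|OLLAMA_API_BASE|HOSTED_VLLM_API_BASE)='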

sub_agents/e2e_test_analyst/agent.py

Lines changed: 3 additions & 2 deletions
@@ -9,11 +9,12 @@
 import threading
 import concurrent.futures
 import re
+import os
 from typing import Dict, Any, Optional, List
 
 GCS_URL = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs"
 
-MODEL = LiteLlm(model="ollama_chat/qwen3:4b")
+MODEL = os.environ.get("MODEL", "qwen3:4b")
 
 # Prow tool functions for e2e test analysis
 async def get_job_metadata_async(job_name: str, build_id: str) -> Dict[str, Any]:
@@ -327,7 +328,7 @@ def get_junit_results_tool(job_name: str, build_id: str, test_name: str):
     return run_async_in_thread(get_junit_results_async(job_name, build_id, test_name))
 
 e2e_test_analyst_agent = Agent(
-    model=MODEL,
+    model=LiteLlm(model=MODEL),
     name="e2e_test_analyst_agent",
     instruction=prompt.E2E_TEST_SPECIALIST_PROMPT,
     output_key="e2e_test_analysis_output",

sub_agents/installation_analyst/agent.py

Lines changed: 3 additions & 2 deletions
@@ -9,11 +9,12 @@
 import threading
 import concurrent.futures
 import re
+import os
 from typing import Dict, Any, Optional
 
 GCS_URL = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs"
 
-MODEL = LiteLlm(model="ollama_chat/qwen3:4b")
+MODEL = os.environ.get("MODEL", "qwen3:4b")
 
 def extract_installation_info(log_content: str) -> Dict[str, Any]:
     """Extract installation information from build-log.txt."""
@@ -316,7 +317,7 @@ def get_install_logs_tool(job_name: str, build_id: str, test_name: str):
     return run_async_in_thread(get_install_logs_async(job_name, build_id, test_name))
 
 installation_analyst_agent = Agent(
-    model=MODEL,
+    model=LiteLlm(model=MODEL),
     name="installation_analyst_agent",
     instruction=prompt.INSTALLATION_SPECIALIST_PROMPT,
     output_key="installation_analysis_output",

sub_agents/mustgather_analyst/agent.py

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 from google.adk import Agent
 from . import prompt
 from google.adk.models.lite_llm import LiteLlm
-
+import os
 from .must_gather import get_must_gather, list_directory, read_drained_file, get_file_info, search_files
-MODEL = "ollama/qwen3:4b"
+
+MODEL = os.environ.get("MODEL", "qwen3:4b")
 
 mustgather_analyst_agent = Agent(
     model=LiteLlm(model=MODEL),
