Skip to content

Commit ea299e1

Browse files
committed
WIP: MULTIARCH-5644: Agent with capabilities to interact with loki
1 parent ca28a49 commit ea299e1

File tree

4 files changed

+152
-6
lines changed

4 files changed

+152
-6
lines changed

quick-start-containers.sh

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ NC='\033[0m' # No Color
1515
# Configuration
1616
OLLAMA_CONTAINER="ollama"
1717
AGENT_CONTAINER="ci-analysis-agent"
18+
LOKI_MCP_CONTAINER="loki-mcp"
1819
OLLAMA_VOLUME="ollama-data"
1920
OLLAMA_MODEL="qwen3:4b"
21+
MCP_SERVER_PORT="8888"
2022
AGENT_PORT="8000"
2123
OLLAMA_PORT="11434"
2224
USE_GPU="auto" # auto, nvidia, amd, none
@@ -175,6 +177,11 @@ cleanup_existing() {
175177
podman stop "$AGENT_CONTAINER" 2>/dev/null || true
176178
podman rm "$AGENT_CONTAINER" 2>/dev/null || true
177179
fi
180+
181+
if podman container exists "$LOKI_MCP_CONTAINER" 2>/dev/null; then
182+
podman stop "$LOKI_MCP_CONTAINER" 2>/dev/null || true
183+
podman rm "$LOKI_MCP_CONTAINER" 2>/dev/null || true
184+
fi
178185

179186
print_success "Cleanup completed"
180187
}
@@ -280,6 +287,34 @@ start_agent() {
280287
print_success "CI Analysis Agent container started"
281288
}
282289

290+
# Build the loki-mcp:latest image from the upstream grafana/loki-mcp sources.
#
# An existing ./loki-mcp checkout is reused, so re-running the script does not
# abort on git's "destination path already exists" error.
#
# Outputs: progress messages via print_status / print_success / print_warning.
# Returns: 0 on success, 1 if the clone or the image build fails.
build_loki_mcp() {
    local src_dir="loki-mcp"

    print_status "Building Loki MCP container..."

    # Only clone when there is no checkout yet; a second clone into the same
    # directory would fail.
    if [ ! -d "$src_dir" ]; then
        if ! git clone https://github.com/grafana/loki-mcp.git "$src_dir"; then
            print_warning "Failed to clone the loki-mcp repository"
            return 1
        fi
    fi

    # Do not claim success when the image build itself failed.
    if ! podman build -t loki-mcp:latest "$src_dir"; then
        print_warning "Failed to build the Loki MCP image"
        return 1
    fi

    print_success "Loki MCP container built"
}
296+
297+
# Start the Loki MCP container and wait for its HTTP endpoint to answer.
#
# Globals:  LOKI_MCP_CONTAINER (read) - name given to the container
#           MCP_SERVER_PORT (read)    - host port mapped to container port 8080
# Outputs:  progress messages via print_status / print_success / print_warning.
# Returns:  1 if `podman run` fails; 0 otherwise (including when the server
#           did not answer within the polling window, which only warns).
start_loki_mcp() {
    local attempt
    local -r max_attempts=15

    print_status "Starting Loki MCP container..."

    # NOTE(review): the MCP_TRANSPORT value "http=stream" is passed through
    # unchanged; confirm against the loki-mcp documentation that this is the
    # intended spelling.
    if ! podman run -d --name "$LOKI_MCP_CONTAINER" \
        -e MCP_TRANSPORT=http=stream \
        -p "${MCP_SERVER_PORT}:8080" \
        loki-mcp:latest; then
        # Without a running container there is nothing to poll for.
        print_warning "Failed to start Loki MCP container"
        return 1
    fi
    print_success "Loki MCP container started"

    # Give the server a moment to boot before probing it.
    print_status "Waiting for MCP server to be ready..."
    sleep 5

    # Poll the published port: up to max_attempts probes, 2 seconds apart.
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if curl -s -f "http://localhost:${MCP_SERVER_PORT}/" >/dev/null 2>&1; then
            print_success "Loki MCP server is ready"
            return 0
        fi
        if ((attempt < max_attempts)); then
            sleep 2
        fi
    done

    print_warning "Loki MCP server may not be fully ready yet"
    return 0
}
317+
283318
# Function to verify deployment
284319
verify_deployment() {
285320
print_status "Verifying deployment..."
@@ -321,6 +356,19 @@ stop_containers() {
321356
print_status "Stopping CI Analysis Agent containers..."
322357

323358
# Stop containers
359+
360+
# Stop the Loki MCP container when it exists and is currently running;
# otherwise warn about the state it was found in.
if podman container exists "$LOKI_MCP_CONTAINER" 2>/dev/null; then
    if podman ps | grep -q "$LOKI_MCP_CONTAINER"; then
        print_status "Stopping Loki MCP container..."
        podman stop "$LOKI_MCP_CONTAINER"
        print_success "Loki MCP container stopped"
    else
        print_warning "Loki MCP container is not running"
    fi
else
    # This branch is about the Loki MCP container, not the agent container.
    print_warning "Loki MCP container does not exist"
fi
371+
324372
if podman container exists "$AGENT_CONTAINER" 2>/dev/null; then
325373
if podman ps | grep -q "$AGENT_CONTAINER"; then
326374
print_status "Stopping CI Analysis Agent container..."
@@ -351,10 +399,10 @@ stop_containers() {
351399
echo "================================================================="
352400
echo ""
353401
echo "📊 Container Status:"
354-
podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -E "$OLLAMA_CONTAINER|$AGENT_CONTAINER" || echo " No containers found"
402+
podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -E "$OLLAMA_CONTAINER|$AGENT_CONTAINER|$LOKI_MCP_CONTAINER" || echo " No containers found"
355403
echo ""
356404
echo "🎯 Quick Commands:"
357-
echo " • Start containers: podman start $OLLAMA_CONTAINER $AGENT_CONTAINER"
405+
echo " • Start containers: podman start $OLLAMA_CONTAINER $AGENT_CONTAINER $LOKI_MCP_CONTAINER"
358406
echo " • Clean up all: $0 --clean-all"
359407
echo " • Remove volumes: $0 --remove-volumes"
360408
echo " • Remove images: $0 --remove-images"
@@ -373,10 +421,15 @@ clean_all() {
373421

374422
# Stop containers first
375423
print_status "Stopping containers..."
376-
podman stop "$OLLAMA_CONTAINER" "$AGENT_CONTAINER" 2>/dev/null || true
424+
podman stop "$LOKI_MCP_CONTAINER" "$AGENT_CONTAINER" "$OLLAMA_CONTAINER" 2>/dev/null || true
377425

378426
# Remove containers
379427
print_status "Removing containers..."
428+
if podman container exists "$LOKI_MCP_CONTAINER" 2>/dev/null; then
429+
podman rm -f "$LOKI_MCP_CONTAINER" 2>/dev/null || true
430+
print_success "Removed Loki MCP container"
431+
fi
432+
380433
if podman container exists "$AGENT_CONTAINER" 2>/dev/null; then
381434
podman rm -f "$AGENT_CONTAINER" 2>/dev/null || true
382435
print_success "Removed CI Analysis Agent container"
@@ -437,6 +490,12 @@ clean_all() {
437490
print_success "Removed image: ollama/ollama:latest"
438491
fi
439492

493+
# Remove Loki MCP image
494+
if podman image exists "loki-mcp:latest" 2>/dev/null; then
495+
podman rmi -f "loki-mcp:latest" 2>/dev/null || true
496+
print_success "Removed image: loki-mcp:latest"
497+
fi
498+
440499
# Remove any other related images
441500
for image in $(podman images --format "{{.Repository}}:{{.Tag}}" 2>/dev/null | grep -E "ci-analysis|ollama" || true); do
442501
if [ -n "$image" ] && [ "$image" != "ollama/ollama:latest" ] && [ "$image" != "ci-analysis-agent:latest" ]; then
@@ -454,13 +513,13 @@ clean_all() {
454513
echo "📊 Remaining Resources:"
455514
echo ""
456515
echo "Containers:"
457-
podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -E "$OLLAMA_CONTAINER|$AGENT_CONTAINER|ci-analysis" || echo " No related containers found"
516+
podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -E "$OLLAMA_CONTAINER|$AGENT_CONTAINER|$LOKI_MCP_CONTAINER|ci-analysis" || echo " No related containers found"
458517
echo ""
459518
echo "Volumes:"
460519
podman volume ls --format "table {{.Name}}\t{{.Driver}}" | grep -E "ollama|ci-analysis" || echo " No related volumes found"
461520
echo ""
462521
echo "Images:"
463-
podman images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}" | grep -E "ollama|ci-analysis" || echo " No related images found"
522+
podman images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}" | grep -E "ollama|ci-analysis|loki-mcp" || echo " No related images found"
464523
echo ""
465524
echo "🎯 Next Steps:"
466525
echo " • Fresh deployment: $0"
@@ -715,7 +774,9 @@ main() {
715774
else
716775
print_status "Skipping Ollama setup (using remote vLLM)"
717776
fi
718-
777+
778+
build_loki_mcp
779+
start_loki_mcp
719780
start_agent
720781
verify_deployment
721782
show_status "$gpu_type"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .agent import arch_mismatch_detector_agent
2+
3+
__all__ = ["arch_mismatch_detector_agent"]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""Arch Mismatch Detector Agent for finding architecture mismatch errors in CI logs via Loki."""
2+
3+
from google.adk import Agent
4+
from google.adk.models.lite_llm import LiteLlm
5+
from . import prompt
6+
from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset, StreamableHTTPConnectionParams
7+
8+
9+
import asyncio
10+
import httpx
11+
import re
12+
import threading
13+
import concurrent.futures
14+
import re
15+
import os
16+
from typing import Dict, Any, Optional
17+
18+
GCS_URL = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs"
19+
20+
MODEL = os.environ.get("MODEL", "qwen3:4b")
21+
22+
23+
# Agent that searches Grafana Loki logs (through a Loki MCP server) for
# architecture mismatch errors, following prompt.ARCH_MISMATCH_DETECTOR_PROMPT.
arch_mismatch_detector_agent = Agent(
    model=LiteLlm(model=MODEL),
    name="arch_mismatch_detector_agent",
    instruction=prompt.ARCH_MISMATCH_DETECTOR_PROMPT,
    output_key="installation_analysis_output",
    tools=[
        MCPToolset(
            connection_params=StreamableHTTPConnectionParams(
                # The endpoint can be overridden with LOKI_MCP_URL; the default
                # is the previously hard-coded address.
                # NOTE(review): the default port (9000) differs from the 8888
                # that quick-start-containers.sh publishes for the Loki MCP
                # container - confirm which one is intended.
                url=os.environ.get("LOKI_MCP_URL", "http://127.0.0.1:9000/mcp"),
            ),
            # tool_filter is an MCPToolset argument, not a connection
            # parameter; it restricts the toolset to the Loki tools this
            # agent needs.
            tool_filter=[
                "loki_query",
                "loki_label_names",
                "loki_label_values",
            ],
        ),
    ],
)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""Prompts for the Arch Mismatch Detector Agent."""
2+
3+
ARCH_MISMATCH_DETECTOR_PROMPT = """
4+
You are the Arch Mismatch Detector agent.
5+
6+
Objective:
7+
- Retrieve and analyze Grafana Loki logs for a specific CI job invocation to identify architecture mismatch errors, specifically messages matching the case-insensitive pattern "exec format".
8+
9+
Required user inputs in each request:
10+
- start_time: the absolute start of the time range to search.
11+
- end_time: the absolute end of the time range to search.
12+
- job_name: the name of the job to search for.
13+
- build_id: the id of the build to search for.
14+
15+
Time handling rules:
16+
- Accept start_time and end_time as either Unix epoch milliseconds or ISO 8601 / RFC3339 timestamps.
17+
- If the user does not provide both, ask them to provide both before proceeding.
18+
- Convert any non-epoch-millisecond timestamps to epoch milliseconds before calling tools.
19+
- If start_time >= end_time, ask the user to correct the range.
20+
21+
Data source and query:
22+
- Always query Grafana Loki using the loki_query tool.
23+
- Always set orgId to 1.
24+
- Build the invoker as: "openshift-internal-ci/" + job_name + "/" + build_id
25+
- Construct the expression exactly (preserve content; whitespace changes are okay):
26+
{invoker="openshift-internal-ci/{job_name}/{build_id}"} |~ "(?i)exec format"
27+
28+
Tool invocation contract (loki_query):
29+
- Parameters you must provide:
30+
- orgId: 1
31+
- url: https://grafana-loki.ci.openshift.org
32+
- expr: the expression above
33+
- start: start_time in Unix epoch milliseconds
34+
- end: end_time in Unix epoch milliseconds
35+
36+
Response style:
37+
- Keep outputs concise and focused on the error pattern.
38+
- Report total matches, and surface 3–5 representative lines with timestamps.
39+
- Briefly note any repeated message patterns or clusters.
40+
- Provide a convenience link for further inspection in Grafana Explore with orgId=1 and the requested time range, e.g., https://grafana-loki.ci.openshift.org/explore?orgId=1
41+
"""

0 commit comments

Comments
 (0)