Skip to content

Commit 7032121

Browse files
authored
Merge pull request #14 from aws-samples/feat/solutions_library_guidance
Feat/solutions library guidance related updates merging into main branch
2 parents 1731ab2 + 42046f2 commit 7032121

33 files changed

+1749
-550
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ target/
8686
profile_default/
8787
ipython_config.py
8888

89+
# Kiro IDE
90+
.kiro
8991
# pyenv
9092
# For a library or package, you might want to ignore these files since the code is
9193
# intended to run in multiple environments; otherwise, check them in:

README.md

Lines changed: 173 additions & 225 deletions
Large diffs are not rendered by default.

apps/agentic-troubleshooting/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ RUN pip install --no-cache-dir -r requirements.txt
2121
# Copy application code
2222
COPY src/ ./src/
2323
COPY main.py .
24+
COPY memory_agent_main.py .
2425

2526
# Create non-root user for security but keep uvx accessible
2627
RUN useradd -m -u 1000 agent && \

apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/templates/deployment.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ spec:
6767
value: {{ .Values.config.vectorBucket | quote }}
6868
- name: INDEX_NAME
6969
value: {{ .Values.config.indexName | quote }}
70+
# Memory Agent A2A URL
71+
- name: MEMORY_AGENT_SERVER_URL
72+
value: {{ .Values.config.memoryAgentServerUrl | default "http://localhost:9000" | quote }}
7073
- name: SLACK_BOT_TOKEN
7174
valueFrom:
7275
secretKeyRef:
@@ -104,6 +107,46 @@ spec:
104107
- "import sys; sys.exit(0)"
105108
initialDelaySeconds: 5
106109
periodSeconds: 10
110+
- name: memory-agent
111+
securityContext:
112+
{{- toYaml .Values.securityContext | nindent 12 }}
113+
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
114+
imagePullPolicy: {{ .Values.image.pullPolicy }}
115+
command: ["python", "memory_agent_main.py"]
116+
env:
117+
- name: AWS_REGION
118+
value: {{ .Values.config.awsRegion | quote }}
119+
- name: BEDROCK_MODEL_ID
120+
value: {{ .Values.config.bedrockModelId | quote }}
121+
- name: LOG_LEVEL
122+
value: {{ .Values.config.logLevel | quote }}
123+
# Vector Storage Configuration
124+
- name: VECTOR_BUCKET
125+
value: {{ .Values.config.vectorBucket | quote }}
126+
- name: INDEX_NAME
127+
value: {{ .Values.config.indexName | quote }}
128+
ports:
129+
- name: a2a
130+
containerPort: 9000
131+
protocol: TCP
132+
resources:
133+
limits:
134+
cpu: 250m
135+
memory: 256Mi
136+
requests:
137+
cpu: 50m
138+
memory: 128Mi
139+
livenessProbe:
140+
tcpSocket:
141+
port: 9000
142+
initialDelaySeconds: 30
143+
periodSeconds: 30
144+
failureThreshold: 3
145+
readinessProbe:
146+
tcpSocket:
147+
port: 9000
148+
initialDelaySeconds: 10
149+
periodSeconds: 10
107150
volumes:
108151
- name: kubeconfig-volume
109152
emptyDir: {}

apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ config:
2424
vectorBucket: "test-vector-s3-bucket-321"
2525
indexName: "k8s-troubleshooting"
2626

27+
# Memory Agent Configuration
28+
memoryAgentServerUrl: "http://localhost:9000"
29+
2730
# EKS MCP settings
2831
eksMcp:
2932
enabled: true
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Memory Agent entry point."""
2+
3+
import logging
4+
import sys
5+
from src.agents.memory_agent_server import main as memory_main
6+
from src.config.settings import Config
7+
8+
# Logging setup. The configured level name is resolved defensively so that a
# lowercase or unknown LOG_LEVEL cannot crash the process before logging is
# even configured.
def _resolve_log_level(level_name, default=logging.INFO):
    """Return the numeric logging level for *level_name*, or *default*.

    The name is matched case-insensitively against the level constants on
    the ``logging`` module (e.g. ``"debug"`` -> ``logging.DEBUG``). Any value
    that does not resolve to an integer constant falls back to *default*.
    """
    level = getattr(logging, str(level_name).upper(), None)
    return level if isinstance(level, int) else default


logging.basicConfig(
    level=_resolve_log_level(Config.LOG_LEVEL),
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    try:
        logger.info("Starting Memory Agent A2A Server...")
        memory_main()
    except Exception as e:
        # Top-level boundary: keep the original message but also record the
        # traceback, then exit non-zero so the failure is visible to whatever
        # launched this process.
        logger.exception(f"Memory Agent startup error: {e}")
        sys.exit(1)

apps/agentic-troubleshooting/requirements.txt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
# Core framework
1+
# Core frameworks
22
strands-agents>=1.1.0
3+
strands-agents[a2a]
4+
strands-agents[otel]
35
strands-agents-tools>=0.2.6
46

57
# MCP support
@@ -14,4 +16,8 @@ boto3>=1.34.0
1416
kubernetes>=28.1.0
1517

1618
# Utilities
17-
python-dotenv>=1.0.0
19+
python-dotenv>=1.0.0
20+
21+
# Dashboard
22+
streamlit>=1.28.0
23+
pandas>=2.0.0
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/env python3
2+
"""Run the S3 Vector Dashboard."""
3+
4+
import os
5+
import sys
6+
import subprocess
7+
from pathlib import Path
8+
9+
def main():
    """Run the Streamlit dashboard and return its exit code.

    Prints a warning when VECTOR_BUCKET is unset or empty, sets AWS_REGION to
    'us-east-1' when it is unset or empty, then launches
    ``src/dashboard/app.py`` (resolved relative to this file) with Streamlit
    on port 8501, bound to 0.0.0.0.

    Returns:
        The exit status of the Streamlit subprocess, so callers can tell
        whether the dashboard terminated normally.
    """
    # Set environment variables if not already set
    if not os.getenv('VECTOR_BUCKET'):
        print("Warning: VECTOR_BUCKET not set")

    if not os.getenv('AWS_REGION'):
        os.environ['AWS_REGION'] = 'us-east-1'

    # Get dashboard path
    dashboard_path = Path(__file__).parent / "src" / "dashboard" / "app.py"

    # Run streamlit through the current interpreter so the same environment
    # (and its installed packages) is used.
    cmd = [
        sys.executable, "-m", "streamlit", "run",
        str(dashboard_path),
        "--server.port", "8501",
        "--server.address", "0.0.0.0",
    ]

    print("Starting dashboard at http://localhost:8501")
    print(f"Command: {' '.join(cmd)}")

    # Argument list, no shell. Blocks until Streamlit exits.
    completed = subprocess.run(cmd)
    return completed.returncode


if __name__ == "__main__":
    # Propagate Streamlit's exit status instead of always exiting 0.
    sys.exit(main())

apps/agentic-troubleshooting/src/agents/agent_orchestrator.py

Lines changed: 12 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,122 +1,33 @@
11
from strands import Agent, tool
2-
from src.agents.memory_agent import MemoryAgent
32
from src.agents.k8s_specialist import K8sSpecialist
43
from src.config.settings import Config
5-
from src.prompts import ORCHESTRATOR_SYSTEM_PROMPT, CLASSIFICATION_PROMPT, K8S_KEYWORDS
4+
from src.config.telemetry import setup_langfuse_telemetry
5+
from src.prompts import ORCHESTRATOR_SYSTEM_PROMPT
66
import logging
7-
import boto3
8-
import json
97

108
logger = logging.getLogger(__name__)
119

10+
# Initialize telemetry if enabled
11+
setup_langfuse_telemetry()
12+
13+
class AgentSilentException(Exception):
14+
"""Exception that should not generate error responses."""
15+
pass
16+
1217
class OrchestratorAgent:
1318
"""Direct K8s troubleshooting orchestrator."""
1419

1520
def __init__(self):
16-
self.memory_agent = MemoryAgent()
1721
self.k8s_specialist = K8sSpecialist()
18-
19-
# Initialize Bedrock client for Nova Micro classification
20-
try:
21-
self.bedrock_client = boto3.client('bedrock-runtime', region_name=Config.AWS_REGION)
22-
except Exception as e:
23-
logger.warning(f"Failed to initialize Bedrock client, falling back to keywords: {e}")
24-
self.bedrock_client = None
25-
22+
self.last_user_message = None
23+
2624
self.agent = Agent(
2725
name="K8s Orchestrator",
2826
system_prompt=ORCHESTRATOR_SYSTEM_PROMPT,
2927
model=Config.BEDROCK_MODEL_ID,
30-
tools=[self.memory_operations, self.troubleshoot_k8s]
28+
tools=[self.troubleshoot_k8s]
3129
)
3230

33-
def should_respond(self, message: str, is_mention: bool = False, is_thread: bool = False) -> bool:
34-
"""Check if should respond to message using SLM or keyword fallback."""
35-
if is_mention:
36-
return True
37-
38-
# If this is a thread reply, assume it's relevant (saves inference costs)
39-
if is_thread:
40-
return True
41-
42-
# Try Nova Micro classification first
43-
if self.bedrock_client:
44-
return self._classify_with_nova(message)
45-
46-
# Fallback to keyword matching
47-
return any(keyword in message.lower() for keyword in K8S_KEYWORDS)
48-
49-
def _classify_with_nova(self, message: str) -> bool:
50-
"""Use Amazon Nova Micro to classify if message is K8s/troubleshooting related."""
51-
try:
52-
prompt = CLASSIFICATION_PROMPT.format(message=message)
53-
54-
body = {
55-
"messages": [
56-
{
57-
"role": "user",
58-
"content": [{"text": prompt}]
59-
}
60-
],
61-
"inferenceConfig": {
62-
"maxTokens": 10,
63-
"temperature": 0.1
64-
}
65-
}
66-
67-
response = self.bedrock_client.invoke_model(
68-
modelId="amazon.nova-micro-v1:0",
69-
body=json.dumps(body)
70-
)
71-
72-
result = json.loads(response['body'].read())
73-
logger.info(f"Message classification should respond:{result}")
74-
75-
answer = result['output']['message']['content'][0]['text'].strip().upper()
76-
77-
return answer == "YES"
78-
79-
except Exception as e:
80-
logger.error(f"Nova classification failed: {e}")
81-
# Fallback to keyword matching
82-
return any(keyword in message.lower() for keyword in K8S_KEYWORDS)
83-
84-
def respond(self, message: str, thread_id: str, context: str = None) -> str:
85-
"""Main entry point for responses."""
86-
try:
87-
# Get the agent response
88-
agent_response = self.agent(message)
89-
90-
# Handle different response types from Strands agent
91-
if hasattr(agent_response, 'content'):
92-
response = str(agent_response.content).strip()
93-
elif hasattr(agent_response, 'text'):
94-
response = str(agent_response.text).strip()
95-
elif isinstance(agent_response, (list, tuple)):
96-
# If it's a list/tuple, join all parts
97-
response = ' '.join(str(part) for part in agent_response).strip()
98-
else:
99-
response = str(agent_response).strip()
100-
101-
logger.info(f"Full agent response: {response[:200]}..." if len(response) > 200 else f"Full agent response: {response}")
102-
103-
return response if response else "I'm here to help with Kubernetes troubleshooting. How can I assist you?"
104-
except Exception as e:
105-
logger.error(f"Orchestrator error: {e}")
106-
return "Error processing request. Please try again."
107-
108-
109-
110-
@tool
111-
def memory_operations(self, request: str) -> str:
112-
"""Handle memory operations - store or retrieve K8s troubleshooting information."""
113-
try:
114-
result = self.memory_agent.agent(request)
115-
return str(result)
116-
except Exception as e:
117-
logger.error(f"Memory operation failed: {e}")
118-
return f"Memory error: {e}"
119-
12031
@tool
12132
def troubleshoot_k8s(self, query: str) -> str:
12233
"""Perform K8s troubleshooting."""

0 commit comments

Comments
 (0)