aws-samples
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 173 additions & 225 deletions b/‎README.md‎
Lines changed: 173 additions & 225 deletions
diff --git a/‎apps/agentic-troubleshooting/Dockerfile‎
Lines changed: 1 addition & 0 deletions b/‎apps/agentic-troubleshooting/Dockerfile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/templates/deployment.yaml‎
Lines changed: 43 additions & 0 deletions b/‎apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/templates/deployment.yaml‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/values.yaml‎
Lines changed: 3 additions & 0 deletions b/‎apps/agentic-troubleshooting/helm/k8s-troubleshooting-agent/values.yaml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎apps/agentic-troubleshooting/memory_agent_main.py‎
Lines changed: 23 additions & 0 deletions b/‎apps/agentic-troubleshooting/memory_agent_main.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎apps/agentic-troubleshooting/requirements.txt‎
Lines changed: 8 additions & 2 deletions b/‎apps/agentic-troubleshooting/requirements.txt‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎apps/agentic-troubleshooting/run_dashboard.py‎
Lines changed: 35 additions & 0 deletions b/‎apps/agentic-troubleshooting/run_dashboard.py‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎apps/agentic-troubleshooting/src/agents/agent_orchestrator.py‎
Lines changed: 12 additions & 101 deletions b/‎apps/agentic-troubleshooting/src/agents/agent_orchestrator.py‎
Lines changed: 12 additions & 101 deletions
@@ -86,6 +86,8 @@ target/
 profile_default/
 ipython_config.py
 
+# Kiro IDE
+.kiro
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 
@@ -21,6 +21,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY src/ ./src/
 COPY main.py .
+COPY memory_agent_main.py .
 
 # Create non-root user for security but keep uvx accessible
 RUN useradd -m -u 1000 agent && \
 
@@ -67,6 +67,9 @@ spec:
               value: {{ .Values.config.vectorBucket | quote }}
             - name: INDEX_NAME
               value: {{ .Values.config.indexName | quote }}
+            # Memory Agent A2A URL. Read from config.memoryAgentServerUrl so the
+            # value in values.yaml takes effect; falls back to the memory-agent
+            # container's port 9000 on localhost when the value is unset.
+            - name: MEMORY_AGENT_SERVER_URL
+              value: {{ .Values.config.memoryAgentServerUrl | default "http://localhost:9000" | quote }}
             - name: SLACK_BOT_TOKEN
               valueFrom:
                 secretKeyRef:
@@ -104,6 +107,46 @@ spec:
                 - "import sys; sys.exit(0)"
             initialDelaySeconds: 5
             periodSeconds: 10
+        - name: memory-agent
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command: ["python", "memory_agent_main.py"]
+          env:
+            - name: AWS_REGION
+              value: {{ .Values.config.awsRegion | quote }}
+            - name: BEDROCK_MODEL_ID
+              value: {{ .Values.config.bedrockModelId | quote }}
+            - name: LOG_LEVEL
+              value: {{ .Values.config.logLevel | quote }}
+            # Vector Storage Configuration
+            - name: VECTOR_BUCKET
+              value: {{ .Values.config.vectorBucket | quote }}
+            - name: INDEX_NAME
+              value: {{ .Values.config.indexName | quote }}
+          ports:
+            - name: a2a
+              containerPort: 9000
+              protocol: TCP
+          resources:
+            limits:
+              cpu: 250m
+              memory: 256Mi
+            requests:
+              cpu: 50m
+              memory: 128Mi
+          livenessProbe:
+            tcpSocket:
+              port: 9000
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            failureThreshold: 3
+          readinessProbe:
+            tcpSocket:
+              port: 9000
+            initialDelaySeconds: 10
+            periodSeconds: 10
       volumes:
         - name: kubeconfig-volume
           emptyDir: {}
 
@@ -24,6 +24,9 @@ config:
   vectorBucket: "test-vector-s3-bucket-321"
   indexName: "k8s-troubleshooting"
 
+  # Memory Agent Configuration
+  memoryAgentServerUrl: "http://localhost:9000"
+  
   # EKS MCP settings
   eksMcp:
     enabled: true
 
@@ -0,0 +1,23 @@
+"""Memory Agent entry point.
+
+Configures root logging from ``Config.LOG_LEVEL`` and, when run as a script,
+calls ``src.agents.memory_agent_server.main``.
+"""
+
+import logging
+import sys
+from src.agents.memory_agent_server import main as memory_main
+from src.config.settings import Config
+
+# Simple logging setup.
+# Resolve the configured level name case-insensitively. A bare
+# ``getattr(logging, name)`` raises AttributeError for unknown names and
+# returns a function (not a level) for lowercase names such as "debug", so
+# validate the result and fall back to INFO instead of failing at import.
+_configured_level = getattr(logging, str(Config.LOG_LEVEL).upper(), None)
+logging.basicConfig(
+    level=_configured_level if isinstance(_configured_level, int) else logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+
+logger = logging.getLogger(__name__)
+
+if __name__ == "__main__":
+    try:
+        logger.info("Starting Memory Agent A2A Server...")
+        memory_main()
+    except Exception as e:
+        # Top-level boundary: keep the traceback in the log, then exit non-zero.
+        logger.exception("Memory Agent startup error: %s", e)
+        sys.exit(1)
@@ -1,5 +1,7 @@
-# Core framework
+# Core frameworks
 strands-agents>=1.1.0
+strands-agents[a2a]
+strands-agents[otel]
 strands-agents-tools>=0.2.6
 
 # MCP support
@@ -14,4 +16,8 @@ boto3>=1.34.0
 kubernetes>=28.1.0
 
 # Utilities
-python-dotenv>=1.0.0
+python-dotenv>=1.0.0
+
+# Dashboard
+streamlit>=1.28.0
+pandas>=2.0.0
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Run the S3 Vector Dashboard."""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+
+def main(port: int = 8501, address: str = "0.0.0.0") -> int:
+    """Run the Streamlit dashboard and return its exit code.
+
+    Args:
+        port: Value passed to Streamlit's ``--server.port`` option.
+        address: Value passed to Streamlit's ``--server.address`` option.
+
+    Returns:
+        The return code of the Streamlit process, or 1 when the dashboard
+        script cannot be found next to this file.
+    """
+    # Set environment variables if not already set
+    if not os.getenv('VECTOR_BUCKET'):
+        print("Warning: VECTOR_BUCKET not set")
+
+    if not os.getenv('AWS_REGION'):
+        os.environ['AWS_REGION'] = 'us-east-1'
+
+    # Get dashboard path
+    dashboard_path = Path(__file__).parent / "src" / "dashboard" / "app.py"
+    if not dashboard_path.is_file():
+        # Fail early with a clear message instead of launching Streamlit on a
+        # path that does not exist.
+        print(f"Error: dashboard script not found at {dashboard_path}", file=sys.stderr)
+        return 1
+
+    # Run streamlit
+    cmd = [
+        sys.executable, "-m", "streamlit", "run",
+        str(dashboard_path),
+        "--server.port", str(port),
+        "--server.address", address,
+    ]
+
+    print(f"Starting dashboard at http://localhost:{port}")
+    print(f"Command: {' '.join(cmd)}")
+
+    # No env= is passed, so the child inherits os.environ (including the
+    # AWS_REGION default set above). Hand the child's exit status back to the
+    # caller rather than discarding it.
+    return subprocess.run(cmd, check=False).returncode
+
+if __name__ == "__main__":
+    # Propagate the dashboard's exit status to the shell.
+    sys.exit(main())
@@ -1,122 +1,33 @@
 from strands import Agent, tool
-from src.agents.memory_agent import MemoryAgent
 from src.agents.k8s_specialist import K8sSpecialist
 from src.config.settings import Config
-from src.prompts import ORCHESTRATOR_SYSTEM_PROMPT, CLASSIFICATION_PROMPT, K8S_KEYWORDS
+from src.config.telemetry import setup_langfuse_telemetry
+from src.prompts import ORCHESTRATOR_SYSTEM_PROMPT
 import logging
-import boto3
-import json
 
 logger = logging.getLogger(__name__)
 
+# Initialize telemetry if enabled
+setup_langfuse_telemetry()
+
+class AgentSilentException(Exception):
+    """Exception that should not generate error responses."""
+    pass
+
 class OrchestratorAgent:
     """Direct K8s troubleshooting orchestrator."""
 
     def __init__(self):
-        self.memory_agent = MemoryAgent()
         self.k8s_specialist = K8sSpecialist()
-        
-        # Initialize Bedrock client for Nova Micro classification
-        try:
-            self.bedrock_client = boto3.client('bedrock-runtime', region_name=Config.AWS_REGION)
-        except Exception as e:
-            logger.warning(f"Failed to initialize Bedrock client, falling back to keywords: {e}")
-            self.bedrock_client = None
-        
+        self.last_user_message = None
+            
         self.agent = Agent(
             name="K8s Orchestrator",
             system_prompt=ORCHESTRATOR_SYSTEM_PROMPT,
             model=Config.BEDROCK_MODEL_ID,
-            tools=[self.memory_operations, self.troubleshoot_k8s]
+            tools=[self.troubleshoot_k8s]
         )
 
-    def should_respond(self, message: str, is_mention: bool = False, is_thread: bool = False) -> bool:
-        """Check if should respond to message using SLM or keyword fallback."""
-        if is_mention:
-            return True
-        
-        # If this is a thread reply, assume it's relevant (saves inference costs)
-        if is_thread:
-            return True
-        
-        # Try Nova Micro classification first
-        if self.bedrock_client:
-            return self._classify_with_nova(message)
-        
-        # Fallback to keyword matching
-        return any(keyword in message.lower() for keyword in K8S_KEYWORDS)
-    
-    def _classify_with_nova(self, message: str) -> bool:
-        """Use Amazon Nova Micro to classify if message is K8s/troubleshooting related."""
-        try:
-            prompt = CLASSIFICATION_PROMPT.format(message=message)
-            
-            body = {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": [{"text": prompt}]
-                    }
-                ],
-                "inferenceConfig": {
-                    "maxTokens": 10,
-                    "temperature": 0.1
-                }
-            }
-            
-            response = self.bedrock_client.invoke_model(
-                modelId="amazon.nova-micro-v1:0",
-                body=json.dumps(body)
-            )
-            
-            result = json.loads(response['body'].read())
-            logger.info(f"Message classification should respond:{result}")
-            
-            answer = result['output']['message']['content'][0]['text'].strip().upper()
-            
-            return answer == "YES"
-            
-        except Exception as e:
-            logger.error(f"Nova classification failed: {e}")
-            # Fallback to keyword matching
-            return any(keyword in message.lower() for keyword in K8S_KEYWORDS)
-
-    def respond(self, message: str, thread_id: str, context: str = None) -> str:
-        """Main entry point for responses."""
-        try:
-            # Get the agent response
-            agent_response = self.agent(message)
-            
-            # Handle different response types from Strands agent
-            if hasattr(agent_response, 'content'):
-                response = str(agent_response.content).strip()
-            elif hasattr(agent_response, 'text'):
-                response = str(agent_response.text).strip()
-            elif isinstance(agent_response, (list, tuple)):
-                # If it's a list/tuple, join all parts
-                response = ' '.join(str(part) for part in agent_response).strip()
-            else:
-                response = str(agent_response).strip()
-            
-            logger.info(f"Full agent response: {response[:200]}..." if len(response) > 200 else f"Full agent response: {response}")
-            
-            return response if response else "I'm here to help with Kubernetes troubleshooting. How can I assist you?"
-        except Exception as e:
-            logger.error(f"Orchestrator error: {e}")
-            return "Error processing request. Please try again."
-
-
-
-    @tool
-    def memory_operations(self, request: str) -> str:
-        """Handle memory operations - store or retrieve K8s troubleshooting information."""
-        try:
-            result = self.memory_agent.agent(request)
-            return str(result)
-        except Exception as e:
-            logger.error(f"Memory operation failed: {e}")
-            return f"Memory error: {e}"
-
     @tool
     def troubleshoot_k8s(self, query: str) -> str:
         """Perform K8s troubleshooting."""