diff --git a/src/oci-gpu-scanner-mcp-server/README.md b/src/oci-gpu-scanner-mcp-server/README.md new file mode 100644 index 0000000..72064ad --- /dev/null +++ b/src/oci-gpu-scanner-mcp-server/README.md @@ -0,0 +1,207 @@ +# Lens API MCP Server + +A Model Context Protocol (MCP) server that provides tools for interacting with the lens API for instance and health check management. + +## Overview + +This MCP server exposes lens API functionality through a standardized interface, allowing AI assistants to: + +- List instances in the lens system +- Manage active health checks +- Submit health check reports +- Monitor health check status + +## Features + +### Instance Management +- **List Instances**: Retrieve all instances in the system + +### Health Check Management +- **Get Latest Health Check**: Retrieve the most recent health check for an instance +- **Create Health Check**: Start a new health check for an instance +- **Submit Health Check Report**: Upload JSON and log reports for health checks +- **Get All Health Checks**: List all health checks for an instance + +## Configuration + +### Environment Variables + +| Variable | Description | Default | Required | +|----------|-------------|---------|----------| +| `LENS_API_BASE_URL` | Base URL of the lens API | `http://localhost:8000` | No | +| `LENS_API_KEY` | Authentication token for the lens API | None | No* | +| `LOG_LEVEL` | Logging level (DEBUG, INFO, WARNING, ERROR) | `INFO` | No | + +*Required if the lens API requires authentication + +### Example Configuration + +```bash +export LENS_API_BASE_URL="http://lens-api.example.com" +export LENS_API_KEY="your-api-key-here" +export LOG_LEVEL="DEBUG" +``` + +## Installation + +1. Create a virtual environment: +```bash +python3 -m venv lens-mcp-env +source lens-mcp-env/bin/activate +``` + +2. Install dependencies: +```bash +pip install -r requirements.txt +source local.env +``` + +3. 
Set environment variables as needed + +## Running the MCP Server + +#### Running with STDIO + +Start the MCP server using STDIO: +```bash +./run_server.sh +``` + +#### Running with SSE (Server-Sent Events) + +Start the MCP server using SSE: +```bash +npx mcp-proxy --port 8001 --shell ./run_server.sh +``` + +#### Direct Python Execution + +Alternatively, run the server directly: +```bash +python server.py +``` + +#### Testing with MCP Inspector + +For development and testing, use the MCP inspector: +```bash +npx @modelcontextprotocol/inspector ./run_server.sh +``` + +## Available Tools + +### list_instances +List all instances in the lens system. + +**Parameters:** None + +**Example:** +```json +{ + "name": "list_instances", + "arguments": {} +} +``` + +### get_latest_health_check +Get the latest active health check for a specific instance. + +**Parameters:** +- `instance_id` (string, required): The ID of the instance + +**Example:** +```json +{ + "name": "get_latest_health_check", + "arguments": { + "instance_id": "ocid1.instance.oc1..." + } +} +``` + +### create_health_check +Create a new active health check for a specific instance. + +**Parameters:** +- `instance_id` (string, required): The ID of the instance +- `type` (string, optional): Type of health check (one of `"single_node"`, `"multi_node"`, `"advanced"`; default: `"single_node"`) + +**Example:** +```json +{ + "name": "create_health_check", + "arguments": { + "instance_id": "ocid1.instance.oc1...", + "type": "single_node" + } +} +``` + +### submit_health_check_report +Submit JSON and log reports for an active health check. 
+ +**Parameters:** +- `instance_id` (string, required): The ID of the instance +- `log_report` (string, required): Base64 encoded log file +- `json_report` (string, optional): Base64 encoded JSON report + +**Example:** +```json +{ + "name": "submit_health_check_report", + "arguments": { + "instance_id": "ocid1.instance.oc1...", + "log_report": "base64-encoded-log-content", + "json_report": "base64-encoded-json-content" + } +} +``` + +## API Endpoints Mapping + +This MCP server maps to the following lens API endpoints: + +| MCP Tool | HTTP Method | Endpoint | +|----------|-------------|----------| +| `list_instances` | GET | `/instances/` | +| `get_latest_health_check` | GET | `/instances/{instance_id}/active-health-check/` | +| `create_health_check` | POST | `/instances/{instance_id}/active-health-check/` | +| `submit_health_check_report` | POST | `/instances/{instance_id}/active-health-check/report/` | + +## Development + +### Project Structure + +``` +lens/ +├── server.py # Main MCP server +├── config.py # HTTP client configuration +├── log_setup.py # Logging configuration +├── requirements.txt # Python dependencies +├── README.md # This file +├── tools/ +│ ├── tool_definition.py # Tool schema definitions +│ └── tool_handler.py # Tool implementation handlers +└── logs/ # Log files (created at runtime) +``` + +### Logging + +The server provides comprehensive logging with: +- Colored console output for development +- File-based logging with timestamps +- Request/response logging for debugging +- Configurable log levels + +Logs are stored in the `logs/` directory with timestamps. + +### Error Handling + +The server includes robust error handling: +- HTTP request/response error handling +- Proper error messages returned to the client +- Comprehensive logging of errors for debugging + +## License + +This project follows the same license as the parent OCI MCP servers project. 
# Module-level logger
logger = logging.getLogger("lens-mcp.config")

# Base URL used when LENS_API_BASE_URL is unset or set to an empty string.
DEFAULT_BASE_URL = 'http://localhost:8000'

# Timeout (seconds) applied by make_request() when the caller passes none.
# Without an explicit timeout, requests waits indefinitely for a response.
DEFAULT_TIMEOUT_SECONDS = 30

# Global HTTP session and base URL; both are populated by init_lens_client().
# The annotations are strings so they are not evaluated at import time.
http_session: "Optional[requests.Session]" = None
base_url: "Optional[str]" = None


def _build_url(base: str, endpoint: str) -> str:
    """Join ``endpoint`` onto ``base`` while keeping any path prefix in ``base``.

    ``urljoin(base, '/instances/')`` replaces the whole path of ``base``, so a
    base URL such as ``http://host/api/v1`` would lose ``/api/v1``. Joining on a
    single slash keeps the prefix and gives the same result as ``urljoin`` when
    the base URL has no path. An ``endpoint`` that is already an absolute URL is
    returned unchanged, as ``urljoin`` would do.
    """
    if '://' in endpoint:
        return endpoint
    return f"{base.rstrip('/')}/{endpoint.lstrip('/')}"


def init_lens_client():
    """Initialize the shared lens HTTP session from environment variables.

    Reads ``LENS_API_BASE_URL`` (falling back to ``DEFAULT_BASE_URL`` when it
    is unset or empty) and ``LENS_API_KEY`` (optional; sent as a ``Token``
    Authorization header), stores the session and base URL in the module
    globals, then performs a best-effort connectivity probe against
    ``/instances/``.

    Failures are logged rather than raised; callers should check
    ``is_initialized()`` afterwards.
    """
    global http_session, base_url
    logger.info("🔧 Initializing lens HTTP client...")

    try:
        # ``or`` (not a getenv default) so that an exported-but-empty variable,
        # as in env.template, still falls back to the default URL.
        base_url = os.getenv('LENS_API_BASE_URL') or DEFAULT_BASE_URL

        session = requests.Session()
        session.headers.update({'Content-Type': 'application/json'})

        # Add authentication if API key is provided
        api_key = os.getenv('LENS_API_KEY')
        if api_key:
            session.headers.update({'Authorization': f'Token {api_key}'})
            logger.info("✅ API key authentication configured")
        else:
            logger.warning("⚠️ No API key found. Set LENS_API_KEY environment variable if required")

        http_session = session

        # Connectivity probe: a failure here is only logged, the client still
        # counts as initialized so later calls can succeed once the API is up.
        try:
            logger.debug("🧪 Testing lens API connectivity...")
            test_url = _build_url(base_url, '/instances/')
            response = http_session.get(test_url, timeout=10)
            # 401/403 means the API is reachable but auth might be needed
            if response.status_code in (200, 401, 403):
                logger.info(f"✅ Lens API connectivity test passed. Base URL: {base_url}")
            else:
                logger.warning(f"⚠️ Lens API connectivity test returned status {response.status_code}")
        except Exception as test_e:
            logger.warning(f"⚠️ Lens API connectivity test failed: {test_e}")

        logger.info("✅ Lens HTTP client initialized successfully")

    except Exception as e:
        logger.error(f"❌ Failed to initialize lens HTTP client: {e}")
        logger.error("Please ensure LENS_API_BASE_URL environment variable is set")


def get_http_session() -> "Optional[requests.Session]":
    """Get the initialized HTTP session, or None before initialization."""
    return http_session


def get_base_url() -> "Optional[str]":
    """Get the configured base URL, or None before initialization."""
    return base_url


def is_initialized() -> bool:
    """Check if lens client is initialized (session and base URL both set)."""
    return http_session is not None and base_url is not None


def make_request(method: str, endpoint: str, **kwargs) -> "requests.Response":
    """Make an authenticated HTTP request to the lens API.

    Args:
        method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
        endpoint: Path relative to the configured base URL.
        **kwargs: Passed through to ``Session.request``. ``timeout`` defaults
            to ``DEFAULT_TIMEOUT_SECONDS`` when not supplied.

    Returns:
        The response object; the status code is not checked here.

    Raises:
        RuntimeError: If ``init_lens_client()`` has not set up the client.
    """
    if not is_initialized():
        raise RuntimeError("Lens client not initialized. Call init_lens_client() first.")

    url = _build_url(base_url, endpoint)
    # Never block a tool call forever on an unresponsive API.
    kwargs.setdefault('timeout', DEFAULT_TIMEOUT_SECONDS)
    logger.debug(f"🌐 Making {method.upper()} request to: {url}")

    response = http_session.request(method, url, **kwargs)
    logger.debug(f"📡 Response status: {response.status_code}")

    return response
def setup_logging(log_level: str = "INFO") -> logging.Logger:
    """
    Set up logging configuration for the lens MCP server.

    Replaces all handlers on the root logger with a colored console handler
    and a plain-text file handler writing to a timestamped file under
    ``logs/`` next to this module.

    The console handler writes to stderr: the server runs over the stdio
    transport, where stdout carries protocol messages and must not be mixed
    with log output.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            The LOG_LEVEL environment variable, when set, takes precedence.
            An unrecognised level falls back to INFO with a warning.

    Returns:
        Configured logger instance
    """
    # Ensure logs directory exists
    log_dir = Path(__file__).parent / "logs"
    log_dir.mkdir(exist_ok=True)

    # Get log level from environment or use provided default; validate it so a
    # typo in LOG_LEVEL cannot crash the server at startup.
    requested_level = os.getenv('LOG_LEVEL', log_level).upper()
    level = getattr(logging, requested_level, None)
    invalid_level = not isinstance(level, int)
    if invalid_level:
        log_level, level = "INFO", logging.INFO
    else:
        log_level = requested_level

    # Create custom formatter
    class ColoredFormatter(logging.Formatter):
        """Custom formatter with colors for different log levels."""

        # ANSI color codes
        COLORS = {
            'DEBUG': '\033[36m',     # Cyan
            'INFO': '\033[92m',      # Green
            'WARNING': '\033[93m',   # Yellow
            'ERROR': '\033[91m',     # Red
            'CRITICAL': '\033[95m',  # Magenta
            'RESET': '\033[0m'       # Reset
        }

        def format(self, record):
            color = self.COLORS.get(record.levelname)
            if color is None:
                return super().format(record)

            # The same record object is handed to every handler, so the
            # colored level name must be undone after formatting; otherwise
            # the escape codes end up in the log file as well.
            plain_levelname = record.levelname
            record.levelname = f"{color}{plain_levelname}{self.COLORS['RESET']}"
            try:
                return super().format(record)
            finally:
                record.levelname = plain_levelname

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Clear any existing handlers, closing them so file handles are released
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)
        handler.close()

    # Console handler with colors (stderr; stdout is reserved for MCP traffic)
    console_handler = logging.StreamHandler(sys.stderr)
    console_handler.setLevel(level)
    console_format = ColoredFormatter(
        '%(asctime)s | %(levelname)s | %(name)s:%(lineno)d | %(message)s'
    )
    console_handler.setFormatter(console_format)
    root_logger.addHandler(console_handler)

    # File handler without colors. UTF-8 is explicit because log messages
    # contain emoji that a non-UTF-8 locale encoding cannot represent.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = log_dir / f"lens-mcp-server_{timestamp}.log"
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    # The handler itself accepts everything; records below the root logger's
    # level are still filtered out before they reach it.
    file_handler.setLevel(logging.DEBUG)
    file_format = logging.Formatter(
        '%(asctime)s | %(levelname)s | %(name)s:%(lineno)d | %(message)s'
    )
    file_handler.setFormatter(file_format)
    root_logger.addHandler(file_handler)

    # Create and configure main logger
    logger = logging.getLogger("lens-mcp")
    logger.info("🚀 Lens MCP Server logging initialized")
    logger.info(f"📁 Log file: {log_file}")
    logger.info(f"📊 Log level: {log_level}")
    if invalid_level:
        logger.warning(f"⚠️ Unknown log level {requested_level!r}; falling back to INFO")

    return logger


def get_logger(name: str) -> logging.Logger:
    """
    Get a logger with the specified name.

    Args:
        name: Logger name

    Returns:
        Logger instance
    """
    return logging.getLogger(name)
+ exit 1 +fi + +# Activate virtual environment +echo "🔧 Activating virtual environment..." +source lens-mcp-env/bin/activate + +# Check required environment variables +if [ -z "$LENS_API_BASE_URL" ]; then + echo "⚠️ LENS_API_BASE_URL not set, using default: http://localhost:8000" + export LENS_API_BASE_URL="http://localhost:8000" +fi + +echo "🌐 Lens API Base URL: $LENS_API_BASE_URL" + +if [ -n "$LENS_API_KEY" ]; then + echo "🔑 API Key configured" +else + echo "⚠️ No API key configured. Set LENS_API_KEY if required." +fi + +# Create logs directory if it doesn't exist +mkdir -p logs + +echo "▶️ Starting server..." +python server.py diff --git a/src/oci-gpu-scanner-mcp-server/server.py b/src/oci-gpu-scanner-mcp-server/server.py new file mode 100644 index 0000000..a0c860a --- /dev/null +++ b/src/oci-gpu-scanner-mcp-server/server.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +""" +Lens API MCP Server + +This server provides comprehensive Model Context Protocol (MCP) tools for interacting with +the lens API for instance and health check management. 
# Initialize logging
logger = setup_logging()

# Create server instance
server = Server("lens-mcp")

# Environment variables that might contain HTTP context (set by mcpo)
_HTTP_CONTEXT_VARS = (
    'HTTP_X_API_KEY', 'HTTP_AUTHORIZATION', 'HTTP_USER_AGENT',
    'HTTP_HOST', 'HTTP_CONTENT_TYPE', 'REQUEST_METHOD', 'HTTP_ORIGIN',
    'REMOTE_ADDR', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_REAL_IP'
)

# Context variables whose values are credentials; only their presence is
# logged, never the value, so secrets do not end up in the log files.
_SENSITIVE_HTTP_CONTEXT_VARS = frozenset({'HTTP_X_API_KEY', 'HTTP_AUTHORIZATION'})
_REDACTED = '***REDACTED***'

# Tool name -> coroutine handling that tool. Names must match the tool
# definitions returned by get_tool_definitions().
_TOOL_HANDLERS = {
    "lens_get_latest_health_check_state": handle_get_latest_health_check,
    "lens_create_health_check": handle_create_health_check,
    "lens_get_instance_logs": handle_get_instance_log,
    "lens_get_monitoring_ring_health_status": handle_get_monitoring_ring_health_status,
}


@server.list_tools()
async def handle_list_tools() -> List[Tool]:
    """List available lens API tools."""
    return get_tool_definitions()


@server.call_tool()
async def handle_call_tool(name: str, arguments: dict) -> Sequence[TextContent]:
    """Handle tool calls for lens API operations.

    Logs the call, lazily initializes the lens client if needed, then
    dispatches to the handler registered for ``name``.

    Args:
        name: Name of the tool being invoked.
        arguments: Tool arguments as supplied by the client.

    Returns:
        The handler's result, or a single text item describing the problem
        (unknown tool, uninitialized client, or an exception in the handler).
    """
    # Enhanced logging for debugging HTTP requests
    logger.info("="*60)
    logger.info(f"🎯 Claude is calling tool: {name}")
    logger.info(f"📝 Arguments received: {json.dumps(arguments, indent=2)}")
    logger.info(f"🕐 Request timestamp: {datetime.now().isoformat()}")

    # Collect HTTP context from the environment, masking credential values.
    http_context = {}
    for var in _HTTP_CONTEXT_VARS:
        value = os.environ.get(var)
        if value:
            http_context[var] = _REDACTED if var in _SENSITIVE_HTTP_CONTEXT_VARS else value

    if http_context:
        logger.info(f"🌐 HTTP Context from mcpo: {json.dumps(http_context, indent=2)}")
    else:
        logger.debug("📭 No HTTP context variables found in environment")

    # Log current working directory and process info
    logger.debug(f"📂 Working directory: {os.getcwd()}")
    logger.debug(f"🔧 Process ID: {os.getpid()}")

    if not is_initialized():
        logger.warning("⚠️ Lens client not initialized, attempting to initialize...")
        init_lens_client()
        if not is_initialized():
            error_msg = "Error: Lens client not initialized. Please check your configuration."
            logger.error(f"❌ {error_msg}")
            return [TextContent(type="text", text=error_msg)]

    try:
        logger.info(f"🔄 Processing tool call: {name}")

        handler = _TOOL_HANDLERS.get(name)
        if handler is None:
            return [TextContent(type="text", text=f"Unknown tool: {name}")]
        return await handler(arguments)

    except Exception as e:
        # logger.exception keeps the traceback in the log; the client only
        # receives the short message.
        logger.exception(f"Error in {name}: {str(e)}")
        return [TextContent(type="text", text=f"Error executing {name}: {str(e)}")]


async def main():
    """Main entry point for the MCP server."""
    # Initialize lens client on startup
    init_lens_client()

    # Run the server over the stdio transport
    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="lens-mcp",
                server_version="1.0.0",
                capabilities=ServerCapabilities(
                    tools=ToolsCapability(listChanged=False)
                ),
            ),
        )

if __name__ == "__main__":
    asyncio.run(main())
+ +# Create virtual environment if it doesn't exist +if [ ! -d "lens-mcp-env" ]; then + echo "📦 Creating virtual environment..." + python3 -m venv lens-mcp-env +fi + +# Activate virtual environment +echo "🔧 Activating virtual environment..." +source lens-mcp-env/bin/activate + +# Install requirements +echo "📥 Installing requirements..." +pip install -r requirements.txt + +# Create logs directory +echo "📁 Creating logs directory..." +mkdir -p logs + +echo "✅ Setup complete!" +echo "" +echo "To run the server:" +echo "1. Set environment variables:" +echo " export LENS_API_BASE_URL='http://your-lens-api-url'" +echo " export LENS_API_KEY='your-api-key'" +echo "2. Activate the environment: source lens-mcp-env/bin/activate" +echo "3. Run the server: python server.py" diff --git a/src/oci-gpu-scanner-mcp-server/tools/__init__.py b/src/oci-gpu-scanner-mcp-server/tools/__init__.py new file mode 100644 index 0000000..6c3cd87 --- /dev/null +++ b/src/oci-gpu-scanner-mcp-server/tools/__init__.py @@ -0,0 +1 @@ +# Tools package for lens MCP server diff --git a/src/oci-gpu-scanner-mcp-server/tools/tool_definition.py b/src/oci-gpu-scanner-mcp-server/tools/tool_definition.py new file mode 100644 index 0000000..1e4bb5a --- /dev/null +++ b/src/oci-gpu-scanner-mcp-server/tools/tool_definition.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +""" +Lens API Tools Definitions Module + +Contains all MCP tool definitions and their input schemas for lens API operations. 
+""" + +from typing import List +from mcp.types import Tool + + +def get_tool_definitions() -> List[Tool]: + """Get all available lens API tool definitions.""" + return [ + Tool( + name="lens_get_latest_health_check_state", + description="Get the state of the latest active health check for a specific instance in the lens system", + inputSchema={ + "type": "object", + "properties": { + "instance_id": { + "type": "string", + "description": "The ID of the instance" + } + }, + "required": ["instance_id"] + } + ), + Tool( + name="lens_create_health_check", + description="Create a new active health check for a specific instance in the lens system", + inputSchema={ + "type": "object", + "properties": { + "instance_id": { + "type": "string", + "description": "The ID of the instance" + }, + "type": { + "type": "string", + "description": "Type of health check to perform", + "enum": ["single_node", "multi_node", "advanced"], + "default": "single_node" + } + }, + "required": ["instance_id"] + } + ), + Tool( + name="lens_get_instance_logs", + description="Retrieve and decode base64 encoded logs from active health checks for a specific instance", + inputSchema={ + "type": "object", + "properties": { + "instance_id": { + "type": "string", + "description": "The ID of the instance to retrieve logs from" + } + }, + "required": ["instance_id"] + } + ), + Tool( + name="lens_get_monitoring_ring_health_status", + description="Get comprehensive health status for all instances in a monitoring ring, including active health checks, passive health checks, and failure recommendations. 
# Module-level logger
logger = logging.getLogger("lens-mcp.tool_handlers")

# Ensure this logger inherits the correct log level from environment.
# An unrecognised LOG_LEVEL falls back to INFO instead of raising
# AttributeError while this module is being imported.
log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
_resolved_level = getattr(logging, log_level, None)
if not isinstance(_resolved_level, int):
    log_level, _resolved_level = 'INFO', logging.INFO
logger.setLevel(_resolved_level)
logger.debug(f"🐛 Tool handler logger initialized with level: {log_level}")

# Maximum number of trailing log lines returned by handle_get_instance_log.
_LOG_TAIL_LINES = 100


def _active_health_check_endpoint(instance_id: str) -> str:
    """Return the active-health-check path for ``instance_id``.

    The id comes from tool arguments, so it is percent-encoded (including
    ``/``) to keep it inside a single path segment.
    """
    from urllib.parse import quote

    return f"/instances/{quote(str(instance_id), safe='')}/active-health-check/"


def _error_response(action: str, exc: Exception) -> Sequence[TextContent]:
    """Build the text reply for a failed tool call.

    When the exception carries an HTTP response, the reply contains the
    response's JSON body, or its raw text if the body is not valid JSON.
    Otherwise the exception message is used.
    """
    response = getattr(exc, 'response', None)
    if response is None:
        return [TextContent(type="text", text=f"Error {action}: {str(exc)}")]
    try:
        detail = response.json()
    except ValueError:
        # Body was not JSON; JSON decode errors are ValueError subclasses.
        detail = response.text
    return [TextContent(type="text", text=f"Error {action}: {detail}")]


def _render_log_section(encoded_log) -> list:
    """Return the report lines for a health check's base64 ``log`` field.

    Decodes the log as UTF-8 and keeps at most the last ``_LOG_TAIL_LINES``
    lines. If decoding fails, the raw value is returned instead.
    """
    if not encoded_log:
        return ["\n--- No log data found ---"]

    try:
        decoded_text = base64.b64decode(encoded_log).decode('utf-8')
    except (ValueError, TypeError) as decode_error:
        # Covers invalid base64, non-UTF-8 bytes, and non-string values.
        logger.warning(f"Failed to decode log: {str(decode_error)}")
        return ["\n--- Raw log (failed to decode) ---", str(encoded_log)]

    log_lines = decoded_text.splitlines()
    total_lines = len(log_lines)
    if total_lines > _LOG_TAIL_LINES:
        header = f"\n--- Decoded log (last {_LOG_TAIL_LINES} lines of {total_lines} total) ---"
        log_lines = log_lines[-_LOG_TAIL_LINES:]
    else:
        header = f"\n--- Decoded log (all {total_lines} lines) ---"
    return [header, '\n'.join(log_lines)]


async def handle_list_instances(arguments: dict) -> Sequence[TextContent]:
    """Handle list_instances tool call.

    Args:
        arguments: Tool arguments; none are read.

    Returns:
        A single text item with the instance count and the JSON listing, or
        an error description if the request fails.
    """
    logger.debug("🏢 Executing list_instances operation")

    try:
        logger.info("📡 Making HTTP request to list instances...")
        response = make_request('GET', '/instances/')
        response.raise_for_status()

        instances_data = response.json()
        result_text = f"Found {len(instances_data)} instances:\n" + json.dumps(instances_data, indent=2)

        logger.info(f"✅ HTTP request successful - Found {len(instances_data)} instances")
        logger.debug(f"📤 Returning response (length: {len(result_text)} chars)")

        return [TextContent(type="text", text=result_text)]

    except Exception as e:
        logger.error(f"❌ Error in list_instances: {str(e)}")
        return _error_response("listing instances", e)


async def handle_get_latest_health_check(arguments: dict) -> Sequence[TextContent]:
    """Handle get_latest_health_check tool call.

    Args:
        arguments: Must contain ``instance_id``.

    Returns:
        A single text item with the ``state`` of the latest active health
        check, a "no active health checks" message when the API returns an
        empty body, or an error description if the request fails.
    """
    instance_id = arguments["instance_id"]

    logger.debug(f"🩺 Executing get_latest_health_check for instance {instance_id}")

    try:
        endpoint = _active_health_check_endpoint(instance_id)

        logger.info(f"📡 Making HTTP request to get latest health check for instance {instance_id}...")
        response = make_request('GET', endpoint)
        response.raise_for_status()

        health_check_data = response.json()

        if not health_check_data:
            result_text = f"No active health checks found for instance {instance_id}"
        else:
            # Extract only the state from the health check data
            state = health_check_data.get('state', 'unknown')
            result_text = f"Latest health check state for instance {instance_id}: {state}"

        logger.info(f"✅ Latest health check state retrieved successfully for instance {instance_id}")
        return [TextContent(type="text", text=result_text)]

    except Exception as e:
        logger.error(f"❌ Error getting latest health check: {str(e)}")
        return _error_response("getting latest health check", e)


async def handle_create_health_check(arguments: dict) -> Sequence[TextContent]:
    """Handle create_health_check tool call.

    Args:
        arguments: Must contain ``instance_id``; ``type`` is optional and
            defaults to ``"single_node"``.

    Returns:
        A single text item confirming creation, or an error description if
        the request fails.
    """
    instance_id = arguments["instance_id"]
    health_check_type = arguments.get("type", "single_node")

    logger.debug(f"🆕 Executing create_health_check for instance {instance_id} with type {health_check_type}")

    try:
        endpoint = _active_health_check_endpoint(instance_id)
        payload = {"type": health_check_type}

        logger.info(f"📡 Making HTTP request to create health check for instance {instance_id}...")
        response = make_request('POST', endpoint, json=payload)
        response.raise_for_status()

        logger.info(f"✅ Health check created successfully for instance {instance_id}")
        return [TextContent(
            type="text",
            text=f"Health check of type {health_check_type} created successfully for instance {instance_id}"
        )]

    except Exception as e:
        logger.error(f"❌ Error creating health check: {str(e)}")
        return _error_response("creating health check", e)


async def handle_get_instance_log(arguments: dict) -> Sequence[TextContent]:
    """Handle get_instance_log tool call.

    Fetches the latest active health check for the instance and returns its
    metadata followed by the decoded tail of its base64 ``log`` field.

    Args:
        arguments: Must contain ``instance_id``.

    Returns:
        A single text item with the report, a "no active health check"
        message when the API returns an empty body, or an error description
        if the request fails.
    """
    instance_id = arguments["instance_id"]

    logger.debug(f"📋 Executing get_instance_log for instance {instance_id}")

    try:
        endpoint = _active_health_check_endpoint(instance_id)

        logger.info(f"📡 Making HTTP request to get log for instance {instance_id}...")
        response = make_request('GET', endpoint)
        response.raise_for_status()

        health_check_data = response.json()

        if not health_check_data:
            return [TextContent(type="text", text=f"No active health check found for instance {instance_id}")]

        # Health check metadata first, then the decoded (or raw) log section
        decoded_logs = [
            "=== Latest Health Check ===",
            f"ID: {health_check_data.get('uuid', 'N/A')}",
            f"State: {health_check_data.get('state', 'N/A')}",
            f"Type: {health_check_data.get('type', 'N/A')}",
            f"Created: {health_check_data.get('created_at', 'N/A')}",
        ]
        decoded_logs.extend(_render_log_section(health_check_data.get('log')))

        result_text = f"Retrieved and decoded latest log for instance {instance_id}:\n\n" + "\n".join(decoded_logs)

        logger.info(f"✅ Instance log retrieved and decoded successfully for instance {instance_id}")
        return [TextContent(type="text", text=result_text)]

    except Exception as e:
        logger.error(f"❌ Error getting instance logs: {str(e)}")
        return _error_response("getting instance logs", e)
get_monitoring_ring_health_status for monitoring ring {monitoring_ring_id}") + + try: + # Step 1: Get monitoring ring details + logger.info(f"📡 Getting monitoring ring details for {monitoring_ring_id}...") + ring_endpoint = f'/monitoring-rings/{monitoring_ring_id}/' + ring_response = make_request('GET', ring_endpoint) + ring_response.raise_for_status() + + ring_data = ring_response.json() + ring_name = ring_data.get('name', 'Unknown') + instances = ring_data.get('instances', []) + + # Log what was retrieved from monitoring ring API + logger.info(f"📊 Retrieved monitoring ring data: name='{ring_name}', instances_count={len(instances)}") + logger.debug(f"🔍 Ring data keys: {list(ring_data.keys())}") + + # Log detailed instan + if instances: + instance_sample = instances[0] if instances else {} + logger.debug(f"🔍 Sample instance keys: {list(instance_sample.keys()) if instance_sample else 'No instances'}") + + if not instances: + return [TextContent(type="text", text=f"No instances found in monitoring ring '{ring_name}' ({monitoring_ring_id})")] + + logger.info(f"✅ Found {len(instances)} instances in monitoring ring '{ring_name}'") + + # Step 2: Collect health status for each instance + health_results = [] + failed_instances = [] + + for instance in instances: + instance_id = instance.get('instance_id') + instance_name = instance.get('display_name', instance_id) + + logger.debug(f"🩺 Checking health for instance {instance_name} ({instance_id})") + + instance_result = { + 'instance_id': instance_id, + 'instance_name': instance_name, + 'region': instance.get('region_name', 'Unknown'), + 'shape': instance.get('shape', 'Unknown'), + 'active_health_check': None, + 'passive_health_check': None, + 'overall_status': 'unknown' + } + + # Get active health check + try: + active_endpoint = f'/instances/{instance_id}/active-health-check/' + active_response = make_request('GET', active_endpoint) + active_response.raise_for_status() + + active_data = active_response.json() + + # Log 
what was retrieved from active health check API + logger.debug(f"🩺 Active health check data for {instance_name}: {active_data}") + if active_data: + logger.info(f"📋 Active health check retrieved for {instance_name}: state={active_data.get('state')}, type={active_data.get('type')}") + instance_result['active_health_check'] = { + 'state': active_data.get('state', 'unknown'), + 'type': active_data.get('type', 'unknown'), + 'created_at': active_data.get('created_at', 'unknown'), + 'uuid': active_data.get('uuid', 'unknown') + } + else: + logger.info(f"📋 No active health check data found for {instance_name}") + instance_result['active_health_check'] = {'state': 'no_check', 'message': 'No active health check found'} + + except Exception as active_e: + logger.warning(f"⚠️ Failed to get active health check for {instance_name}: {active_e}") + instance_result['active_health_check'] = {'state': 'error', 'message': str(active_e)} + + # Get passive health check from lens API + try: + passive_endpoint = f'/instances/{instance_id}/passive-health-check/' + passive_response = make_request('GET', passive_endpoint) + passive_response.raise_for_status() + + passive_data = passive_response.json() + passive_health = passive_data.get('passive_health_check', {}) + failure_recommendation = passive_data.get('failure_recommendation') + + # Log what was retrieved from passive health check API + logger.debug(f"🔍 Passive health check data for {instance_name}: {passive_data}") + logger.info(f"📊 Passive health check retrieved for {instance_name}: status={passive_health.get('status')}, has_failure_recommendation={bool(failure_recommendation)}") + if failure_recommendation: + logger.debug(f"⚠️ Failure recommendation for {instance_name}: {failure_recommendation}") + + # Structure the passive health check result + passive_result = { + 'status': passive_health.get('status', 'unknown'), + 'failure_recommendation': failure_recommendation + } + + # Add issue description if available from failure 
recommendation + if failure_recommendation and failure_recommendation.get('issue'): + passive_result['issue'] = failure_recommendation['issue'] + + instance_result['passive_health_check'] = passive_result + + except Exception as passive_e: + logger.warning(f"⚠️ Failed to get passive health check for {instance_name}: {passive_e}") + instance_result['passive_health_check'] = {'status': 'error', 'message': str(passive_e)} + + # Determine overall status + active_state = instance_result['active_health_check'].get('state', 'unknown') if instance_result['active_health_check'] else 'unknown' + passive_status = instance_result['passive_health_check'].get('status', 'unknown') if instance_result['passive_health_check'] else 'unknown' + + # Logic to determine if instance is failed + # Active Health Check States: scheduled, running, failed, completed, disabled + is_failed = False + if active_state == 'failed': + is_failed = True + elif passive_status == 'fail': + is_failed = True + elif active_state == 'disabled': + # Health checks are disabled for this instance + instance_result['overall_status'] = 'disabled' + elif active_state == 'completed' and passive_status == 'pass': + instance_result['overall_status'] = 'healthy' + elif active_state == 'completed' and passive_status in ['unknown', 'error', 'unavailable']: + # Active check completed but no passive data - still consider healthy if active passed + instance_result['overall_status'] = 'healthy' + elif active_state in ['scheduled', 'running']: + instance_result['overall_status'] = 'checking' + else: + instance_result['overall_status'] = 'degraded' + + if is_failed: + instance_result['overall_status'] = 'failed' + failed_instances.append(instance_result) + + health_results.append(instance_result) + + # Step 4: Generate comprehensive report + report_lines = [] + report_lines.append(f"=== Health Status Report for Monitoring Ring ===") + report_lines.append(f"Ring Name: {ring_name}") + report_lines.append(f"Ring ID: 
{monitoring_ring_id}") + report_lines.append(f"Total Instances: {len(instances)}") + report_lines.append(f"Failed Instances: {len(failed_instances)}") + report_lines.append("") + + # Summary by status + status_counts = {} + for result in health_results: + status = result['overall_status'] + status_counts[status] = status_counts.get(status, 0) + 1 + + report_lines.append("=== Status Summary ===") + for status, count in sorted(status_counts.items()): + emoji = { + 'healthy': '✅', + 'failed': '❌', + 'degraded': '⚠️', + 'checking': '🔄', + 'no_data': '❓', + 'disabled': '🚫', + 'unknown': '❔' + }.get(status, '📊') + report_lines.append(f"{emoji} {status.upper()}: {count} instances") + report_lines.append("") + + # Failed instances detail + if failed_instances: + report_lines.append("=== FAILED INSTANCES DETAILS ===") + for failed in failed_instances: + report_lines.append(f"❌ {failed['instance_name']} ({failed['instance_id']})") + report_lines.append(f" Region: {failed['region']}") + report_lines.append(f" Shape: {failed['shape']}") + + if failed['active_health_check']: + ahc = failed['active_health_check'] + report_lines.append(f" Active Health Check: {ahc.get('state', 'unknown')} ({ahc.get('type', 'unknown')})") + + if failed['passive_health_check']: + phc = failed['passive_health_check'] + status_line = f" Passive Health Check: {phc.get('status', 'unknown')}" + if 'issue' in phc and phc['issue']: + status_line += f" - {phc['issue']}" + report_lines.append(status_line) + + # Add failure recommendation details if available + if phc.get('failure_recommendation'): + fr = phc['failure_recommendation'] + if fr.get('suggestion'): + report_lines.append(f" Suggestion: {fr['suggestion']}") + if fr.get('fault_code'): + report_lines.append(f" Fault Code: {fr['fault_code']}") + + report_lines.append("") + + # All instances detail + report_lines.append("=== ALL INSTANCES STATUS ===") + for result in health_results: + status_emoji = { + 'healthy': '✅', + 'failed': '❌', + 'degraded': 
'⚠️', + 'checking': '🔄', + 'no_data': '❓', + 'disabled': '🚫', + 'unknown': '❔' + }.get(result['overall_status'], '📊') + + report_lines.append(f"{status_emoji} {result['instance_name']} ({result['instance_id']})") + report_lines.append(f" Overall Status: {result['overall_status'].upper()}") + + if result['active_health_check']: + ahc = result['active_health_check'] + report_lines.append(f" Active: {ahc.get('state', 'unknown')} ({ahc.get('type', 'unknown')})") + + if result['passive_health_check']: + phc = result['passive_health_check'] + passive_line = f" Passive: {phc.get('status', 'unknown')}" + if 'issue' in phc and phc['issue']: + passive_line += f" - {phc['issue']}" + report_lines.append(passive_line) + + report_lines.append("") + + result_text = "\n".join(report_lines) + logger.info(f"✅ Health status report generated for monitoring ring '{ring_name}' - {len(failed_instances)} failed instances found") + + return [TextContent(type="text", text=result_text)] + + except Exception as e: + logger.error(f"❌ Error getting monitoring ring health status: {str(e)}") + if hasattr(e, 'response') and e.response is not None: + try: + error_detail = e.response.json() + return [TextContent(type="text", text=f"Error getting monitoring ring health status: {error_detail}")] + except: + return [TextContent(type="text", text=f"Error getting monitoring ring health status: {e.response.text}")] + return [TextContent(type="text", text=f"Error getting monitoring ring health status: {str(e)}")] + +