diff --git a/development/backend/mcp-configuration-architecture.mdx b/development/backend/mcp-configuration-architecture.mdx index 8033538..d5f038a 100644 --- a/development/backend/mcp-configuration-architecture.mdx +++ b/development/backend/mcp-configuration-architecture.mdx @@ -91,17 +91,20 @@ The catalog defines the configuration structure for each MCP server type: -- Template Level (with lock controls) template_args: text('template_args') -- [{value, locked, description}] template_env: text('template_env') -- Fixed environment variables -template_headers: text('template_headers') -- Fixed HTTP headers (for remotes) +template_headers: text('template_headers') -- Fixed HTTP headers (for HTTP/SSE) +template_url_query_params: text('template_url_query_params') -- Fixed URL query params (for HTTP/SSE) --- Team Schema (with lock/visibility controls) +-- Team Schema (with lock/visibility controls) team_args_schema: text('team_args_schema') -- Schema with lock controls team_env_schema: text('team_env_schema') -- [{name, type, required, default_team_locked, visible_to_users}] -team_headers_schema: text('team_headers_schema') -- HTTP headers schema +team_headers_schema: text('team_headers_schema') -- HTTP headers schema (for HTTP/SSE) +team_url_query_params_schema: text('team_url_query_params_schema') -- URL query params schema (for HTTP/SSE) -- User Schema user_args_schema: text('user_args_schema') -- User-configurable argument schema user_env_schema: text('user_env_schema') -- User-configurable environment schema -user_headers_schema: text('user_headers_schema') -- User HTTP headers schema +user_headers_schema: text('user_headers_schema') -- User HTTP headers schema (for HTTP/SSE) +user_url_query_params_schema: text('user_url_query_params_schema') -- User URL query params schema (for HTTP/SSE) ``` **Transport Configuration:** @@ -139,6 +142,8 @@ Team installations manage shared configurations: installation_name: text('installation_name') -- Team-friendly name team_args: 
text('team_args') -- Team-level arguments (JSON array) team_env: text('team_env') -- Team environment variables (JSON object) +team_headers: text('team_headers') -- Team HTTP headers (JSON object, for HTTP/SSE) +team_url_query_params: text('team_url_query_params') -- Team URL query params (JSON object, for HTTP/SSE) ``` ### Tier 3: User Configuration (`mcpUserConfigurations`) @@ -151,6 +156,8 @@ user_id: text('user_id') -- User who owns this config user_args: text('user_args') -- User arguments (JSON array) user_env: text('user_env') -- User environment variables (JSON object) +user_headers: text('user_headers') -- User HTTP headers (JSON object, for HTTP/SSE) +user_url_query_params: text('user_url_query_params') -- User URL query params (JSON object, for HTTP/SSE) ``` ## Configuration Flow @@ -270,6 +277,8 @@ Automatically mapped to: - URL → `template_env` or embedded in remotes config (locked) - Authentication headers → `team_headers_schema` (secrets) - Optional headers → `user_headers_schema` (personal preferences) +- API keys in URL query params → `team_url_query_params_schema` (secrets) +- User preferences in query params → `user_url_query_params_schema` (customization) The transformation layer (`officialRegistryTransforms.ts`) handles all automatic mapping without admin intervention. 
diff --git a/development/satellite/architecture.mdx b/development/satellite/architecture.mdx index 946e22c..9407649 100644 --- a/development/satellite/architecture.mdx +++ b/development/satellite/architecture.mdx @@ -450,7 +450,7 @@ The satellite service has completed **Phase 1: MCP Transport Implementation** an - **Command Processing**: HTTP MCP server management (spawn/kill/restart/health_check) - **Heartbeat Service**: Process status reporting and system metrics - **Configuration Sync**: Real-time MCP server configuration updates -- **Event System**: Real-time event emission with automatic batching (10 event types) +- **Event System**: Real-time event emission with automatic batching (13 event types including tool metadata) **Foundation Infrastructure:** - **HTTP Server**: Fastify with Swagger documentation diff --git a/development/satellite/event-system.mdx b/development/satellite/event-system.mdx index c3d3152..ef94217 100644 --- a/development/satellite/event-system.mdx +++ b/development/satellite/event-system.mdx @@ -65,7 +65,7 @@ Satellite Components EventBus Backend **Naming Convention**: All event data fields use **snake_case** (e.g., `server_id`, `team_id`, `spawn_duration_ms`) to match the backend API convention. -The satellite emits 12 event types across 4 categories: +The satellite emits 13 event types across 4 categories: ### MCP Server Lifecycle @@ -226,21 +226,40 @@ Emitted when SSE connection closes (client disconnect, timeout, or error). ### Tool Discovery #### `mcp.tools.discovered` -Emitted after successful tool discovery from HTTP or stdio MCP server. +Emitted after successful tool discovery from HTTP or stdio MCP server with complete tool metadata and token consumption. 
**Data Structure:** ```typescript { - server_id: string; - server_slug: string; + installation_id: string; + installation_name: string; team_id: string; - tool_count: number; - tool_names: string[]; - discovery_duration_ms: number; - previous_tool_count: number; + server_slug: string; + tool_count: number; // Total tools discovered + total_tokens: number; // Sum of all tool token counts + tools: Array<{ + tool_name: string; + description: string; + input_schema: Record<string, unknown>; + token_count: number; // Tokens for this specific tool + }>; + discovered_at: string; // ISO 8601 timestamp } ``` +**Purpose:** +- Store tool metadata in backend database (`mcpToolMetadata` table) +- Calculate hierarchical router token savings (traditional vs 2-meta-tool approach) +- Enable frontend tool catalog display with token consumption metrics +- Provide analytics on MCP server complexity and context window usage + +**Emission Timing:** +- stdio servers: After handshake completion and tool caching +- HTTP/SSE servers: After startup tool discovery and caching + +**Token Calculation:** +Uses `token-counter.ts` utility to estimate tokens for each tool based on name, description, and input schema JSON. + #### `mcp.tools.updated` Emitted when tool list changes during configuration refresh. diff --git a/development/satellite/logging.mdx b/development/satellite/logging.mdx index 49b2879..a0f3eae 100644 --- a/development/satellite/logging.mdx +++ b/development/satellite/logging.mdx @@ -323,6 +323,109 @@ server.log.warn({ - **Add performance metrics**: Duration, resource usage, counts - **Use consistent naming**: camelCase and standard field names +## Secret Masking in Logs + +The satellite automatically protects sensitive credentials in log output through selective secret masking. This prevents API keys, tokens, and passwords from appearing in plain text in log files or monitoring systems. 
+ +### How Secret Masking Works + +**Automatic Detection:** +- Backend sends metadata with MCP server configurations identifying which fields are secrets +- Satellite receives `secret_metadata` with lists of secret query parameters, headers, and environment variables +- Masking utilities automatically apply to fields marked as secrets + +**Masking Pattern:** +- First 3 characters remain visible followed by `*****` (e.g., `sk_abc123xyz789` becomes `sk_*****`) +- Values shorter than 3 characters are fully masked as `***` +- Non-secret values remain fully visible for debugging + +### Using the Log Masker Utility + +The log masking utilities are located in `src/utils/log-masker.ts` and provide three functions for masking different configuration types: + +```typescript +import { maskUrlForLogging, maskHeadersForLogging, maskEnvForLogging } from '../utils/log-masker'; + +// Example: Mask URL with secret query parameters +const maskedUrl = maskUrlForLogging( + 'https://api.example.com?token=sk_abc123&region=us-east', + ['token'] // Only 'token' is marked as secret +); +// Result: 'https://api.example.com?token=sk_*****&region=us-east' + +// Example: Mask HTTP headers +const maskedHeaders = maskHeadersForLogging( + { 'Authorization': 'Bearer sk_abc123', 'Content-Type': 'application/json' }, + ['Authorization'] // Only 'Authorization' is marked as secret +); +// Result: { 'Authorization': 'Bea*****', 'Content-Type': 'application/json' } +``` + +### Best Practices for Secret Protection + +**✅ DO: Use Masking Functions for URLs with Credentials** + +```typescript +// ✅ Good - URLs with query parameters masked +server.log.info({ + operation: 'mcp_server_connect', + serverId: config.server_slug, + url: maskUrlForLogging(config.url, config.secret_metadata?.query_params), + transport: config.transport_type +}, 'Connecting to MCP server'); +``` + +**✅ DO: Mask Headers Containing Authentication** + +```typescript +// ✅ Good - Headers masked before logging +server.log.debug({ + operation: 
'http_request', + url: maskUrlForLogging(config.url, config.secret_metadata?.query_params), + headers: maskHeadersForLogging(config.headers, config.secret_metadata?.headers) +}, 'Making HTTP request to MCP server'); +``` + +**❌ DON'T: Log Raw Credentials** + +```typescript +// ❌ Bad - Exposes credentials in logs +server.log.info({ + operation: 'mcp_server_connect', + url: config.url, // Contains API key in query param! + headers: config.headers // Contains Authorization token! +}, 'Connecting to MCP server'); +``` + +### Implementation Locations + +Secret masking is implemented in these satellite components: + +- **Dynamic Config Manager** (`src/services/dynamic-config-manager.ts`) - 5 locations where MCP server URLs are logged +- **Command Processor** (`src/services/command-processor.ts`) - MCP server spawn and configuration logging +- **HTTP Proxy Manager** (`src/process/http-proxy-manager.ts`) - HTTP/SSE transport logging +- **MCP Server Wrapper** (`src/core/mcp-server-wrapper.ts`) - Server connection and lifecycle logging +- **Remote Tool Discovery** (`src/services/remote-tool-discovery-manager.ts`) - Tool discovery from HTTP servers + +### Debugging with Masked Logs + +When troubleshooting authentication issues: + +1. **Partial visibility helps identify credentials**: First 3 characters show which credential was used +2. **Compare prefixes**: Verify the correct API key/token is being applied +3. **Check non-secret params**: Regular parameters remain visible for debugging +4. 
**Use secret metadata**: Confirm which fields are marked as secrets in configuration + +**Example masked log output:** + +``` +[INFO] Connecting to MCP server + operation: "mcp_server_connect" + serverId: "brightdata-scraping" + url: "https://mcp-server.brightdata.com?token=sk_*****&region=us-east-1" + headers: {"Authorization":"Bea*****","Content-Type":"application/json"} +``` + ## Environment-Specific Configuration ### Development Environment diff --git a/development/satellite/tool-discovery.mdx b/development/satellite/tool-discovery.mdx index bf67706..b4cdba1 100644 --- a/development/satellite/tool-discovery.mdx +++ b/development/satellite/tool-discovery.mdx @@ -231,6 +231,58 @@ stdio tools persist in cache for optimal performance: **Idle Process Management**: stdio processes that remain inactive for the configured idle timeout (default: 3 minutes) are automatically terminated to save memory. However, **tools remain cached** so when a client requests them, the process respawns instantly without needing to rediscover tools. This reduces respawn time from 1-3 seconds to 1-2 seconds. See [Idle Process Management](/development/satellite/idle-process-management) for details. +## Tool Metadata Collection + +After tool discovery completes, the satellite emits tool metadata to the backend for storage and analysis. + +### Event Emission (Post-Discovery) + +Following successful tool discovery (both HTTP/SSE and stdio), the satellite: + +1. **Calculates token consumption** using the `token-counter.ts` utility +2. **Builds event payload** with tool metadata including per-tool token counts +3. **Emits `mcp.tools.discovered` event** to backend via EventBus +4. 
**Backend stores metadata** in `mcpToolMetadata` table for team visibility + +**Event Payload Structure:** +```typescript +{ + installation_id: string; + installation_name: string; + team_id: string; + server_slug: string; + tool_count: number; // Total tools discovered + total_tokens: number; // Sum of all tool token counts + tools: Array<{ + tool_name: string; + description: string; + input_schema: Record<string, unknown>; + token_count: number; // Tokens for this specific tool + }>; + discovered_at: string; // ISO 8601 timestamp +} +``` + +**Integration Points:** +- `StdioToolDiscoveryManager`: Emits after stdio tool discovery completes +- `RemoteToolDiscoveryManager`: Emits after HTTP/SSE tool discovery completes +- `EventBus`: Batches events every 3 seconds for efficient transmission +- Backend handler: Stores tools with delete-then-insert strategy + +**Token Calculation:** +The satellite uses `estimateMcpServerTokens()` from `token-counter.ts` to calculate: +- Per-tool tokens: `name` + `description` + `JSON.stringify(inputSchema)` +- Total server tokens: Sum of all tool tokens +- Uses `gpt-tokenizer` library (provider-agnostic) + +**Purpose:** +- Store tool metadata in backend database for team visibility +- Calculate hierarchical router token savings (traditional vs 2-meta-tool approach) +- Enable frontend tool catalog display with token consumption metrics +- Provide analytics on MCP server complexity and context window usage + +See [Event System](/development/satellite/event-system) for event batching and delivery details. + ## Development Considerations ### Debugging Support diff --git a/general/mcp-configuration.mdx b/general/mcp-configuration.mdx index 89a5e76..72b0962 100644 --- a/general/mcp-configuration.mdx +++ b/general/mcp-configuration.mdx @@ -221,7 +221,9 @@ Servers synced from the official MCP Registry can use different transport mechan Servers that run as local processes using standard input/output. 
Arguments are configured in the template level (locked), with runtime arguments at team/user levels. **HTTP/SSE Transport (via remotes):** -Servers accessed via HTTP endpoints. Headers are mapped to appropriate tiers - authentication headers at team level, optional headers at user level. +Servers accessed via HTTP endpoints. Both headers and URL query parameters are mapped to appropriate tiers: +- **HTTP Headers** - Authentication headers at team level, optional headers at user level +- **URL Query Parameters** - API keys and tokens at team level, personal preferences at user level The three-tier system adapts automatically based on the transport type detected from the official registry. diff --git a/general/security.mdx b/general/security.mdx index 598571f..653993f 100644 --- a/general/security.mdx +++ b/general/security.mdx @@ -78,6 +78,45 @@ DeployStack automatically protects sensitive MCP configuration values through a - **Audit Safe**: Logs and interfaces never contain actual secret values - **Team Security**: Team members use secrets without seeing actual values +### Satellite Log Masking + +DeployStack satellites automatically protect sensitive values in log files and monitoring systems through selective secret masking: + +**Automatic Secret Detection:** +- **Backend Metadata**: Control plane identifies which configuration fields contain secrets based on schema definitions +- **Secret Transmission**: Backend sends metadata to satellites indicating which query parameters, headers, and environment variables are secrets +- **Selective Masking**: Satellites mask only fields marked as secrets, leaving regular configuration visible for debugging + +**Masking Pattern:** +- **Partial Visibility**: First 3 characters of secret values remain visible (e.g., `sk_*****`) +- **Short Values**: Values shorter than 3 characters are fully masked as `***` +- **Context Preservation**: Masked logs still show which parameters were used without exposing actual values + +**What 
Gets Masked in Logs:** +- URL query parameters containing API keys or tokens +- HTTP headers with authentication credentials +- Environment variables marked as secret type +- Any configuration field transmitted with secret metadata + +**What Doesn't Get Masked:** +- Regular configuration values (debug flags, URLs without secrets) +- Server names and installation identifiers +- Non-sensitive query parameters and headers +- Status codes and timing information + +**Security Benefits:** +- **Log Safety**: Satellite logs can be safely shared for debugging without exposing credentials +- **Monitoring Protection**: Log aggregation systems won't capture plaintext secrets +- **Audit Trail**: Logs show which credentials were used without revealing actual values +- **Incident Response**: Security teams can analyze logs without credential exposure risk + +**Example Masked Log Output:** +``` +[INFO] Connecting to MCP server: brightdata-scraping-browser + URL: https://mcp-server.brightdata.com?token=sk_*****&region=us-east-1 + Headers: Authorization=Bea***** +``` + ### Global Settings Encryption Your global configuration data is protected with encryption: diff --git a/package-lock.json b/package-lock.json index e98bc5b..2e3cadc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,7 @@ "name": "deploystack-io-documentation", "version": "0.0.0-development", "devDependencies": { - "@semantic-release/github": "^12.0.1", + "@semantic-release/github": "^12.0.2", "@types/node": "24.9.1", "markdownlint-cli": "^0.45.0", "markdownlint-cli2": "^0.18.1", @@ -463,9 +463,9 @@ } }, "node_modules/@semantic-release/github": { - "version": "12.0.1", - "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-12.0.1.tgz", - "integrity": "sha512-BSC7Ko6aRPnH8ttVBpd3gC98LTiyPdmrmX4qHilLw5EZqVrXrXwcKp/JKUC5hgm0XpJACR3nPjgbfOjTJ75PIA==", + "version": "12.0.2", + "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-12.0.2.tgz", + "integrity": 
"sha512-qyqLS+aSGH1SfXIooBKjs7mvrv0deg8v+jemegfJg1kq6ji+GJV8CO08VJDEsvjp3O8XJmTTIAjjZbMzagzsdw==", "dev": true, "license": "MIT", "dependencies": { @@ -484,6 +484,7 @@ "mime": "^4.0.0", "p-filter": "^4.0.0", "tinyglobby": "^0.2.14", + "undici": "^7.0.0", "url-join": "^5.0.0" }, "engines": { @@ -7942,6 +7943,16 @@ "dev": true, "license": "MIT" }, + "node_modules/undici": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.16.0.tgz", + "integrity": "sha512-QEg3HPMll0o3t2ourKwOeUAZ159Kn9mx5pnzHRQO8+Wixmh88YdZRiIwat0iNzNNXn0yoEtXJqFpyW7eM8BV7g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/undici-types": { "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", diff --git a/package.json b/package.json index 035e08d..e10bd7b 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,7 @@ }, "dependencies": {}, "devDependencies": { - "@semantic-release/github": "^12.0.1", + "@semantic-release/github": "^12.0.2", "@types/node": "24.9.1", "markdownlint-cli": "^0.45.0", "markdownlint-cli2": "^0.18.1",