diff --git a/apps/workers-observability/src/index.ts b/apps/workers-observability/src/index.ts index d5fd29fb..9dd02cf0 100644 --- a/apps/workers-observability/src/index.ts +++ b/apps/workers-observability/src/index.ts @@ -14,7 +14,7 @@ import { registerAccountTools } from '@repo/mcp-common/src/tools/account' import { registerWorkersTools } from '@repo/mcp-common/src/tools/worker' import { MetricsTracker } from '../../../packages/mcp-observability/src' -import { registerLogsTools } from './tools/logs' +import { registerObservabilityTools } from './tools/observability' import type { AccountSchema, UserSchema } from '@repo/mcp-common/src/cloudflare-oauth-handler' import type { Env } from './context' @@ -51,10 +51,6 @@ export class ObservabilityMCP extends McpAgent { return this._server } - constructor(ctx: DurableObjectState, env: Env) { - super(ctx, env) - } - async init() { this.server = new CloudflareMCPServer({ userId: this.props.user.id, @@ -64,6 +60,30 @@ export class ObservabilityMCP extends McpAgent { version: this.env.MCP_SERVER_VERSION, }, sentry: initSentryWithUser(env, this.ctx, this.props.user.id), + options: { + instructions: `# Cloudflare Workers Observability Tool + +This tool provides powerful capabilities to analyze and troubleshoot your Cloudflare Workers through logs and metrics. Here's how to use it effectively: + +## IMPORTANT: Query Discipline + +**STOP after the first successful query if it answers the user's question.** Do not run multiple queries unless absolutely necessary. The first query often contains the answer - review it thoroughly before running additional queries. + +## Best Practices + +### Efficient Querying +- Start with a focused query that's most likely to answer the question +- Review results completely before deciding if additional queries are needed +- If the first query provides the answer, STOP and present it to the user +- Only run additional queries when specifically directed or when essential information is missing + +### When to STOP Querying +- STOP after presenting meaningful results from the first query +- STOP when you've answered the user's specific question +- STOP when the user hasn't requested additional exploration +- Only continue if explicitly directed with "EXPLORE" or similar instruction +`, + }, }) registerAccountTools(this) @@ -72,7 +92,7 @@ export class ObservabilityMCP extends McpAgent { registerWorkersTools(this) // Register Cloudflare Workers logs tools - registerLogsTools(this) + registerObservabilityTools(this) } async getActiveAccountId() { diff --git a/apps/workers-observability/src/tools/logs.ts b/apps/workers-observability/src/tools/logs.ts deleted file mode 100644 index ef4bf763..00000000 --- a/apps/workers-observability/src/tools/logs.ts +++ /dev/null @@ -1,310 +0,0 @@ -import { z } from 'zod' - -import { handleWorkerLogs, handleWorkerLogsKeys } from '@repo/mcp-common/src/api/workers-logs' - -import type { zReturnedQueryRunEvents } from '@repo/mcp-common/src/types/workers-logs-schemas' -import type { ObservabilityMCP } from '../index' - -type RelevantLogInfo = z.infer -const RelevantLogInfoSchema = z.object({ - timestamp: z.string(), - path: z.string().nullable(), - method: z.string().nullable(), - status: z.number().nullable(), - outcome: z.string(), - eventType: z.string(), - duration: z.number().nullable(), - error: z.string().nullable(), - message: z.string().nullable(), - requestId: z.string(), - rayId: z.string().nullable(), - exceptionStack: z.string().nullable(), -}) - -/** - * Extracts only the most relevant information from a worker log event ( this is to avoid crashing Claude when returning too much data ) - * @param event z.array(zReturnedTelemetryEvent).optional() - * @returns Relevant information extracted from the log - */ -function extractRelevantLogInfo(events: zReturnedQueryRunEvents['events'] = []): RelevantLogInfo[] { - return events.map((event) => { - const workers = event.$workers - const metadata = event.$metadata - const source = event.source - - let path = null - let method = null - let status = null - if (workers?.event?.request) { - path = workers.event.request.path ?? null - method = workers.event.request.method ?? null - } - - if (workers?.event?.response) { - status = workers.event.response.status ?? null - } - - let error = null - if (metadata.error) { - error = metadata.error - } - - let message = metadata?.message ?? null - if (!message) { - if (workers?.event?.rpcMethod) { - message = `RPC: ${workers.event.rpcMethod}` - } else if (path && method) { - message = `${method} ${path}` - } - } - - // Calculate duration - const duration = (workers?.wallTimeMs || 0) + (workers?.cpuTimeMs || 0) - - // Extract rayId if available - const rayId = workers?.event?.rayId ?? null - - let exceptionStack = null - // Extract exception stack if available - if (typeof source !== 'string') { - exceptionStack = source?.exception?.stack ?? null - } - - return { - timestamp: new Date(event.timestamp).toISOString(), - path, - method, - status, - outcome: workers?.outcome || 'unknown', - eventType: workers?.eventType || 'unknown', - duration: duration || null, - error, - message, - requestId: workers?.requestId || metadata?.id || 'unknown', - rayId, - exceptionStack, - } - }) -} - -// Worker logs parameter schema -const workerNameParam = z.string().describe('The name of the worker to analyze logs for') -const filterErrorsParam = z.boolean().default(false).describe('If true, only shows error logs') -const limitParam = z - .number() - .min(1) - .max(100) - .default(100) - .describe('Maximum number of logs to retrieve (1-100, default 100)') -const minutesAgoParam = z - .number() - .min(1) - .max(10080) - .default(30) - .describe('Minutes in the past to look for logs (1-10080, default 30)') -const rayIdParam = z.string().optional().describe('Filter logs by specific Cloudflare Ray ID') - -/** - * Registers the logs analysis tool with the MCP server - * @param server The MCP server instance - * @param accountId Cloudflare account ID - * @param apiToken Cloudflare API token - */ -export function registerLogsTools(agent: ObservabilityMCP) { - // Register the worker logs analysis tool by worker name - agent.server.tool( - 'worker_logs_by_worker_name', - 'Analyze recent logs for a Cloudflare Worker by worker name', - { - scriptName: workerNameParam, - shouldFilterErrors: filterErrorsParam, - limitParam, - minutesAgoParam, - rayId: rayIdParam, - }, - async (params) => { - const accountId = await agent.getActiveAccountId() - if (!accountId) { - return { - content: [ - { - type: 'text', - text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', - }, - ], - } - } - try { - const { scriptName, shouldFilterErrors, limitParam, minutesAgoParam, rayId } = params - const { logs, from, to } = await handleWorkerLogs({ - scriptName, - limit: limitParam, - minutesAgo: minutesAgoParam, - accountId, - apiToken: agent.props.accessToken, - shouldFilterErrors, - rayId, - }) - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - logs, - stats: { - timeRange: { - from, - to, - }, - }, - }), - }, - ], - } - } catch (e) { - agent.server.recordError(e) - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - error: `Error analyzing worker logs: ${e instanceof Error && e.message}`, - }), - }, - ], - } - } - } - ) - - // Register tool to search logs by Ray ID across all workers - agent.server.tool( - 'worker_logs_by_rayid', - 'Analyze recent logs across all workers for a specific request by Cloudflare Ray ID', - { - rayId: z.string().describe('Filter logs by specific Cloudflare Ray ID'), - shouldFilterErrors: filterErrorsParam, - limitParam, - minutesAgoParam, - }, - async (params) => { - const accountId = await agent.getActiveAccountId() - if (!accountId) { - return { - content: [ - { - type: 'text', - text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', - }, - ], - } - } - try { - const { rayId, shouldFilterErrors, limitParam, minutesAgoParam } = params - const { logs, from, to } = await handleWorkerLogs({ - limit: limitParam, - minutesAgo: minutesAgoParam, - accountId, - apiToken: agent.props.accessToken, - shouldFilterErrors, - rayId, - }) - const events = logs?.events?.events ?? [] - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - events: extractRelevantLogInfo(events), - stats: { - timeRange: { - from, - to, - }, - }, - }), - }, - ], - } - } catch (e) { - agent.server.recordError(e) - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - error: `Error analyzing logs by Ray ID: ${e instanceof Error && e.message}`, - }), - }, - ], - } - } - } - ) - - // Register the worker telemetry keys tool - agent.server.tool( - 'worker_logs_keys', - 'Get available telemetry keys for a Cloudflare Worker', - { scriptName: workerNameParam, minutesAgoParam }, - async (params) => { - const accountId = await agent.getActiveAccountId() - if (!accountId) { - return { - content: [ - { - type: 'text', - text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', - }, - ], - } - } - try { - const { scriptName, minutesAgoParam } = params - const keys = await handleWorkerLogsKeys( - scriptName, - minutesAgoParam, - accountId, - agent.props.accessToken - ) - - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - keys: keys.map((key) => ({ - key: key.key, - type: key.type, - lastSeenAt: key.lastSeenAt ? new Date(key.lastSeenAt).toISOString() : null, - })), - stats: { - total: keys.length, - byType: keys.reduce( - (acc, key) => { - acc[key.type] = (acc[key.type] || 0) + 1 - return acc - }, - {} as Record - ), - }, - }), - }, - ], - } - } catch (e) { - agent.server.recordError(e) - return { - content: [ - { - type: 'text', - text: JSON.stringify({ - error: `Error retrieving worker telemetry keys: ${e instanceof Error && e.message}`, - }), - }, - ], - } - } - } - ) -} diff --git a/apps/workers-observability/src/tools/observability.ts b/apps/workers-observability/src/tools/observability.ts new file mode 100644 index 00000000..abea1770 --- /dev/null +++ b/apps/workers-observability/src/tools/observability.ts @@ -0,0 +1,228 @@ +import { + handleWorkerLogsKeys, + handleWorkerLogsValues, + queryWorkersObservability, +} from '@repo/mcp-common/src/api/workers-observability' +import { + zKeysRequest, + zQueryRunRequest, + zValuesRequest, +} from '@repo/mcp-common/src/types/workers-logs-schemas' + +import type { ObservabilityMCP } from '../index' + +/** + * Registers the logs analysis tool with the MCP server + * @param server The MCP server instance + * @param accountId Cloudflare account ID + * @param apiToken Cloudflare API token + */ +export function registerObservabilityTools(agent: ObservabilityMCP) { + // Register the worker logs analysis tool by worker name + agent.server.tool( + 'query_worker_observability', + `Query the Workers Observability API to analyze logs and metrics from your Cloudflare Workers. + +The resulting information should answer the users query. STOP HERE and show them the answer to there question. +If you can't answer the question ask for a follow up. + +## Output handling + +Once you have ran this query you must IMMEDIATELY present the user with this information. + +- **Events**: Display as a table with key fields. For detailed inspection, show full JSON of individual events. +- **Calculations**: Use appropriate charts based on the data (bar charts for comparisons, line charts for time series) +- **Invocations**: Show full request/response details with syntax highlighting for important fields + +## When to Use This tool + +- Investigate errors or performance issues in your Cloudflare Workers +- Monitor Worker usage patterns and resource consumption +- Debug specific request failures or unexpected behaviors +- Verify recent deployments are working as expected +- Generate performance reports for specific Workers or endpoints +- Track down user-reported issues with request ID or user information +- Analyze trends in response times, error rates, or request volumes + +## Core Capabilities +This tool provides three primary views of your Worker data: +1. **List Events** - Browse individual request logs and errors +2. **Calculate Metrics** - Compute statistics across requests (avg, p99, etc.) +3. **Find Specific Invocations** - Locate individual requests matching criteria + +## Filtering Best Practices +- Before applying filters, use the observability_keys and observability_values tools to confirm available filter fields and the correct filter value to add unless you have the data in a response from a previous query. +- Common filter fields: $metadata.service, $metadata.trigger, $metadata.message, $metadata.level, $metadata.requestId, + +## Calculation Best Practices +- Before applying calculations, use the observability_keys tools to confirm key that should be used for the calculation + +## Troubleshooting +- If no results are returned, suggest broadening the time range or relaxing filters +- For errors about invalid fields, recommend using observability_keys to see available options +`, + + { + query: zQueryRunRequest, + }, + async ({ query }) => { + const accountId = await agent.getActiveAccountId() + if (!accountId) { + return { + content: [ + { + type: 'text', + text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', + }, + ], + } + } + try { + const response = await queryWorkersObservability(agent.props.accessToken, accountId, query) + return { + content: [ + { + type: 'text', + text: JSON.stringify(response), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + error: `Error analyzing worker logs: ${error instanceof Error && error.message}`, + }), + }, + ], + } + } + } + ) + + agent.server.tool( + 'observability_keys', + `Find keys in the Workers Observability Data. + +## When to Use This Tool +- Before creating new queries to discover available data fields +- When building complex filters to verify field names exist +- To explore the schema of your Workers data +- When troubleshooting "invalid field" errors in queries +- To discover metrics fields available for calculations + +## Core Capabilities +This tool provides a comprehensive view of available data fields: +1. **Discover Schema** - Explore what fields exist in your Workers data +2. **Validate Fields** - Confirm field names before using them in filters +3. **Understand Data Types** - Learn the type of each field for proper filtering + +## Best Practices +- Set a high limit (1000+) to ensure you see all available keys +- Add the $metadata.service filter to narrow results to a specific Worker +- Use this tool before a query with unfamiliar fields +- Pay attention to field data types when crafting filters + +## Common Key Categories +- $metadata.* fields: Core Worker metadata including service name, level, etc. +- $workers.* fields: Worker-specific metadata like request ID, trigger type, etc. +- custom fields: Any fields added via console.log in your Worker code + +## Troubleshooting +- If expected fields are missing, verify the Worker is actively logging +- For empty results, try broadening your time range +`, + { keysQuery: zKeysRequest }, + async ({ keysQuery }) => { + const accountId = await agent.getActiveAccountId() + if (!accountId) { + return { + content: [ + { + type: 'text', + text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', + }, + ], + } + } + try { + const result = await handleWorkerLogsKeys(agent.props.accessToken, accountId, keysQuery) + return { + content: [ + { + type: 'text', + text: JSON.stringify(result), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + error: `Error retrieving worker telemetry keys: ${error instanceof Error && error.message}`, + }), + }, + ], + } + } + } + ) + + agent.server.tool( + 'observability_values', + `Find values in the Workers Observability Data. + +## When to Use This Tool +- When building complex queries requiring exact value matches + +## Best Practices +- Always specify the correct data type (string, number, boolean) +- Use needle parameter with matchCase:false for case-insensitive searches +- Combine with filters to focus on specific Workers or time periods +- When dealing with high-cardinality fields, use more specific filters + +## Troubleshooting +- For no results, verify the field exists using observability_keys first +- If expected values are missing, try broadening your time range`, + { valuesQuery: zValuesRequest }, + async ({ valuesQuery }) => { + const accountId = await agent.getActiveAccountId() + if (!accountId) { + return { + content: [ + { + type: 'text', + text: 'No currently active accountId. Try listing your accounts (accounts_list) and then setting an active account (set_active_account)', + }, + ], + } + } + try { + const result = await handleWorkerLogsValues(agent.props.accessToken, accountId, valuesQuery) + return { + content: [ + { + type: 'text', + text: JSON.stringify(result), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + error: `Error retrieving worker telemetry values: ${error instanceof Error && error.message}`, + }), + }, + ], + } + } + } + ) +} diff --git a/packages/mcp-common/src/api/workers-logs.ts b/packages/mcp-common/src/api/workers-logs.ts deleted file mode 100644 index 6d0f17af..00000000 --- a/packages/mcp-common/src/api/workers-logs.ts +++ /dev/null @@ -1,158 +0,0 @@ -import { fetchCloudflareApi } from '../cloudflare-api' -import { zKeysResponse, zReturnedQueryRunResult } from '../types/workers-logs-schemas' -import { V4Schema } from '../v4-api' - -/** - * Fetches recent logs for a specified Cloudflare Worker - * @param scriptName Name of the worker script to get logs for - * @param accountId Cloudflare account ID - * @param apiToken Cloudflare API token - * @returns The logs analysis result with filtered relevant information - */ -export async function handleWorkerLogs({ - limit, - minutesAgo, - accountId, - apiToken, - shouldFilterErrors, - scriptName, - rayId, -}: { - limit: number - minutesAgo: number - accountId: string - apiToken: string - shouldFilterErrors: boolean - scriptName?: string - rayId?: string -}): Promise<{ logs: zReturnedQueryRunResult | null; from: number; to: number }> { - if (scriptName === undefined && rayId === undefined) { - throw new Error('Either scriptName or rayId must be provided') - } - // Calculate timeframe based on minutesAgo parameter - const now = Date.now() - const fromTimestamp = now - minutesAgo * 60 * 1000 - - type QueryFilter = { id: string; key: string; type: string; operation: string; value?: string } - const filters: QueryFilter[] = [] - - // Build query to fetch logs - if (scriptName) { - filters.push({ - id: 'worker-name-filter', - key: '$metadata.service', - type: 'string', - value: scriptName, - operation: 'eq', - }) - } - - if (shouldFilterErrors === true) { - filters.push({ - id: 'error-filter', - key: '$metadata.error', - type: 'string', - operation: 'exists', - }) - } - - // Add Ray ID filter if provided - if (rayId) { - filters.push({ - id: 'ray-id-filter', - key: '$workers.event.rayId', - type: 'string', - value: rayId, - operation: 'eq', - }) - } - - const queryPayload = { - queryId: 'workers-logs', - timeframe: { - from: fromTimestamp, - to: now, - }, - parameters: { - datasets: ['cloudflare-workers'], - filters, - calculations: [], - groupBys: [], - havings: [], - }, - view: 'events', - limit, - } - - const data = await fetchCloudflareApi({ - endpoint: '/workers/observability/telemetry/query', - accountId, - apiToken, - responseSchema: V4Schema(zReturnedQueryRunResult), - options: { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(queryPayload), - }, - }) - - return { logs: data.result, from: fromTimestamp, to: now } -} - -/** - * Fetches available telemetry keys for a specified Cloudflare Worker - * @param scriptName Name of the worker script to get keys for - * @param accountId Cloudflare account ID - * @param apiToken Cloudflare API token - * @returns List of telemetry keys available for the worker - */ -export async function handleWorkerLogsKeys( - scriptName: string, - minutesAgo: number, - accountId: string, - apiToken: string -): Promise { - // Calculate timeframe (last 24 hours to ensure we get all keys) - const now = Date.now() - const fromTimestamp = now - minutesAgo * 60 * 1000 - - // Build query for telemetry keys - const queryPayload = { - queryId: 'workers-keys', - timeframe: { - from: fromTimestamp, - to: now, - }, - parameters: { - datasets: ['cloudflare-workers'], - filters: [ - { - id: 'service-filter', - key: '$metadata.service', - type: 'string', - value: `${scriptName}`, - operation: 'eq', - }, - ], - }, - } - - const data = await fetchCloudflareApi({ - endpoint: '/workers/observability/telemetry/keys', - accountId, - apiToken, - responseSchema: V4Schema(zKeysResponse), - options: { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'portal-version': '2', - }, - body: JSON.stringify(queryPayload), - }, - }) - - return data.result || [] -} diff --git a/packages/mcp-common/src/api/workers-observability.ts b/packages/mcp-common/src/api/workers-observability.ts new file mode 100644 index 00000000..379d94f4 --- /dev/null +++ b/packages/mcp-common/src/api/workers-observability.ts @@ -0,0 +1,86 @@ +import { fetchCloudflareApi } from '../cloudflare-api' +import { + zKeysResponse, + zReturnedQueryRunResult, + zValuesResponse, +} from '../types/workers-logs-schemas' +import { V4Schema } from '../v4-api' + +import type { z } from 'zod' +import type { zKeysRequest, zQueryRunRequest, zValuesRequest } from '../types/workers-logs-schemas' + +type QueryRunRequest = z.infer + +function fixTimeframe(timeframe: QueryRunRequest['timeframe']) { + return { + from: new Date(timeframe.from).getTime(), + to: new Date(timeframe.to).getTime(), + } +} + +export async function queryWorkersObservability( + apiToken: string, + accountId: string, + query: QueryRunRequest +): Promise | null> { + const data = await fetchCloudflareApi({ + endpoint: '/workers/observability/telemetry/query', + accountId, + apiToken, + responseSchema: V4Schema(zReturnedQueryRunResult), + options: { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ ...query, timeframe: fixTimeframe(query.timeframe) }), + }, + }) + + return data.result +} + +type QueryKeysRequest = z.infer +export async function handleWorkerLogsKeys( + apiToken: string, + accountId: string, + keysQuery: QueryKeysRequest +): Promise { + const data = await fetchCloudflareApi({ + endpoint: '/workers/observability/telemetry/keys', + accountId, + apiToken, + responseSchema: V4Schema(zKeysResponse), + options: { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ ...keysQuery, timeframe: fixTimeframe(keysQuery.timeframe) }), + }, + }) + + return data.result || [] +} + +export async function handleWorkerLogsValues( + apiToken: string, + accountId: string, + valuesQuery: z.infer +): Promise | null> { + const data = await fetchCloudflareApi({ + endpoint: '/workers/observability/telemetry/values', + accountId, + apiToken, + responseSchema: V4Schema(zValuesResponse), + options: { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ ...valuesQuery, timeframe: fixTimeframe(valuesQuery.timeframe) }), + }, + }) + + return data.result +} diff --git a/packages/mcp-common/src/tools/account.ts b/packages/mcp-common/src/tools/account.ts index b4360a33..e125c29d 100644 --- a/packages/mcp-common/src/tools/account.ts +++ b/packages/mcp-common/src/tools/account.ts @@ -2,7 +2,8 @@ import { z } from 'zod' import { handleAccountsList } from '../api/account' import { getCloudflareClient } from '../cloudflare-api' -import { type CloudflareMcpAgent } from '../types/cloudflare-mcp-agent' + +import type { CloudflareMcpAgent } from '../types/cloudflare-mcp-agent' export function registerAccountTools(agent: CloudflareMcpAgent) { // Tool to list all accounts diff --git a/packages/mcp-common/src/tools/worker.ts b/packages/mcp-common/src/tools/worker.ts index 0fbe62bd..9da28521 100644 --- a/packages/mcp-common/src/tools/worker.ts +++ b/packages/mcp-common/src/tools/worker.ts @@ -2,7 +2,8 @@ import { z } from 'zod' import { handleWorkerScriptDownload, handleWorkersList } from '../api/workers' import { getCloudflareClient } from '../cloudflare-api' -import { type CloudflareMcpAgent } from '../types/cloudflare-mcp-agent' + +import type { CloudflareMcpAgent } from '../types/cloudflare-mcp-agent' /** * Registers the workers tools with the MCP server diff --git a/packages/mcp-common/src/types/workers-logs-schemas.ts b/packages/mcp-common/src/types/workers-logs-schemas.ts index 8394a7bf..9de912d6 100644 --- a/packages/mcp-common/src/types/workers-logs-schemas.ts +++ b/packages/mcp-common/src/types/workers-logs-schemas.ts @@ -55,14 +55,53 @@ export const zFilterCombination = z.enum(['and', 'or', 'AND', 'OR']) export const zPrimitiveUnion = z.union([z.string(), z.number(), z.boolean()]) export const zQueryFilter = z.object({ - key: z.string(), + key: z.string().describe(`Filter field name. IMPORTANT: + + • DO NOT guess keys - always use verified keys from either: + - Previous query results + - The observability_keys response + + • PREFERRED KEYS (faster & always available): + - $metadata.service: Worker service name + - $metadata.origin: Trigger type (e.g., fetch, scheduled, etc.) + - $metadata.trigger: Trigger type (e.g., GET /users, POST /orders, etc.) + - $metadata.message: Log message text (present in nearly all logs) + - $metadata.error: Error message (when applicable) +`), operation: zQueryOperation, - value: zPrimitiveUnion.optional(), + value: zPrimitiveUnion.optional().describe(`Filter comparison value. IMPORTANT: + + • MUST match actual values in your logs + • VERIFY using either: + - Actual values from previous query results + - The '/values' endpoint with your selected key + + • TYPE MATCHING: + - Ensure value type (string/number/boolean) matches the field type + - String comparisons are case-sensitive unless using specific operations + + • PATTERN USAGE: + - For 'contains', use simple wildcard patterns + - For 'regex', MUST use ClickHouse regex syntax: + - Uses RE2 syntax (not PCRE/JavaScript) + - No lookaheads/lookbehinds + - Examples: '^5\\d{2}$' for HTTP 5xx codes, '\\bERROR\\b' for word boundary + - Escape backslashes with double backslash`), type: z.enum(['string', 'number', 'boolean']), -}) +}).describe(` + ## Filtering Best Practices +- Before applying filters, use the observability_keys and observability_values queries to confirm available filter fields and values. +- If the query is asking to find something you should check that it exists. I.e. to requests with errors filter for $metadata.error exists. + `) export const zQueryCalculation = z.object({ - key: z.string().optional(), + key: z.string().optional() + .describe(`The key to use for the calculation. This key must exist in the logs. +Use the Keys endpoint to confirm that this key exists + +• DO NOT guess keys - always use verified keys from either: +- Previous query results +- The observability_keys response`), keyType: z.enum(['string', 'number', 'boolean']).optional(), operator: zQueryOperator, alias: z.string().optional(), @@ -101,9 +140,7 @@ export const zQueryRunCalculationsV2 = z.array( series: z.array( z.object({ time: z.string(), - data: z.array( - zAggregateResult.merge(z.object({ firstSeen: z.string(), lastSeen: z.string() })) - ), + data: z.array(zAggregateResult), }) ), }) @@ -115,11 +152,32 @@ export const zStatistics = z.object({ bytes_read: z.number(), }) +export const zTimeframe = z + .object({ + to: z.string(), + from: z.string(), + }) + .describe( + `Timeframe for your query (ISO-8601 format). + + • Current server time: ${new Date()} + • Default: Last hour from current time + • Maximum range: Last 7 days + • Format: "YYYY-MM-DDTHH:MM:SSZ" (e.g., "2025-04-29T14:30:00Z") + + Examples: + - Last 30 minutes: from="2025-04-29T14:00:00Z", to="2025-04-29T14:30:00Z" + - Yesterday: from="2025-04-28T00:00:00Z", to="2025-04-29T00:00:00Z" + + Note: Narrower timeframes provide faster responses and more specific results. + Omit this parameter entirely to use the default (last hour).` + ) + const zCloudflareMiniEventDetailsRequest = z.object({ url: z.string().optional(), method: z.string().optional(), path: z.string().optional(), - search: z.record(z.string()).optional(), + search: z.record(z.any()).optional(), }) const zCloudflareMiniEventDetailsResponse = z.object({ @@ -249,37 +307,81 @@ export const zQueryRunRequest = z.object({ // TODO: Fix these types queryId: z.string(), parameters: z.object({ - datasets: z.array(z.string()).optional(), + datasets: z + .array(z.string()) + .optional() + .describe('Leave this empty to use the default datasets'), filters: z.array(zQueryFilter).optional(), filterCombination: zFilterCombination.optional(), calculations: z.array(zQueryCalculation).optional(), - groupBys: z.array(zQueryGroupBy).optional(), + groupBys: z.array(zQueryGroupBy).optional().describe(`Only valid when doing a Calculation`), orderBy: z .object({ - value: z.string(), + value: z.string().describe('This must be the alias of a calculation'), order: z.enum(['asc', 'desc']).optional(), }) - .optional(), - limit: z.number().int().nonnegative().max(100).optional(), + .optional() + .describe('Order By only workers when a group by is present'), + limit: z + .number() + .int() + .nonnegative() + .max(100) + .optional() + .describe( + 'Use this limit when view is calculation and a group by is present. 10 is a sensible default' + ), needle: zSearchNeedle.optional(), }), - timeframe: z.object({ - to: z.number(), - from: z.number(), - }), - granularity: z.number().optional(), - limit: z.number().max(100).optional().default(50), - view: zViews.optional().default('calculations'), - dry: z.boolean().optional().default(false), - offset: z.string().optional(), + timeframe: zTimeframe, + granularity: z + .number() + .optional() + .describe( + 'This is only used when the view is calculations - by leaving it empty workers observability will detect the correct granularity' + ), + limit: z + .number() + .max(100) + .optional() + .default(5) + .describe( + 'Use this limit to limit the number of events returned when the view is events. 5 is a sensible default' + ), + view: zViews.optional().default('calculations').describe(`## Examples by View Type +### Events View +- "Show me all errors for the worker api-proxy in the last 30 minutes" +- "List successful requests for the image-resizer worker with status code 200" +- "Show events from worker auth-service where the path contains /login" + +### Calculation View +- "What is the p99 of wall time for worker api-proxy?" +- "What's the count of requests by status code for worker cdn-router?" + +### Invocation View +- "Find a request to worker api-proxy that resulted in a 500 error" +- "Find the slowest request to worker image-processor in the last hour" + +TRACES AND PATTERNS ARE NOT CURRENTLY SUPPORTED + `), + dry: z.boolean().optional().default(true), + offset: z + .string() + .optional() + .describe( + 'The offset to use for pagination. Use the $metadata.id field to get the next offset.' + ), offsetBy: z.number().optional(), - offsetDirection: z.string().optional(), + offsetDirection: z + .string() + .optional() + .describe('The direction to use for pagination. Use "next" or "prev".'), }) /** * The response from the API */ -export type zReturnedQueryRunResult = z.infer +export type ReturnedQueryRunResult = z.infer export const zReturnedQueryRunResult = z.object({ // run: zQueryRunRequest, calculations: zQueryRunCalculationsV2.optional(), @@ -293,17 +395,19 @@ export const zReturnedQueryRunResult = z.object({ * Keys Request */ export const zKeysRequest = z.object({ - timeframe: z - .object({ - to: z.number(), - from: z.number(), - }) - .optional(), - datasets: z.array(z.string()).default([]), + timeframe: zTimeframe, + datasets: z + .array(z.string()) + .default([]) + .describe('Leave this empty to use the default datasets'), filters: z.array(zQueryFilter).default([]), - limit: z.number().optional(), + limit: z.number().optional().describe(` + • ADVANCED USAGE: + set limit=1000+ to retrieve comprehensive key options without needing additional filtering`), needle: zSearchNeedle.optional(), - keyNeedle: zSearchNeedle.optional(), + keyNeedle: zSearchNeedle.optional() + .describe(`If the user makes a suggestion for a key, use this to narrow down the list of keys returned. + Make sure match case is fals to avoid case sensitivity issues.`), }) /** @@ -322,13 +426,13 @@ export const zKeysResponse = z.array( * Values Request */ export const zValuesRequest = z.object({ - timeframe: z.object({ - to: z.number(), - from: z.number(), - }), + timeframe: zTimeframe, key: z.string(), type: z.enum(['string', 'boolean', 'number']), - datasets: z.array(z.string()), + datasets: z + .array(z.string()) + .default([]) + .describe('Leave this empty to use the default datasets'), filters: z.array(zQueryFilter).default([]), limit: z.number().default(50), needle: zSearchNeedle.optional(), diff --git a/packages/mcp-common/tests/logs.spec.ts b/packages/mcp-common/tests/logs.spec.ts deleted file mode 100644 index 367738c3..00000000 --- a/packages/mcp-common/tests/logs.spec.ts +++ /dev/null @@ -1,467 +0,0 @@ -import { env, fetchMock } from 'cloudflare:test' -import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' - -import { handleWorkerLogs, handleWorkerLogsKeys } from '../src/api/workers-logs' -import { cloudflareClientMockImplementation } from './utils/cloudflare-mock' - -beforeAll(() => { - vi.mock('cloudflare', () => { - return { - Cloudflare: vi.fn().mockImplementation(() => { - return cloudflareClientMockImplementation() - }), - } - }) - // Enable outbound request mocking... - fetchMock.activate() - // ...and throw errors if an outbound request isn't mocked - fetchMock.disableNetConnect() -}) - -// Ensure we matched every mock we defined -afterEach(() => fetchMock.assertNoPendingInterceptors()) - -describe('Logs API', () => { - describe('handleWorkerLogs', () => { - it('should fetch and analyze worker logs correctly', async () => { - const scriptName = 'test-worker' - - // Create mock log events - const mockEvents = [ - { - timestamp: Date.now() - 100000, - $workers: { - scriptName: 'test-worker', - outcome: 'ok', - eventType: 'fetch', - requestId: '123456abcdef', - wallTimeMs: 45.2, - cpuTimeMs: 12.8, - event: { - request: { - method: 'GET', - path: '/api/v1/resource', - url: 'https://example.com/api/v1/resource', - }, - response: { - status: 200, - }, - rayId: 'ray123abc456def', - }, - }, - source: { - message: 'Successful request to resource', - }, - dataset: 'cloudflare-workers', - $metadata: { - id: '1', - message: 'GET /api/v1/resource', - }, - }, - { - timestamp: Date.now() - 200000, - $workers: { - scriptName: 'test-worker', - outcome: 'ok', - eventType: 'fetch', - requestId: '456789bcdef01', - wallTimeMs: 88.7, - cpuTimeMs: 33.2, - event: { - request: { - method: 'POST', - path: '/api/v1/resource/create', - url: 'https://example.com/api/v1/resource/create', - }, - response: { - status: 201, - }, - rayId: 'ray456def789ghi', - }, - }, - source: { - message: 'Created new resource', - }, - dataset: 'cloudflare-workers', - $metadata: { - id: '2', - message: 'POST /api/v1/resource/create', - }, - }, - { - timestamp: Date.now() - 300000, - $workers: { - scriptName: 'test-worker', - outcome: 'error', - eventType: 'fetch', - requestId: '789012defghi34', - wallTimeMs: 112.3, - cpuTimeMs: 45.8, - event: { - request: { - method: 'PUT', - path: '/api/v1/resource/update', - url: 'https://example.com/api/v1/resource/update', - }, - rayId: 'ray789ghi012jkl', - }, - }, - source: { - message: 'Resource not found', - }, - dataset: 'cloudflare-workers', - $metadata: { - id: '3', - message: 'Error updating resource', - }, - }, - ] - - const mockResponse = { - success: true, - result: { - events: { - events: mockEvents, - count: 3, - }, - statistics: { - elapsed: 10, - rows_read: 6000, - bytes_read: 30000000, - }, - }, - errors: [], - messages: [{ message: 'Successful request' }], - } - - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/query`, - }) - .reply(200, mockResponse) - - const limit = 100 - const minutesAgo = 30 - const result = await handleWorkerLogs({ - scriptName, - limit, - minutesAgo, - accountId: env.CLOUDFLARE_MOCK_ACCOUNT_ID, - apiToken: env.CLOUDFLARE_MOCK_API_TOKEN, - shouldFilterErrors: false, - }) - - // Verify that the timestamp range is set correctly - expect(result).toHaveProperty('from') - expect(result).toHaveProperty('to') - expect(result.from).toBeLessThan(result.to) - - expect(result.logs?.events?.count).toBe(3) - - const events = result.logs?.events?.events ?? [] - const getLog = events[0] - expect(getLog.$workers?.event?.request).toStrictEqual(mockEvents[0].$workers.event.request) - expect(getLog.$workers?.outcome).toBe(mockEvents[0].$workers.outcome) - expect(getLog.$workers?.event?.rayId).toBe(mockEvents[0].$workers.event.rayId) - expect(getLog.$workers?.cpuTimeMs).toBeGreaterThan(0) - expect(getLog.$workers?.wallTimeMs).toBeGreaterThan(0) - - const postLog = events[1] - expect(postLog.$workers?.event?.request).toStrictEqual(mockEvents[1].$workers.event.request) - expect(postLog.$workers?.outcome).toBe(mockEvents[1].$workers.outcome) - expect(postLog.$workers?.event?.rayId).toBe(mockEvents[1].$workers.event.rayId) - expect(postLog.$workers?.cpuTimeMs).toBeGreaterThan(0) - expect(postLog.$workers?.wallTimeMs).toBeGreaterThan(0) - - const errorLog = events[2] - expect(errorLog.$workers?.event?.request).toStrictEqual(mockEvents[2].$workers.event.request) - expect(errorLog.$workers?.outcome).toBe(mockEvents[2].$workers.outcome) - expect(errorLog.$workers?.event?.rayId).toBe(mockEvents[2].$workers.event.rayId) - expect(errorLog.$workers?.cpuTimeMs).toBeGreaterThan(0) - expect(errorLog.$workers?.wallTimeMs).toBeGreaterThan(0) - }) - - it('should handle empty logs', async () => { - const scriptName = 'empty-worker' - - const mockResponse = { - success: true, - result: { - events: { - events: [], - count: 0, - }, - statistics: { - elapsed: 10, - rows_read: 6000, - bytes_read: 30000000, - }, - }, - errors: [], - messages: [{ message: 'Successful request' }], - } - - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/query`, - }) - .reply(200, mockResponse) - - const limit = 100 - const minutesAgo = 30 - const result = await handleWorkerLogs({ - scriptName, - limit, - minutesAgo, - accountId: env.CLOUDFLARE_MOCK_ACCOUNT_ID, - apiToken: env.CLOUDFLARE_MOCK_API_TOKEN, - shouldFilterErrors: false, - }) - - expect(result.logs?.events?.count).toBe(0) - }) - - it('should handle API errors', async () => { - const scriptName = 'error-worker' - - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/query`, - }) - .reply(500, 'Server error') - - const limit = 100 - const minutesAgo = 30 - await expect( - handleWorkerLogs({ - scriptName, - limit, - minutesAgo, - accountId: env.CLOUDFLARE_MOCK_ACCOUNT_ID, - apiToken: env.CLOUDFLARE_MOCK_API_TOKEN, - shouldFilterErrors: false, - }) - ).rejects.toThrow('Cloudflare API request failed') - }) - - it('should filter logs by error status when requested', async () => { - const scriptName = 'test-worker' - - const mockEvents = [ - { - timestamp: Date.now() - 100000, - $workers: { - scriptName: 'test-worker', - outcome: 'ok', - eventType: 'fetch', - requestId: '123456abcdef', - wallTimeMs: 45.2, - cpuTimeMs: 12.8, - event: { - request: { - method: 'GET', - path: '/api/v1/resource', - url: 'https://example.com/api/v1/resource', - }, - response: { - status: 200, - }, - rayId: 'ray123abc456def', - }, - }, - source: {}, - dataset: 'cloudflare-workers', - $metadata: { id: '1' }, - }, - { - timestamp: Date.now() - 200000, - $workers: { - scriptName: 'test-worker', - outcome: 'error', - eventType: 'fetch', - requestId: '456789bcdef01', - wallTimeMs: 88.7, - cpuTimeMs: 33.2, - event: { - request: { - method: 'POST', - path: '/api/v1/resource/create', - url: 'https://example.com/api/v1/resource/create', - }, - rayId: 'ray456def789ghi', - }, - }, - source: { - message: 'Invalid request data', - }, - dataset: 'cloudflare-workers', - $metadata: { id: '2', error: 'Invalid request data' }, - }, - { - timestamp: Date.now() - 300000, - $workers: { - scriptName: 'test-worker', - outcome: 'error', - eventType: 'fetch', - requestId: '789012defghi34', - wallTimeMs: 112.3, - cpuTimeMs: 45.8, - event: { - request: { - method: 'PUT', - path: '/api/v1/resource/update', - url: 'https://example.com/api/v1/resource/update', - }, - rayId: 'ray789ghi012jkl', - }, - }, - source: { - message: 'Resource not found', - }, - dataset: 'cloudflare-workers', - $metadata: { id: '3', error: 'Resource not found' }, - }, - ] - - const mockResponse = { - success: true, - result: { - events: { - events: mockEvents.filter((event) => event.$workers.outcome === 'error'), - count: 3, - }, - statistics: { - elapsed: 10, - rows_read: 6000, - bytes_read: 30000000, - }, - }, - errors: [], - messages: [{ message: 'Successful request' }], - } - - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/query`, - }) - .reply(200, mockResponse) - - // error filtering enabled - const limit = 100 - const minutesAgo = 30 - const result = await handleWorkerLogs({ - scriptName, - limit, - minutesAgo, - accountId: env.CLOUDFLARE_MOCK_ACCOUNT_ID, - apiToken: env.CLOUDFLARE_MOCK_API_TOKEN, - shouldFilterErrors: true, - }) - - // Check results - we should only get error logs - expect( - result.logs?.events?.events?.filter((event) => event.$workers?.outcome == 'error').length - ).toBe(2) - expect(result.logs?.events?.count).toBe(3) - - const firstErrorLog = result.logs?.events?.events?.find( - (event) => event.$metadata?.error === 'Invalid request data' - ) - console.log(firstErrorLog) - expect(firstErrorLog).toBeDefined() - expect(firstErrorLog?.$workers?.event?.rayId).toBe('ray456def789ghi') - - const secondErrorLog = result.logs?.events?.events?.find( - (event) => event.$metadata?.error === 'Resource not found' - ) - expect(secondErrorLog).toBeDefined() - expect(secondErrorLog?.$workers?.event?.rayId).toBe('ray789ghi012jkl') - }) - }) - - describe('handleWorkerLogsKeys', () => { - it('should fetch worker telemetry keys correctly', async () => { - const scriptName = 'test-worker' - - // Mock telemetry keys response - const mockKeysResponse = { - success: true, - result: [ - { - key: '$workers.outcome', - type: 'string', - lastSeenAt: Date.now() - 1000000, - }, - { - key: '$workers.wallTimeMs', - type: 'number', - lastSeenAt: Date.now() - 2000000, - }, - { - key: '$workers.event.error', - type: 'boolean', - lastSeenAt: Date.now() - 3000000, - }, - ], - errors: [], - messages: [{ message: 'Successful request' }], - } - - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/keys`, - }) - .reply(200, mockKeysResponse) - - const minutesAgo = 10080 - const result = await handleWorkerLogsKeys( - scriptName, - minutesAgo, - env.CLOUDFLARE_MOCK_ACCOUNT_ID, - env.CLOUDFLARE_MOCK_API_TOKEN - ) - - expect(result).toEqual(mockKeysResponse.result) - expect(result.length).toBe(3) - expect(result[0].key).toBe('$workers.outcome') - expect(result[0].type).toBe('string') - expect(result[1].key).toBe('$workers.wallTimeMs') - expect(result[1].type).toBe('number') - expect(result[2].key).toBe('$workers.event.error') - expect(result[2].type).toBe('boolean') - }) - - it('should handle API errors when fetching keys', async () => { - const scriptName = 'error-worker' - - // Setup mock for error response - fetchMock - .get('https://api.cloudflare.com') - .intercept({ - method: 'POST', - path: `/client/v4/accounts/${env.CLOUDFLARE_MOCK_ACCOUNT_ID}/workers/observability/telemetry/keys`, - }) - .reply(500, 'Server error') - - const minutesAgo = 10080 - await expect( - handleWorkerLogsKeys( - scriptName, - minutesAgo, - env.CLOUDFLARE_MOCK_ACCOUNT_ID, - env.CLOUDFLARE_MOCK_API_TOKEN - ) - ).rejects.toThrow('Cloudflare API request failed') - }) - }) -})