Skip to content

Commit f8f3391

Browse files
committed
feat(sampling): require JSON responses and log model/role
Add model and role fields to LLMResponse and capture stopReason for better observability. Initialize conversationHistory with an initial user prompt that enforces returning raw JSON and guarantees the conversation contains at least one message (a Claude requirement). Increase maxTokens to 55_000 to allow larger responses. Implement minimal self-healing: when parsed responses lack required fields (action or decision), re-prompt for JSON-only output and continue the loop. Pass model, role, and stopReason into logIterationProgress and record them on iteration spans (including stopReason when present). Also add a default user message for the summary call. These changes improve robustness against malformed LLM outputs and surface model metadata for debugging and tracing.
1 parent 845ec2f commit f8f3391

File tree

1 file changed

+55
-5
lines changed

1 file changed

+55
-5
lines changed

packages/core/src/executors/sampling/base-sampling-executor.ts

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export interface ResponseContent {
3636
export interface LLMResponse {
3737
content: ResponseContent[];
3838
stopReason?: string;
39+
model: string;
40+
role: "user" | "assistant";
3941
}
4042

4143
export interface ExternalTool {
@@ -95,8 +97,16 @@ export abstract class BaseSamplingExecutor {
9597
schema: Record<string, unknown>,
9698
state?: TState,
9799
) {
98-
// Initialize conversation
99-
this.conversationHistory = [];
100+
// Initialize conversation with an initial user message
101+
// Ensure at least one message (Claude requirement) and enforce JSON-only output
102+
this.conversationHistory = [{
103+
role: "user",
104+
content: {
105+
type: "text",
106+
text:
107+
'Return ONLY raw JSON (no code fences or explanations). The JSON MUST include action and decision. Example: {"action":"<tool>","decision":"proceed|complete","<tool>":{}}',
108+
},
109+
}];
100110

101111
// Create a root span for the entire sampling loop
102112
const loopSpan: Span | null = this.tracingEnabled
@@ -133,10 +143,13 @@ export abstract class BaseSamplingExecutor {
133143
const response = await this.server.createMessage({
134144
systemPrompt: systemPrompt(),
135145
messages: this.conversationHistory,
136-
maxTokens: Number.MAX_SAFE_INTEGER,
146+
maxTokens: 55_000,
137147
});
138148

139149
const responseContent = (response.content.text as string) || "{}";
150+
const model = response.model;
151+
const stopReason = response.stopReason;
152+
const role = response.role;
140153

141154
// Parse JSON response
142155
let parsedData: Record<string, unknown>;
@@ -184,14 +197,34 @@ export abstract class BaseSamplingExecutor {
184197
}
185198
}
186199

200+
// Minimal self-healing: ensure required fields exist
201+
if (!action || typeof parsedData["decision"] !== "string") {
202+
this.conversationHistory.push({
203+
role: "user",
204+
content: {
205+
type: "text",
206+
text:
207+
'Required fields missing: action or decision. Return ONLY raw JSON, no code fences or explanations. Example: {"action":"<tool>","decision":"proceed|complete","<tool>":{}}',
208+
},
209+
});
210+
if (iterationSpan) endSpan(iterationSpan);
211+
continue;
212+
}
213+
187214
// Process the parsed data using subclass implementation
188215
const result = await this.processAction(
189216
parsedData,
190217
schema,
191218
state,
192219
loopSpan,
193220
);
194-
this.logIterationProgress(parsedData, result);
221+
this.logIterationProgress(
222+
parsedData,
223+
result,
224+
model,
225+
stopReason,
226+
role,
227+
);
195228

196229
if (iterationSpan) {
197230
// Simplified: store full raw JSON, raw LLM response, and full tool result if present (no truncation)
@@ -210,7 +243,12 @@ export abstract class BaseSamplingExecutor {
210243
action: typeof action === "string" ? action : String(action),
211244
samplingResponse: responseContent,
212245
toolResult: JSON.stringify(result),
246+
model: model,
247+
role: role,
213248
};
249+
if (stopReason) {
250+
attr.stopReason = stopReason;
251+
}
214252
iterationSpan.setAttributes(attr);
215253
}
216254

@@ -374,7 +412,13 @@ Actions Taken: (high-level flow)
374412
Errors/Warnings: (if any)
375413
376414
${history}`,
377-
messages: [],
415+
messages: [{
416+
role: "user",
417+
content: {
418+
type: "text",
419+
text: "Please provide a concise summary.",
420+
},
421+
}],
378422
maxTokens: 3000,
379423
});
380424

@@ -442,13 +486,19 @@ ${history}`,
442486
protected logIterationProgress(
443487
parsedData: Record<string, unknown>,
444488
result: CallToolResult,
489+
model?: string,
490+
stopReason?: string,
491+
role?: string,
445492
): void {
446493
// Log iteration progress using MCP logging
447494
this.logger.debug({
448495
iteration: `${this.currentIteration + 1}/${this.maxIterations}`,
449496
parsedData,
450497
isError: result.isError,
451498
isComplete: result.isComplete,
499+
model,
500+
stopReason,
501+
role,
452502
result: inspect(result, {
453503
depth: 5,
454504
maxArrayLength: 10,

0 commit comments

Comments
 (0)