feat(json): add function to strip markdown and explanatory text from LLM responses

yaonyan · yaonyan · commit 0d9f7d969bbd · 2025-10-05T16:41:59.000+08:00
diff --git a/packages/core/src/executors/sampling/base-sampling-executor.ts b/packages/core/src/executors/sampling/base-sampling-executor.ts
@@ -122,21 +122,7 @@ export abstract class BaseSamplingExecutor {
         this.currentIteration < this.maxIterations;
         this.currentIteration++
       ) {
-        // Create a span for each iteration
-        const iterationSpan: Span | null = this.tracingEnabled
-          ? startSpan(
-            "mcpc.sampling_iteration",
-            {
-              iteration: this.currentIteration + 1,
-              agent: this.name,
-              systemPrompt: systemPrompt(),
-              maxTokens: String(Number.MAX_SAFE_INTEGER),
-              maxIterations: this.maxIterations,
-              messages: JSON.stringify(this.conversationHistory),
-            },
-            loopSpan ?? undefined,
-          )
-          : null;
+        let iterationSpan: Span | null = null;
 
         try {
           const response = await this.server.createMessage({
@@ -155,11 +141,20 @@ export abstract class BaseSamplingExecutor {
           try {
             parsedData = parseJSON(responseContent.trim(), true);
           } catch (parseError) {
-            if (iterationSpan) {
-              iterationSpan.addEvent("parse_error", {
-                error: String(parseError),
-              });
-            }
+            // Create span for parse error iteration
+            iterationSpan = this.tracingEnabled
+              ? startSpan(
+                "mcpc.sampling_iteration.parse_error",
+                {
+                  iteration: this.currentIteration + 1,
+                  agent: this.name,
+                  error: String(parseError),
+                  maxIterations: this.maxIterations,
+                },
+                loopSpan ?? undefined,
+              )
+              : null;
+
             this.addParsingErrorToHistory(responseContent, parseError);
             if (iterationSpan) endSpan(iterationSpan);
             continue;
@@ -177,24 +172,27 @@ export abstract class BaseSamplingExecutor {
 
           const action = parsedData["action"];
 
-          // If an action name is present, record it as an attribute on the iteration span for easier tracing/debugging.
-          if (action && typeof action === "string") {
-            // Update the span name to include the action for clearer traces.
-            try {
-              const safeAction = String(action).replace(/\s+/g, "_");
-              // updateName is part of the OpenTelemetry Span API
-              if (
-                iterationSpan &&
-                typeof (iterationSpan as any).updateName === "function"
-              ) {
-                (iterationSpan as any).updateName(
-                  `mcpc.sampling_iteration.${safeAction}`,
-                );
-              }
-            } catch {
-              // Ignore any errors while updating span name
-            }
-          }
+          // Create span with action name
+          const actionStr = action && typeof action === "string"
+            ? String(action)
+            : "unknown_action";
+          const spanName = `mcpc.sampling_iteration.${actionStr}`;
+
+          iterationSpan = this.tracingEnabled
+            ? startSpan(
+              spanName,
+              {
+                iteration: this.currentIteration + 1,
+                agent: this.name,
+                action: actionStr,
+                systemPrompt: systemPrompt(),
+                maxTokens: String(Number.MAX_SAFE_INTEGER),
+                maxIterations: this.maxIterations,
+                messages: JSON.stringify(this.conversationHistory),
+              },
+              loopSpan ?? undefined,
+            )
+            : null;
 
           // Minimal self-healing: ensure required fields exist
           if (!action || typeof parsedData["decision"] !== "string") {
diff --git a/packages/utils/src/json.ts b/packages/utils/src/json.ts
@@ -1,7 +1,38 @@
 import { jsonrepair } from "jsonrepair";
 
 /**
- * Attempts to parse JSON with a repair function if initial parse fails.
+ * Strips common markdown code fences and explanatory text from LLM responses
+ */
+function stripMarkdownAndText(text: string): string {
+  // Trim first so the ^ and $ anchored patterns below operate on the actual content
+  text = text.trim();
+
+  // Remove an opening ``` fence at the very start (optionally tagged "json", any case) and a closing ``` at the very end
+  text = text.replace(/^```(?:json)?\s*\n?/i, "");
+  text = text.replace(/\n?```\s*$/, "");
+
+  // Remove a leading label directly followed by a colon, e.g. "Here is:" or "Response:" (does not match "Here is the JSON:")
+  text = text.replace(
+    /^(?:here is|here's|response|result|output|json):\s*/i,
+    "",
+  );
+
+  // Keep only the leftmost {...} or [...] span, matched greedily up to the last closing brace/bracket of that kind
+  const jsonMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
+  if (jsonMatch) {
+    text = jsonMatch[1];
+  }
+
+  return text.trim();
+}
+
+/**
+ * Attempts to parse JSON with automatic cleanup and repair if initial parse fails.
+ * Handles common LLM output formats like:
+ * - ```json{"key":"value"}```
+ * - "Here is: {"key":"value"}"
+ * - Markdown code fences
+ * - Malformed JSON that can be repaired
  */
 export function parseJSON<T, U extends boolean = false>(
   text: string,
@@ -11,17 +42,27 @@ export function parseJSON<T, U extends boolean = false>(
     return JSON.parse(text) as T;
   } catch (_error) {
     try {
-      const repairedText = jsonrepair(text);
-      console.warn(
-        `Failed to parse JSON, attempting to repair, result: ${text}`,
-      );
-      if (throwError) {
-        throw _error;
+      // First attempt: strip markdown and explanatory text
+      const cleanedText = stripMarkdownAndText(text);
+      try {
+        return JSON.parse(cleanedText) as T;
+      } catch {
+        // Second attempt: repair the cleaned JSON
+        const repairedText = jsonrepair(cleanedText);
+        console.warn(
+          `Failed to parse JSON, cleaned and repaired. Original: ${
+            text.slice(0, 100)
+          }...`,
+        );
+        return JSON.parse(repairedText) as T;
       }
-      return JSON.parse(repairedText) as T;
-    } catch {
+    } catch (_repairError) {
       if (throwError) {
-        throw new Error("Failed to parse repaired JSON");
+        throw new Error(
+          `Failed to parse JSON after cleanup and repair. Original error: ${
+            _error instanceof Error ? _error.message : String(_error)
+          }`,
+        );
       }
       return null as T;
     }