fix: separate reasoning from text in OpenCode streaming for Kimi

alari76 · claude · alari76 · commit 3c30ac4b27bc · 2026-04-12T19:22:57.000+03:00
OpenCode sends Kimi's reasoning content as field=text deltas (not
field=reasoning), with part.updated type=reasoning events marking the
boundaries. Track a reasoning phase flag so text deltas during reasoning
are routed to the reasoning buffer and shown as a thinking indicator
instead of being mixed into the response text.

Also: don't emit system_init with "opencode (default)" when no model is
set — the frontend's model validation effect will set the proper model
shortly, avoiding a spurious placeholder model message.

Removed the &lt;think&gt; tag stripping approach (reasoning was never sent
with tags — it was sent as raw text deltas).

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;
diff --git a/server/opencode-process.ts b/server/opencode-process.ts
@@ -275,10 +275,14 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
   private reasoningBuffer = ''
   /** Whether we've already emitted a thinking summary from reasoning deltas. */
   private emittedReasoningSummary = false
-  /** Whether we're currently inside a <think> block in streamed text deltas. */
-  private insideThinkTag = false
-  /** Buffer for accumulating text inside <think> tags during streaming. */
-  private thinkTagBuffer = ''
+  /**
+   * Whether text deltas currently belong to a reasoning part rather than the
+   * actual response. Some providers (e.g. Kimi via OpenCode) send reasoning
+   * content as `field=text` deltas, with `part.updated type=reasoning` events
+   * marking the boundaries. When true, text deltas are routed to the reasoning
+   * buffer instead of being emitted as visible text.
+   */
+  private inReasoningPhase = false
 
   constructor(workingDir: string, opts?: Partial<OpenCodeProcessOptions>) {
     super()
@@ -349,9 +353,14 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
       this.startupTimer = null
     }
 
-    // Model is stored as "providerID/modelID" — show everything after the first slash
-    const modelName = this.model?.includes('/') ? this.model.slice(this.model.indexOf('/') + 1) : (this.model || 'opencode (default)')
-    this.emit('system_init', modelName)
+    // Model is stored as "providerID/modelID" — show everything after the first slash.
+    // Only emit system_init when we have an explicit model. If model is undefined,
+    // the frontend's model validation effect will call setModel shortly, which
+    // triggers a restart with the correct model — no point showing a placeholder.
+    if (this.model) {
+      const modelName = this.model.includes('/') ? this.model.slice(this.model.indexOf('/') + 1) : this.model
+      this.emit('system_init', modelName)
+    }
   }
 
   /** Subscribe to the OpenCode SSE event stream and map events to CodingProcess events. */
@@ -471,10 +480,9 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
       this.deltaBufferFlushed = true
       if (this.lastUserInput && this.deltaBuffer.startsWith(this.lastUserInput)) {
         const remainder = this.deltaBuffer.slice(this.lastUserInput.length)
-        if (remainder) { const clean = this.stripThinkTags(remainder); if (clean) this.emit('text', clean) }
+        if (remainder) this.emit('text', remainder)
       } else {
-        const clean = this.stripThinkTags(this.deltaBuffer)
-        if (clean) this.emit('text', clean)
+        this.emit('text', this.deltaBuffer)
       }
       this.deltaBuffer = ''
     }
@@ -495,6 +503,23 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
         }
         if (field === 'text' && delta) {
           this.receivedDeltas = true
+
+          // Some providers (e.g. Kimi via OpenCode) send reasoning content as
+          // field=text deltas. When inReasoningPhase is set (by a preceding
+          // part.updated type=reasoning event), route to reasoning buffer.
+          if (this.inReasoningPhase) {
+            this.reasoningBuffer += delta
+            if (this.reasoningBuffer.length > 20 && !this.emittedReasoningSummary) {
+              this.emittedReasoningSummary = true
+              const match = this.reasoningBuffer.match(/^(.+?[.!?\n])/)
+              const summary = match && match[1].length <= 120
+                ? match[1].replace(/\n/g, ' ').trim()
+                : this.reasoningBuffer.slice(0, 80).trim()
+              this.emit('thinking', summary)
+            }
+            break
+          }
+
           // Buffer initial deltas to detect and strip user echo prefix.
           // Some providers echo the user message at the start of the assistant
           // response, which causes duplicate display.
@@ -504,19 +529,15 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
               this.deltaBufferFlushed = true
               if (this.deltaBuffer.startsWith(this.lastUserInput)) {
                 const remainder = this.deltaBuffer.slice(this.lastUserInput.length)
-                if (remainder) { const clean = this.stripThinkTags(remainder); if (clean) this.emit('text', clean) }
+                if (remainder) this.emit('text', remainder)
               } else {
-                const clean = this.stripThinkTags(this.deltaBuffer)
-                if (clean) this.emit('text', clean)
+                this.emit('text', this.deltaBuffer)
               }
               this.deltaBuffer = ''
             }
             // Still buffering — don't emit yet
           } else {
-            // Strip <think>...</think> tags — some models (e.g. Kimi) embed
-            // chain-of-thought reasoning in the text output using these tags.
-            const clean = this.stripThinkTags(delta)
-            if (clean) this.emit('text', clean)
+            this.emit('text', delta)
           }
         } else if (field === 'reasoning' && delta) {
           // Accumulate reasoning deltas and emit a thinking summary once we
@@ -548,6 +569,11 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
 
         switch (part.type) {
           case 'text': {
+            // A text part.updated signals that text deltas are now actual
+            // response text, not reasoning. Clear the reasoning phase flag.
+            if (this.inReasoningPhase) {
+              this.inReasoningPhase = false
+            }
             // Text may arrive via message.part.delta (streaming) or as full
             // content here (OpenCode >=1.4 message.updated). Only emit if we
             // haven't already streamed it via delta events or emitted it from
@@ -559,18 +585,21 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
               if (this.lastUserInput && text.startsWith(this.lastUserInput)) {
                 text = text.slice(this.lastUserInput.length)
               }
-              // Strip <think>...</think> blocks from full text
-              text = OpenCodeProcess.stripThinkTagsFull(text)
               if (text) this.emit('text', text)
             }
             break
           }
 
           case 'reasoning': {
+            // A reasoning part.updated signals that subsequent text deltas
+            // are reasoning content, not visible text. Set the phase flag so
+            // the delta handler routes them to the reasoning buffer.
+            this.inReasoningPhase = true
             // OpenCode uses 'text' field, not 'content'. Reasoning may be
             // empty or encrypted (e.g. OpenAI models). Only emit if present.
             const content = part.text || ''
-            if (content.length > 20) {
+            if (content.length > 20 && !this.emittedReasoningSummary) {
+              this.emittedReasoningSummary = true
               const match = content.match(/^(.+?[.!?\n])/)
               const summary = match && match[1].length <= 120
                 ? match[1].replace(/\n/g, ' ').trim()
@@ -760,67 +789,6 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
     }
   }
 
-  /**
-   * Strip `<think>...</think>` tags from text content. Some models (e.g. Kimi)
-   * embed chain-of-thought reasoning in the text output using these tags rather
-   * than using a separate reasoning/thinking field.
-   * Returns the text with think blocks removed; thinking content is routed to
-   * the thinking summary emitter.
-   */
-  private stripThinkTags(text: string): string {
-    // Fast path: no think tags at all
-    if (!text.includes('<think') && !text.includes('</think') && !this.insideThinkTag) {
-      return text
-    }
-
-    let result = ''
-    let i = 0
-    while (i < text.length) {
-      if (this.insideThinkTag) {
-        const closeIdx = text.indexOf('</think>', i)
-        if (closeIdx === -1) {
-          // Still inside think block, buffer the rest
-          this.thinkTagBuffer += text.slice(i)
-          i = text.length
-        } else {
-          this.thinkTagBuffer += text.slice(i, closeIdx)
-          this.insideThinkTag = false
-          // Emit thinking summary from accumulated think content
-          if (this.thinkTagBuffer.length > 20 && !this.emittedReasoningSummary) {
-            this.emittedReasoningSummary = true
-            const match = this.thinkTagBuffer.match(/^(.+?[.!?\n])/)
-            const summary = match && match[1].length <= 120
-              ? match[1].replace(/\n/g, ' ').trim()
-              : this.thinkTagBuffer.slice(0, 80).trim()
-            this.emit('thinking', summary)
-          }
-          this.thinkTagBuffer = ''
-          i = closeIdx + '</think>'.length
-        }
-      } else {
-        const openIdx = text.indexOf('<think>', i)
-        if (openIdx === -1) {
-          result += text.slice(i)
-          i = text.length
-        } else {
-          result += text.slice(i, openIdx)
-          this.insideThinkTag = true
-          i = openIdx + '<think>'.length
-        }
-      }
-    }
-    return result
-  }
-
-  /**
-   * Strip `<think>...</think>` blocks from a complete text string (non-streaming).
-   * Returns the text with all think blocks removed.
-   */
-  private static stripThinkTagsFull(text: string): string {
-    // Remove complete <think>...</think> blocks (including multiline)
-    return text.replace(/<think>[\s\S]*?<\/think>\s*/g, '').trimStart()
-  }
-
   /**
    * Detect TodoWrite/TaskCreate/TaskUpdate tool calls and emit todo_update events.
    * Mirrors the task-tracking logic in ClaudeProcess.handleTaskTool().
@@ -889,8 +857,7 @@ export class OpenCodeProcess extends EventEmitter<ClaudeProcessEvents> implement
     this.deltaBufferFlushed = false
     this.reasoningBuffer = ''
     this.emittedReasoningSummary = false
-    this.insideThinkTag = false
-    this.thinkTagBuffer = ''
+    this.inReasoningPhase = false
 
     const baseUrl = `http://localhost:${serverState.port}`
     // Parse [Attached files: ...] prefix and convert image paths to proper parts.
diff --git a/src/hooks/useChatSocket.ts b/src/hooks/useChatSocket.ts
@@ -431,12 +431,12 @@ export function useChatSocket({
   }, [send])
 
   const createSession = useCallback((name: string, workingDir: string, useWorktree?: boolean, permissionMode?: PermissionMode, provider?: import('../types').CodingProvider) => {
-    // Include the current model so the server starts with the right model
-    // immediately, avoiding a "default model" message followed by a second
-    // "actual model" message when setModel fires later.
-    const model = currentModel ?? undefined
-    send({ type: 'create_session', name, workingDir, model, useWorktree, permissionMode, provider })
-  }, [send, currentModel])
+    // Don't pass a model — the provider-specific model validation effect
+    // will call setModel with the correct provider-appropriate model after
+    // the session is created. Passing a cross-provider model (e.g. a Claude
+    // model to an OpenCode session) causes a spurious model message.
+    send({ type: 'create_session', name, workingDir, useWorktree, permissionMode, provider })
+  }, [send])
 
   const sendInput = useCallback((data: string, displayText?: string) => {
     send({ type: 'input', data, ...(displayText ? { displayText } : {}) } as WsClientMessage)