diff --git a/core/llm/index.ts b/core/llm/index.ts
index ceea1153dcd..03f4b5103e4 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -996,7 +996,6 @@ export abstract class BaseLLM implements ILLM {
     return completionOptions;
   }
 
-  // Update the processChatChunk method:
   private processChatChunk(
     chunk: ChatMessage,
     interaction: ILLMInteractionLog | undefined,
diff --git a/core/llm/llms/Vllm.ts b/core/llm/llms/Vllm.ts
index 66f9b84c407..45f381e047e 100644
--- a/core/llm/llms/Vllm.ts
+++ b/core/llm/llms/Vllm.ts
@@ -1,5 +1,12 @@
-import { Chunk, LLMOptions } from "../../index.js";
+import {
+  ChatMessage,
+  Chunk,
+  CompletionOptions,
+  LLMOptions,
+} from "../../index.js";
+import { LlmApiRequestType } from "../openaiTypeConverters.js";
+import { ThinkingTagExtractor } from "../thinkingTagExtractor.js";
 import OpenAI from "./OpenAI.js";
 
 // vLLM-specific rerank response types
@@ -20,16 +27,148 @@ interface VllmRerankResponse {
   results: VllmRerankItem[];
 }
 
+/**
+ * vLLM-specific options for thinking output extraction.
+ * These options allow configuring custom tags to extract thinking content from the response.
+ */
+export interface VllmOptions extends LLMOptions {
+  /**
+   * Custom opening tag for extracting thinking/reasoning content from streamed responses.
+   * Used with models that output thinking content wrapped in custom tags (e.g., `<think>`, `<reasoning>`).
+   * Must be used together with `thinkingCloseTag`.
+   */
+  thinkingOpenTag?: string;
+  /**
+   * Custom closing tag for extracting thinking/reasoning content from streamed responses.
+   * Must be used together with `thinkingOpenTag`.
+   */
+  thinkingCloseTag?: string;
+}
+
+/**
+ * vLLM provider for Continue.
+ *
+ * vLLM supports thinking/reasoning outputs in two ways:
+ * 1. Via the standard `reasoning_content` field in the response (default OpenAI format)
+ * 2. Via custom tags in the response content (configurable)
+ *
+ * For custom thinking tag formats, you can configure `thinkingOpenTag` and `thinkingCloseTag`
+ * in the model options. For example:
+ *
+ * ```yaml
+ * models:
+ *   - provider: vllm
+ *     model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+ *     apiBase: http://localhost:8000
+ *     thinkingOpenTag: "<think>"
+ *     thinkingCloseTag: "</think>"
+ * ```
+ *
+ * See vLLM documentation for more details:
+ * https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
+ */
 class Vllm extends OpenAI {
   static providerName = "vllm";
-  constructor(options: LLMOptions) {
+
+  // vLLM-specific options for thinking tag extraction
+  private _thinkingOpenTag?: string;
+  private _thinkingCloseTag?: string;
+
+  // Override useOpenAIAdapterFor to NOT include "streamChat".
+  // vLLM uses the reasoning_content field for thinking output (via vLLM's reasoning parser),
+  // which is not part of the standard OpenAI SDK types. By excluding "streamChat", we force
+  // the use of the parent class's _streamChat method which uses streamSse for direct SSE
+  // parsing. This ensures proper handling of reasoning_content in streaming responses,
+  // as streamSse parses JSON directly and preserves all fields including non-standard ones.
+  protected override useOpenAIAdapterFor: (LlmApiRequestType | "*")[] = [
+    "chat",
+    "embed",
+    "list",
+    "rerank",
+    "streamFim",
+  ];
+
+  constructor(options: VllmOptions) {
     super(options);
+
+    // Validate that thinking tags are provided together
+    if (
+      (options.thinkingOpenTag && !options.thinkingCloseTag) ||
+      (!options.thinkingOpenTag && options.thinkingCloseTag)
+    ) {
+      throw new Error(
+        "vLLM: Both thinkingOpenTag and thinkingCloseTag must be provided together",
+      );
+    }
+
+    // Store vLLM-specific options
+    this._thinkingOpenTag = options.thinkingOpenTag;
+    this._thinkingCloseTag = options.thinkingCloseTag;
+
     if (options.isFromAutoDetect) {
       this._setupCompletionOptions();
     }
   }
 
+  /**
+   * Override _streamChat to handle thinking tag extraction if configured.
+   * This allows vLLM to support models that use custom tags (like <think>...</think>)
+   * instead of the standard reasoning_content field.
+   */
+  protected async *_streamChat(
+    messages: ChatMessage[],
+    signal: AbortSignal,
+    options: CompletionOptions,
+  ): AsyncGenerator<ChatMessage> {
+    // If no custom thinking tags configured, use parent implementation
+    if (!this._thinkingOpenTag || !this._thinkingCloseTag) {
+      for await (const chunk of super._streamChat(messages, signal, options)) {
+        yield chunk;
+      }
+      return;
+    }
+
+    // Use thinking tag extractor for custom tag formats
+    const extractor = new ThinkingTagExtractor(
+      this._thinkingOpenTag,
+      this._thinkingCloseTag,
+    );
+
+    for await (const chunk of super._streamChat(messages, signal, options)) {
+      if (chunk.role === "assistant" && typeof chunk.content === "string") {
+        const extracted = extractor.process(chunk.content);
+
+        // Yield thinking content first
+        if (extracted.thinking) {
+          yield {
+            role: "thinking",
+            content: extracted.thinking,
+          };
+        }
+
+        // Yield regular content if present
+        if (extracted.content) {
+          yield {
+            ...chunk,
+            content: extracted.content,
+          };
+        }
+      } else {
+        // Pass through non-assistant chunks unchanged
+        yield chunk;
+      }
+    }
+
+    // Flush any remaining content from the extractor
+    const flushed = extractor.flush();
+    if (flushed.thinking) {
+      yield { role: "thinking", content: flushed.thinking };
+    }
+    if (flushed.content) {
+      yield { role: "assistant", content: flushed.content };
+    }
+  }
+
   supportsFim(): boolean {
     return false;
   }
diff --git a/core/llm/thinkingTagExtractor.ts b/core/llm/thinkingTagExtractor.ts
new file mode 100644
index 00000000000..67676a5720c
--- /dev/null
+++ b/core/llm/thinkingTagExtractor.ts
@@ -0,0 +1,127 @@
+/**
+ * Helper class to extract thinking content from custom tags during streaming.
+ * This is used for providers like vLLM that support custom thinking output formats.
+ */
+export class ThinkingTagExtractor {
+  private buffer: string = "";
+  private inThinkingBlock: boolean = false;
+  private readonly openTag: string;
+  private readonly closeTag: string;
+
+  constructor(openTag: string, closeTag: string) {
+    this.openTag = openTag;
+    this.closeTag = closeTag;
+  }
+
+  /**
+   * Process a chunk of text and extract thinking/regular content.
+   * Returns an object with the thinking content and regular content that should be yielded.
+   */
+  process(text: string): {
+    thinking: string;
+    content: string;
+  } {
+    this.buffer += text;
+
+    let thinking = "";
+    let content = "";
+
+    while (this.buffer.length > 0) {
+      if (this.inThinkingBlock) {
+        // Look for closing tag
+        const closeIndex = this.buffer.indexOf(this.closeTag);
+        if (closeIndex !== -1) {
+          // Found closing tag - extract thinking content up to it
+          thinking += this.buffer.substring(0, closeIndex);
+          this.buffer = this.buffer.substring(
+            closeIndex + this.closeTag.length,
+          );
+          this.inThinkingBlock = false;
+        } else {
+          // No closing tag yet - check if we might have a partial closing tag at the end
+          const partialMatchLength = this.getPartialMatchLength(
+            this.buffer,
+            this.closeTag,
+          );
+          if (partialMatchLength > 0) {
+            // Keep the potential partial match in the buffer
+            thinking += this.buffer.substring(
+              0,
+              this.buffer.length - partialMatchLength,
+            );
+            this.buffer = this.buffer.substring(
+              this.buffer.length - partialMatchLength,
+            );
+          } else {
+            // No partial match - all content is thinking
+            thinking += this.buffer;
+            this.buffer = "";
+          }
+          break;
+        }
+      } else {
+        // Not in thinking block - look for opening tag
+        const openIndex = this.buffer.indexOf(this.openTag);
+        if (openIndex !== -1) {
+          // Found opening tag
+          content += this.buffer.substring(0, openIndex);
+          this.buffer = this.buffer.substring(openIndex + this.openTag.length);
+          this.inThinkingBlock = true;
+        } else {
+          // No opening tag - check if we might have a partial opening tag at the end
+          const partialMatchLength = this.getPartialMatchLength(
+            this.buffer,
+            this.openTag,
+          );
+          if (partialMatchLength > 0) {
+            // Keep the potential partial match in the buffer
+            content += this.buffer.substring(
+              0,
+              this.buffer.length - partialMatchLength,
+            );
+            this.buffer = this.buffer.substring(
+              this.buffer.length - partialMatchLength,
+            );
+          } else {
+            // No partial match - all content is regular content
+            content += this.buffer;
+            this.buffer = "";
+          }
+          break;
+        }
+      }
+    }
+
+    return { thinking, content };
+  }
+
+  /**
+   * Flush any remaining content in the buffer.
+   * Call this when the stream ends.
+   */
+  flush(): {
+    thinking: string;
+    content: string;
+  } {
+    const result = {
+      thinking: this.inThinkingBlock ? this.buffer : "",
+      content: this.inThinkingBlock ? "" : this.buffer,
+    };
+    this.buffer = "";
+    this.inThinkingBlock = false;
+    return result;
+  }
+
+  /**
+   * Check if the end of the text could be the start of the tag.
+   * Returns the length of the partial match, or 0 if no match.
+   */
+  private getPartialMatchLength(text: string, tag: string): number {
+    for (let i = 1; i < tag.length && i <= text.length; i++) {
+      if (text.slice(-i) === tag.slice(0, i)) {
+        return i;
+      }
+    }
+    return 0;
+  }
+}
diff --git a/core/llm/thinkingTagExtractor.vitest.ts b/core/llm/thinkingTagExtractor.vitest.ts
new file mode 100644
index 00000000000..2e3190bb753
--- /dev/null
+++ b/core/llm/thinkingTagExtractor.vitest.ts
@@ -0,0 +1,223 @@
+import { beforeEach, describe, expect, it } from "vitest";
+import { ThinkingTagExtractor } from "./thinkingTagExtractor";
+
+/**
+ * Unit tests for ThinkingTagExtractor class.
+ * These tests verify the thinking tag extraction functionality that is used
+ * by vLLM provider for custom thinking output formats.
+ */
+describe("ThinkingTagExtractor", () => {
+  let extractor: ThinkingTagExtractor;
+
+  beforeEach(() => {
+    extractor = new ThinkingTagExtractor("<think>", "</think>");
+  });
+
+  describe("basic functionality", () => {
+    it("should extract thinking content from single text", () => {
+      const result = extractor.process("<think>my thinking</think>my response");
+
+      expect(result.thinking).toBe("my thinking");
+      expect(result.content).toBe("my response");
+    });
+
+    it("should handle text without thinking tags", () => {
+      const result = extractor.process("just regular content");
+
+      expect(result.thinking).toBe("");
+      expect(result.content).toBe("just regular content");
+    });
+
+    it("should handle only thinking content", () => {
+      const result = extractor.process("<think>only thinking</think>");
+
+      expect(result.thinking).toBe("only thinking");
+      expect(result.content).toBe("");
+    });
+
+    it("should handle multiple thinking blocks", () => {
+      const result = extractor.process(
+        "<think>first</think>text1<think>second</think>text2",
+      );
+
+      expect(result.thinking).toBe("firstsecond");
+      expect(result.content).toBe("text1text2");
+    });
+  });
+
+  describe("streaming chunks", () => {
+    it("should handle thinking split across multiple chunks", () => {
+      const result1 = extractor.process("<think>first ");
+      const result2 = extractor.process("part</think>answer ");
+      const result3 = extractor.process("here");
+
+      // First chunk starts thinking
+      expect(result1.thinking).toBe("first ");
+      expect(result1.content).toBe("");
+
+      // Second chunk ends thinking and starts content
+      expect(result2.thinking).toBe("part");
+      expect(result2.content).toBe("answer ");
+
+      // Third chunk is all content
+      expect(result3.thinking).toBe("");
+      expect(result3.content).toBe("here");
+    });
+
+    it("should handle partial tags at chunk boundaries", () => {
+      const result1 = extractor.process("before<th");
+      const result2 = extractor.process("ink>thinking</th");
+      const result3 = extractor.process("ink>after");
+
+      // Partial tag should be buffered
+      expect(result1.thinking).toBe("");
+      expect(result1.content).toBe("before");
+
+      // Complete the opening tag, buffer closing tag
+      expect(result2.thinking).toBe("thinking");
+      expect(result2.content).toBe("");
+
+      // Complete the closing tag
+      expect(result3.thinking).toBe("");
+      expect(result3.content).toBe("after");
+    });
+
+    it("should handle multiple chunks with complete tags", () => {
+      const result1 = extractor.process("<think>first</think>text1");
+      const result2 = extractor.process("<think>second</think>text2");
+
+      expect(result1.thinking).toBe("first");
+      expect(result1.content).toBe("text1");
+
+      expect(result2.thinking).toBe("second");
+      expect(result2.content).toBe("text2");
+    });
+  });
+
+  describe("flush behavior", () => {
+    it("should flush remaining content at stream end", () => {
+      // Process incomplete thinking
+      const result = extractor.process("<think>incomplete thinking");
+      expect(result.thinking).toBe("incomplete thinking");
+      expect(result.content).toBe("");
+
+      // Flush any remaining buffered content
+      const flushed = extractor.flush();
+      expect(flushed.thinking).toBe("");
+      expect(flushed.content).toBe("");
+    });
+
+    it("should flush partial tag as content when outside thinking block", () => {
+      // Process content with partial opening tag
+      extractor.process("some text <thi");
+
+      const flushed = extractor.flush();
+      expect(flushed.thinking).toBe("");
+      expect(flushed.content).toBe("<thi");
+    });
+
+    it("should flush partial closing tag as thinking when inside thinking block", () => {
+      // Start thinking block and leave partial closing tag
+      extractor.process("<think>thinking content </thi");
+
+      const flushed = extractor.flush();
+      expect(flushed.thinking).toBe("</thi");
+      expect(flushed.content).toBe("");
+    });
+
+    it("should reset state after flush", () => {
+      extractor.process("<think>first</think>");
+      extractor.flush();
+
+      // After flush, extractor should be reset
+      const result = extractor.process("new content");
+      expect(result.thinking).toBe("");
+      expect(result.content).toBe("new content");
+    });
+  });
+
+  describe("custom tag formats", () => {
+    it("should work with custom reasoning tags", () => {
+      const customExtractor = new ThinkingTagExtractor(
+        "<reasoning>",
+        "</reasoning>",
+      );
+
+      const result = customExtractor.process(
+        "<reasoning>my reasoning</reasoning>my answer",
+      );
+
+      expect(result.thinking).toBe("my reasoning");
+      expect(result.content).toBe("my answer");
+    });
+
+    it("should work with bracket-style tags", () => {
+      const customExtractor = new ThinkingTagExtractor("[THINK]", "[/THINK]");
+
+      const result = customExtractor.process(
+        "[THINK]internal thought[/THINK]response",
+      );
+
+      expect(result.thinking).toBe("internal thought");
+      expect(result.content).toBe("response");
+    });
+
+    it("should work with longer custom tags", () => {
+      const customExtractor = new ThinkingTagExtractor(
+        "<|thinking|>",
+        "<|/thinking|>",
+      );
+
+      const result = customExtractor.process(
+        "<|thinking|>deep thought<|/thinking|>answer",
+      );
+
+      expect(result.thinking).toBe("deep thought");
+      expect(result.content).toBe("answer");
+    });
+  });
+
+  describe("edge cases", () => {
+    it("should handle empty string", () => {
+      const result = extractor.process("");
+
+      expect(result.thinking).toBe("");
+      expect(result.content).toBe("");
+    });
+
+    it("should handle nested-looking but not actually nested tags", () => {
+      // Not real nesting since the first </think> closes
+      const result = extractor.process("<think>outer<think>inner</think>after");
+
+      expect(result.thinking).toBe("outer<think>inner");
+      expect(result.content).toBe("after");
+    });
+
+    it("should handle content before thinking", () => {
+      const result = extractor.process("intro<think>thinking</think>outro");
+
+      expect(result.thinking).toBe("thinking");
+      expect(result.content).toBe("introoutro");
+    });
+
+    it("should handle special characters in content", () => {
+      const result = extractor.process(
+        "<think>a < b && c > d</think>result: x < y",
+      );
+
+      expect(result.thinking).toBe("a < b && c > d");
+      expect(result.content).toBe("result: x < y");
+    });
+
+    it("should handle newlines in thinking and content", () => {
+      const result = extractor.process(
+        "<think>line1\nline2</think>response\nmore",
+      );
+
+      expect(result.thinking).toBe("line1\nline2");
+      expect(result.content).toBe("response\nmore");
+    });
+  });
+});
diff --git a/core/llm/thinkingTagIntegration.vitest.ts b/core/llm/thinkingTagIntegration.vitest.ts
new file mode 100644
index 00000000000..dbb11970f2c
--- /dev/null
+++ b/core/llm/thinkingTagIntegration.vitest.ts
@@ -0,0 +1,397 @@
+import { beforeEach, describe, expect, it } from "vitest";
+import { ChatMessage, CompletionOptions } from "../index";
+import Vllm, { VllmOptions } from "./llms/Vllm";
+import { ThinkingTagExtractor } from "./thinkingTagExtractor";
+
+/**
+ * Mock vLLM for testing thinking tag extraction during streaming.
+ * The mock overrides _streamChat to return controlled chunks and then applies
+ * the same thinking tag extraction logic that Vllm's _streamChat uses.
+ */
+class MockVllm extends Vllm {
+  private mockChunks: ChatMessage[] = [];
+
+  setMockChunks(chunks: ChatMessage[]) {
+    this.mockChunks = chunks;
+  }
+
+  /**
+   * Override _streamChat to bypass the real HTTP calls but still
+   * apply the thinking tag extraction logic from the parent Vllm class.
+   */
+  protected override async *_streamChat(
+    messages: ChatMessage[],
+    signal: AbortSignal,
+    options: CompletionOptions,
+  ): AsyncGenerator<ChatMessage> {
+    // Get the thinking tags from the instance (using type assertion for private access)
+    const openTag = (this as unknown as { _thinkingOpenTag?: string })
+      ._thinkingOpenTag;
+    const closeTag = (this as unknown as { _thinkingCloseTag?: string })
+      ._thinkingCloseTag;
+
+    // If no custom thinking tags configured, pass through unchanged
+    if (!openTag || !closeTag) {
+      for (const chunk of this.mockChunks) {
+        yield chunk;
+      }
+      return;
+    }
+
+    // Use thinking tag extractor for custom tag formats
+    const extractor = new ThinkingTagExtractor(openTag, closeTag);
+
+    for (const chunk of this.mockChunks) {
+      if (chunk.role === "assistant" && typeof chunk.content === "string") {
+        const extracted = extractor.process(chunk.content);
+
+        // Yield thinking content first
+        if (extracted.thinking) {
+          yield {
+            role: "thinking",
+            content: extracted.thinking,
+          };
+        }
+
+        // Yield regular content if present
+        if (extracted.content) {
+          yield {
+            ...chunk,
+            content: extracted.content,
+          };
+        }
+      } else {
+        // Pass through non-assistant chunks unchanged (including native thinking role)
+        yield chunk;
+      }
+    }
+
+    // Flush any remaining content from the extractor
+    const flushed = extractor.flush();
+    if (flushed.thinking) {
+      yield { role: "thinking", content: flushed.thinking };
+    }
+    if (flushed.content) {
+      yield { role: "assistant", content: flushed.content };
+    }
+  }
+}
+
+describe("ThinkingTagExtractor Integration with vLLM", () => {
+  let llm: MockVllm;
+
+  beforeEach(() => {
+    const options: VllmOptions = {
+      model: "mock-model",
+      apiBase: "http://localhost:8000",
+      thinkingOpenTag: "<think>",
+      thinkingCloseTag: "</think>",
+      // Use "none" template to bypass template-based message formatting
+      // which would otherwise wrap all chunks with role: "assistant"
+      template: "none" as any,
+    };
+    llm = new MockVllm(options);
+  });
+
+  describe("streamChat with thinking tags", () => {
+    it("should extract thinking content from single chunk", async () => {
+      llm.setMockChunks([
+        {
+          role: "assistant",
+          content: "<think>my thinking</think>my response",
+        },
+      ]);
+
+      const chunks: ChatMessage[] = [];
+      for await (const chunk of llm.streamChat(
+        [{ role: "user", content: "test" }],
+        new AbortController().signal,
+      )) {
+        chunks.push(chunk);
+      }
+
+      expect(chunks).toHaveLength(2);
+      expect(chunks[0]).toEqual({
+        role: "thinking",
+        content: "my thinking",
+      });
+      expect(chunks[1]).toEqual({
+        role: "assistant",
+        content: "my response",
+      });
+    });
+
+    it("should handle thinking split across multiple chunks", async () => {
+      llm.setMockChunks([
+        { role: "assistant", content: "<think>first " },
+        { role: "assistant", content: "part</think>answer " },
+        { role: "assistant", content: "here" },
+      ]);
+
+      const chunks: ChatMessage[] = [];
+      for await (const chunk of llm.streamChat(
+        [{ role: "user", content: "test" }],
+        new AbortController().signal,
+      )) {
+        chunks.push(chunk);
+      }
+
+      // Should get: thinking chunks as they arrive, then answer chunks
+      const thinkingChunks = chunks.filter((c) => c.role === "thinking");
+      const assistantChunks = chunks.filter((c) => c.role === "assistant");
+
+      expect(thinkingChunks.length).toBeGreaterThan(0);
+      expect(thinkingChunks.map((c) => c.content).join("")).toBe("first part");
+      expect(assistantChunks.map((c) => c.content).join("")).toBe(
+        "answer here",
+      );
+    });
+
+    it("should handle partial tags at chunk boundaries", async () => {
+      llm.setMockChunks([
+        { role: "assistant", content: "before<th" },
role: "assistant", content: "beforethinkingafter" }, + ]); + + const chunks: ChatMessage[] = []; + for await (const chunk of llm.streamChat( + [{ role: "user", content: "test" }], + new AbortController().signal, + )) { + chunks.push(chunk); + } + + const thinkingChunks = chunks.filter((c) => c.role === "thinking"); + const assistantChunks = chunks.filter((c) => c.role === "assistant"); + + expect(thinkingChunks.map((c) => c.content).join("")).toBe("thinking"); + expect(assistantChunks.map((c) => c.content).join("")).toBe( + "beforeafter", + ); + }); + + it("should flush remaining content at stream end", async () => { + llm.setMockChunks([ + { role: "assistant", content: "incomplete thinking" }, + ]); + + const chunks: ChatMessage[] = []; + for await (const chunk of llm.streamChat( + [{ role: "user", content: "test" }], + new AbortController().signal, + )) { + chunks.push(chunk); + } + + // Should get thinking chunk(s) for the incomplete thinking content + const thinkingChunks = chunks.filter((c) => c.role === "thinking"); + expect(thinkingChunks.length).toBeGreaterThan(0); + expect(thinkingChunks.map((c) => c.content).join("")).toBe( + "incomplete thinking", + ); + }); + + it("should handle multiple thinking blocks in stream", async () => { + llm.setMockChunks([ + { role: "assistant", content: "firsttext1" }, + { role: "assistant", content: "secondtext2" }, + ]); + + const chunks: ChatMessage[] = []; + for await (const chunk of llm.streamChat( + [{ role: "user", content: "test" }], + new AbortController().signal, + )) { + chunks.push(chunk); + } + + const thinkingChunks = chunks.filter((c) => c.role === "thinking"); + const assistantChunks = chunks.filter((c) => c.role === "assistant"); + + expect(thinkingChunks.map((c) => c.content).join("")).toBe("firstsecond"); + expect(assistantChunks.map((c) => c.content).join("")).toBe("text1text2"); + }); + + it("should not emit empty chunks", async () => { + llm.setMockChunks([ + { role: "assistant", content: "only thinking" }, + ]); + + const chunks: ChatMessage[] = []; + for await (const chunk of llm.streamChat( + [{ role: "user", content: "test" }], + new AbortController().signal, + )) { + chunks.push(chunk); + } + + // Should only have thinking chunk, no empty assistant chunk + expect(chunks.every((c) => c.content && c.content.length > 0)).toBe(true); + expect(chunks.filter((c) => c.role === "thinking")).toHaveLength(1); + expect(chunks.filter((c) => c.role === "assistant")).toHaveLength(0); + }); + }); + + describe("streamChat without thinking tags configured", () => { + beforeEach(() => { + // Create vLLM without thinking tags + const options: VllmOptions = { + model: "mock-model", + apiBase: "http://localhost:8000", + template: "none" as any, + }; + llm = new MockVllm(options); + }); + + it("should pass through content unchanged when no tags configured", async () => { + llm.setMockChunks([ + { + role: "assistant", + content: "this should not be extractedregular content", + }, + ]); + + const chunks: ChatMessage[] = []; + for await (const chunk of llm.streamChat( + [{ role: "user", content: "test" }], + new AbortController().signal, + )) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + role: "assistant", + content: "this should not be extractedregular content", + }); + }); + }); + + describe("streamChat with native thinking role chunks", () => { + it("should handle native thinking role chunks alongside extraction", async () => { + // Simulate a provider that sends both native thinking role AND 
+      llm.setMockChunks([
+        { role: "thinking", content: "native thinking" },
+        { role: "assistant", content: "<think>tagged thinking</think>answer" },
+      ]);
+
+      const chunks: ChatMessage[] = [];
+      for await (const chunk of llm.streamChat(
+        [{ role: "user", content: "test" }],
+        new AbortController().signal,
+      )) {
+        chunks.push(chunk);
+      }
+
+      const thinkingChunks = chunks.filter((c) => c.role === "thinking");
+      const assistantChunks = chunks.filter((c) => c.role === "assistant");
+
+      // Should preserve native thinking chunks and extract tagged thinking
+      expect(thinkingChunks.map((c) => c.content).join("")).toBe(
+        "native thinkingtagged thinking",
+      );
+      expect(assistantChunks.map((c) => c.content).join("")).toBe("answer");
+    });
+  });
+
+  describe("custom tag formats", () => {
+    it("should work with custom reasoning tags", async () => {
+      const options: VllmOptions = {
+        model: "mock-model",
+        apiBase: "http://localhost:8000",
+        thinkingOpenTag: "<reasoning>",
+        thinkingCloseTag: "</reasoning>",
+        template: "none" as any,
+      };
+      llm = new MockVllm(options);
+
+      llm.setMockChunks([
+        {
+          role: "assistant",
+          content: "<reasoning>my reasoning</reasoning>my answer",
+        },
+      ]);
+
+      const chunks: ChatMessage[] = [];
+      for await (const chunk of llm.streamChat(
+        [{ role: "user", content: "test" }],
+        new AbortController().signal,
+      )) {
+        chunks.push(chunk);
+      }
+
+      expect(chunks).toHaveLength(2);
+      expect(chunks[0]).toEqual({
+        role: "thinking",
+        content: "my reasoning",
+      });
+      expect(chunks[1]).toEqual({
+        role: "assistant",
+        content: "my answer",
+      });
+    });
+
+    it("should work with bracket-style tags", async () => {
+      const options: VllmOptions = {
+        model: "mock-model",
+        apiBase: "http://localhost:8000",
+        thinkingOpenTag: "[THINK]",
+        thinkingCloseTag: "[/THINK]",
+        template: "none" as any,
+      };
+      llm = new MockVllm(options);
+
+      llm.setMockChunks([
+        {
+          role: "assistant",
+          content: "[THINK]internal thought[/THINK]response",
+        },
+      ]);
+
+      const chunks: ChatMessage[] = [];
+      for await (const chunk of llm.streamChat(
+        [{ role: "user", content: "test" }],
+        new AbortController().signal,
+      )) {
+        chunks.push(chunk);
+      }
+
+      expect(chunks).toHaveLength(2);
+      expect(chunks[0]).toEqual({
+        role: "thinking",
+        content: "internal thought",
+      });
+      expect(chunks[1]).toEqual({
+        role: "assistant",
+        content: "response",
+      });
+    });
+  });
+
+  describe("validation", () => {
+    it("should throw error when only thinkingOpenTag is provided", () => {
+      expect(() => {
+        new MockVllm({
+          model: "test-model",
+          apiBase: "http://localhost:8000",
+          thinkingOpenTag: "<think>",
+        });
+      }).toThrow(
+        "vLLM: Both thinkingOpenTag and thinkingCloseTag must be provided together",
+      );
+    });
+
+    it("should throw error when only thinkingCloseTag is provided", () => {
+      expect(() => {
+        new MockVllm({
+          model: "test-model",
+          apiBase: "http://localhost:8000",
+          thinkingCloseTag: "</think>",
+        });
+      }).toThrow(
+        "vLLM: Both thinkingOpenTag and thinkingCloseTag must be provided together",
+      );
+    });
+  });
+});
diff --git a/docs/customize/model-providers/more/vllm.mdx b/docs/customize/model-providers/more/vllm.mdx
index 3f3bdd643f0..599d543237a 100644
--- a/docs/customize/model-providers/more/vllm.mdx
+++ b/docs/customize/model-providers/more/vllm.mdx
@@ -104,4 +104,45 @@ Continue automatically handles vLLM's response format (which uses `results` inst
 
 [Click here](../../model-roles/reranking) to see a list of reranking model providers.
 
+## Thinking output format
+
+vLLM supports thinking/reasoning outputs in two ways:
+
+1. **Standard format** - Via the `reasoning_content` field in the response (default OpenAI format)
+2. **Custom tags** - Via configurable tags in the response content
+
+For models that use custom thinking tag formats (like `<think>...</think>` or `<reasoning>...</reasoning>`), you can configure `thinkingOpenTag` and `thinkingCloseTag` to extract thinking content:
+
+<Tabs>
+  <Tab title="YAML">
+  ```yaml title="config.yaml"
+  models:
+    - name: DeepSeek R1 Distill
+      provider: vllm
+      model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+      apiBase: http://localhost:8000/v1
+      thinkingOpenTag: "<think>"
+      thinkingCloseTag: "</think>"
+  ```
+  </Tab>
+  <Tab title="JSON">
+  ```json title="config.json"
+  {
+    "models": [
+      {
+        "title": "DeepSeek R1 Distill",
+        "provider": "vllm",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+        "apiBase": "http://localhost:8000/v1",
+        "thinkingOpenTag": "<think>",
+        "thinkingCloseTag": "</think>"
+      }
+    ]
+  }
+  ```
+  </Tab>
+</Tabs>
+
+See vLLM's [reasoning outputs documentation](https://docs.vllm.ai/en/latest/features/reasoning_outputs.html) for more details.
+
 The continue implementation uses [OpenAI](../top-level/openai) under the hood. [View the source](https://github.com/continuedev/continue/blob/main/core/llm/llms/Vllm.ts)
diff --git a/docs/reference.mdx b/docs/reference.mdx
index e467a5a3109..8e9ac2e0eab 100644
--- a/docs/reference.mdx
+++ b/docs/reference.mdx
@@ -146,6 +146,10 @@ The `models` section defines the language models used in your configuration. Mod
 - `useRecentlyEdited`: If `true`, includes recently edited files in context.
 - `useRecentlyOpened`: If `true`, includes recently opened files in context.
 
+- `thinkingOpenTag`: Custom opening tag for extracting thinking/reasoning content from streamed responses. Used with models that output thinking content wrapped in custom tags (e.g., `<think>`, `<reasoning>`). Must be used together with `thinkingCloseTag`. See the [vLLM provider documentation](/customize/model-providers/more/vllm#thinking-output-format) for examples.
+
+- `thinkingCloseTag`: Custom closing tag for extracting thinking/reasoning content from streamed responses. Must be used together with `thinkingOpenTag`.
+
 **Example:**
 
 ```yaml title="config.yaml"
@@ -179,6 +183,12 @@ models:
     roles:
       - chat
       - edit
+  - name: vLLM with Custom Thinking Tags
+    provider: vllm
+    model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+    apiBase: http://localhost:8000/v1
+    thinkingOpenTag: "<think>"
+    thinkingCloseTag: "</think>"
 ```
 
 ---
diff --git a/docs/reference/json-reference.mdx b/docs/reference/json-reference.mdx
index ad5ef6d161e..d538cecb043 100644
--- a/docs/reference/json-reference.mdx
+++ b/docs/reference/json-reference.mdx
@@ -60,6 +60,10 @@ Each model has specific configuration options tailored to its provider and funct
 - `uploadImage`: Boolean indicating if the model supports image uploads.
 - `tools`: Boolean indicating if the model supports tool use.
 
+- `thinkingOpenTag`: Custom opening tag for extracting thinking/reasoning content from streamed responses. Used with models that output thinking content wrapped in custom tags (e.g., `<think>`, `<reasoning>`). Must be used together with `thinkingCloseTag`. See the [vLLM provider documentation](/customize/model-providers/more/vllm#thinking-output-format) for examples.
+
+- `thinkingCloseTag`: Custom closing tag for extracting thinking/reasoning content from streamed responses. Must be used together with `thinkingOpenTag`.
+
 _(AWS Only)_
 
 - `profile`: AWS security profile for authorization.